From a15029ecb593e218cabeb3ae2af5d2afa3f22c6e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 19 Sep 2023 16:29:02 +0000 Subject: [PATCH 1/4] Query Cache: Reject queries with non-deterministic functions by default https://github.com/ClickHouse/support-escalation/issues/963 --- docs/en/operations/query-cache.md | 4 ++-- src/Common/ErrorCodes.cpp | 1 + src/Interpreters/executeQuery.cpp | 14 +++++++++----- ...uery_cache_nondeterministic_functions.reference | 1 - ...2494_query_cache_nondeterministic_functions.sql | 6 +++--- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index bbde77338af..e111206355e 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -142,7 +142,7 @@ As a result, the query cache stores for each query multiple (partial) result blocks. While this behavior is a good default, it can be suppressed using setting [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results). -Also, results of queries with non-deterministic functions are not cached. Such functions include +Also, results of queries with non-deterministic functions are not cached by default. Such functions include - functions for accessing dictionaries: [`dictGet()`](../sql-reference/functions/ext-dict-functions.md#dictGet) etc. - [user-defined functions](../sql-reference/statements/create/function.md), - functions which return the current date or time: [`now()`](../sql-reference/functions/date-time-functions.md#now), @@ -158,7 +158,7 @@ Also, results of queries with non-deterministic functions are not cached. Such f - functions which depend on the environment: [`currentUser()`](../sql-reference/functions/other-functions.md#currentUser), [`queryID()`](../sql-reference/functions/other-functions.md#queryID), [`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc. -Caching of non-deterministic functions can be forced regardless using setting +To force caching of results of queries with non-deterministic functionsregardless, using setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions). Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index f23685c37d1..ad34516b00e 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -585,6 +585,7 @@ M(700, USER_SESSION_LIMIT_EXCEEDED) \ M(701, CLUSTER_DOESNT_EXIST) \ M(702, CLIENT_INFO_DOES_NOT_MATCH) \ + M(703, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 310af2f9812..9b24b5df9b2 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -94,11 +94,12 @@ namespace DB namespace ErrorCodes { + extern const int CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS; extern const int INTO_OUTFILE_NOT_ALLOWED; - extern const int QUERY_WAS_CANCELLED; extern const int INVALID_TRANSACTION; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int QUERY_WAS_CANCELLED; } @@ -991,7 +992,7 @@ static std::tuple executeQueryImpl( if (!async_insert) { - /// If it is a non-internal SELECT, and passive/read use of the query cache is enabled, and the cache knows the query, then set + /// If it is a non-internal SELECT, and passive (read) use of the query cache is enabled, and the cache knows the query, then set /// a pipeline with a source populated by the query cache. auto get_result_from_query_cache = [&]() { @@ -1091,11 +1092,14 @@ static std::tuple executeQueryImpl( res = interpreter->execute(); - /// If it is a non-internal SELECT query, and active/write use of the query cache is enabled, then add a processor on + /// If it is a non-internal SELECT query, and active (write) use of the query cache is enabled, then add a processor on /// top of the pipeline which stores the result in the query cache. - if (can_use_query_cache && settings.enable_writes_to_query_cache - && (!astContainsNonDeterministicFunctions(ast, context) || settings.query_cache_store_results_of_queries_with_nondeterministic_functions)) + if (can_use_query_cache && settings.enable_writes_to_query_cache) { + if (astContainsNonDeterministicFunctions(ast, context) && !settings.query_cache_store_results_of_queries_with_nondeterministic_functions) + throw Exception(ErrorCodes::CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS, + "Unable to cache the query result because the query contains a non-deterministic function. Use setting query_cache_store_results_of_queries_with_nondeterministic_functions = 1 to store the query result regardless."); + QueryCache::Key key( ast, res.pipeline.getHeader(), context->getUserName(), settings.query_cache_share_between_users, diff --git a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference index cb6165c307a..e666f54d4c4 100644 --- a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference +++ b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference @@ -1,4 +1,3 @@ -1 0 --- 1 diff --git a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql index 045b7258a34..3a2e24d6bfe 100644 --- a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql +++ b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql @@ -3,13 +3,13 @@ SYSTEM DROP QUERY CACHE; --- rand() is non-deterministic, with default settings no entry in the query cache should be created -SELECT COUNT(rand(1)) SETTINGS use_query_cache = true; +-- rand() is non-deterministic, the query is rejected by default +SELECT COUNT(rand(1)) SETTINGS use_query_cache = true; -- { serverError 703 } SELECT COUNT(*) FROM system.query_cache; SELECT '---'; --- But an entry can be forced using a setting +-- Force caching using a setting SELECT COUNT(RAND(1)) SETTINGS use_query_cache = true, query_cache_store_results_of_queries_with_nondeterministic_functions = true; SELECT COUNT(*) FROM system.query_cache; From 5111f1e0901f13a758408a7aee39baba586eeb29 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 19 Sep 2023 18:59:00 +0200 Subject: [PATCH 2/4] Update docs/en/operations/query-cache.md Co-authored-by: Nikita Taranov --- docs/en/operations/query-cache.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index e111206355e..6e21b0b3658 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -158,7 +158,7 @@ Also, results of queries with non-deterministic functions are not cached by defa - functions which depend on the environment: [`currentUser()`](../sql-reference/functions/other-functions.md#currentUser), [`queryID()`](../sql-reference/functions/other-functions.md#queryID), [`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc. -To force caching of results of queries with non-deterministic functionsregardless, using setting +To force caching of results of queries with non-deterministic functions regardless, use setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions). Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a From a175a7e0fcd696e08f3262582b5dbb7074259668 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 20 Sep 2023 09:04:35 +0000 Subject: [PATCH 3/4] Use error name instead of error code --- .../02494_query_cache_nondeterministic_functions.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql index 3a2e24d6bfe..62e0b099d7a 100644 --- a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql +++ b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql @@ -4,7 +4,7 @@ SYSTEM DROP QUERY CACHE; -- rand() is non-deterministic, the query is rejected by default -SELECT COUNT(rand(1)) SETTINGS use_query_cache = true; -- { serverError 703 } +SELECT COUNT(rand(1)) SETTINGS use_query_cache = true; -- { serverError CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS } SELECT COUNT(*) FROM system.query_cache; SELECT '---'; From c75f7c843456fe184f1d7d5e40e77d27123a441b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 20 Sep 2023 13:27:11 +0000 Subject: [PATCH 4/4] Correct merge result --- src/Common/ErrorCodes.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 95614c7e9ce..be2b0a7bd5e 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -585,8 +585,9 @@ M(700, USER_SESSION_LIMIT_EXCEEDED) \ M(701, CLUSTER_DOESNT_EXIST) \ M(702, CLIENT_INFO_DOES_NOT_MATCH) \ - M(703, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \ - M(704, INVALID_IDENTIFIER) \ + M(703, INVALID_IDENTIFIER) \ + M(704, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \ + \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ M(1001, STD_EXCEPTION) \