mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 17:41:59 +00:00
Merge pull request #56519 from rschu1ze/qc-ignore-nondeterministic-queries
Query cache: Allow to ignore non-deterministic queries
This commit is contained in:
commit
361a3ca11f
@ -169,7 +169,12 @@ Also, results of queries with non-deterministic functions are not cached by defa
|
|||||||
[`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc.
|
[`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc.
|
||||||
|
|
||||||
To force caching of results of queries with non-deterministic functions regardless, use setting
|
To force caching of results of queries with non-deterministic functions regardless, use setting
|
||||||
[query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
|
[query_cache_nondeterministic_function_handling](settings/settings.md#query-cache-nondeterministic-function-handling) with value `'save'`.
|
||||||
|
|
||||||
|
:::note
|
||||||
|
Prior to ClickHouse v23.11, setting 'query_cache_store_results_of_queries_with_nondeterministic_functions = 0 / 1' controlled whether
|
||||||
|
results of queries with non-deterministic functions were cached. In newer ClickHouse versions, this setting is obsolete and has no effect.
|
||||||
|
:::
|
||||||
|
|
||||||
Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a
|
Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a
|
||||||
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
|
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
|
||||||
|
@ -1657,16 +1657,17 @@ Possible values:
|
|||||||
|
|
||||||
Default value: `1`.
|
Default value: `1`.
|
||||||
|
|
||||||
## query_cache_store_results_of_queries_with_nondeterministic_functions {#query-cache-store-results-of-queries-with-nondeterministic-functions}
|
## query_cache_nondeterministic_function_handling {#query-cache-nondeterministic-function-handling}
|
||||||
|
|
||||||
If turned on, then results of `SELECT` queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query cache](../query-cache.md).
|
Controls how the [query cache](../query-cache.md) handles `SELECT` queries with non-deterministic functions like `rand()` or `now()`.
|
||||||
|
|
||||||
Possible values:
|
Possible values:
|
||||||
|
|
||||||
- 0 - Disabled
|
- `'throw'` - Throw an exception and don't cache the query result.
|
||||||
- 1 - Enabled
|
- `'save'` - Cache the query result.
|
||||||
|
- `'ignore'` - Don't cache the query result and don't throw an exception.
|
||||||
|
|
||||||
Default value: `0`.
|
Default value: `throw`.
|
||||||
|
|
||||||
## query_cache_min_query_runs {#query-cache-min-query-runs}
|
## query_cache_min_query_runs {#query-cache-min-query-runs}
|
||||||
|
|
||||||
|
@ -1381,7 +1381,7 @@ toStartOfFifteenMinutes(toDateTime('2023-04-21 10:20:00')): 2023-04-21 10:15:00
|
|||||||
toStartOfFifteenMinutes(toDateTime('2023-04-21 10:23:00')): 2023-04-21 10:15:00
|
toStartOfFifteenMinutes(toDateTime('2023-04-21 10:23:00')): 2023-04-21 10:15:00
|
||||||
```
|
```
|
||||||
|
|
||||||
## toStartOfInterval(time_or_data, INTERVAL x unit \[, time_zone\])
|
## toStartOfInterval(date_or_date_with_time, INTERVAL x unit \[, time_zone\])
|
||||||
|
|
||||||
This function generalizes other `toStartOf*()` functions. For example,
|
This function generalizes other `toStartOf*()` functions. For example,
|
||||||
- `toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`,
|
- `toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`,
|
||||||
|
@ -583,7 +583,7 @@
|
|||||||
M(701, CLUSTER_DOESNT_EXIST) \
|
M(701, CLUSTER_DOESNT_EXIST) \
|
||||||
M(702, CLIENT_INFO_DOES_NOT_MATCH) \
|
M(702, CLIENT_INFO_DOES_NOT_MATCH) \
|
||||||
M(703, INVALID_IDENTIFIER) \
|
M(703, INVALID_IDENTIFIER) \
|
||||||
M(704, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \
|
M(704, QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS) \
|
||||||
M(705, TABLE_NOT_EMPTY) \
|
M(705, TABLE_NOT_EMPTY) \
|
||||||
M(706, LIBSSH_ERROR) \
|
M(706, LIBSSH_ERROR) \
|
||||||
M(999, KEEPER_EXCEPTION) \
|
M(999, KEEPER_EXCEPTION) \
|
||||||
|
@ -617,7 +617,7 @@ class IColumn;
|
|||||||
M(Bool, use_query_cache, false, "Enable the query cache", 0) \
|
M(Bool, use_query_cache, false, "Enable the query cache", 0) \
|
||||||
M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \
|
M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \
|
||||||
M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \
|
M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \
|
||||||
M(Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false, "Store results of queries with non-deterministic functions (e.g. rand(), now()) in the query cache", 0) \
|
M(QueryCacheNondeterministicFunctionHandling, query_cache_nondeterministic_function_handling, QueryCacheNondeterministicFunctionHandling::Throw, "How the query cache handles queries with non-deterministic functions, e.g. now()", 0) \
|
||||||
M(UInt64, query_cache_max_size_in_bytes, 0, "The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited. ", 0) \
|
M(UInt64, query_cache_max_size_in_bytes, 0, "The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited. ", 0) \
|
||||||
M(UInt64, query_cache_max_entries, 0, "The maximum number of query results the current user may store in the query cache. 0 means unlimited.", 0) \
|
M(UInt64, query_cache_max_entries, 0, "The maximum number of query results the current user may store in the query cache. 0 means unlimited.", 0) \
|
||||||
M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \
|
M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \
|
||||||
@ -876,6 +876,7 @@ class IColumn;
|
|||||||
MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \
|
MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \
|
||||||
MAKE_OBSOLETE(M, UInt64, parallel_replicas_min_number_of_granules_to_enable, 0) \
|
MAKE_OBSOLETE(M, UInt64, parallel_replicas_min_number_of_granules_to_enable, 0) \
|
||||||
MAKE_OBSOLETE(M, Bool, query_plan_optimize_projection, true) \
|
MAKE_OBSOLETE(M, Bool, query_plan_optimize_projection, true) \
|
||||||
|
MAKE_OBSOLETE(M, Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false) \
|
||||||
|
|
||||||
/** The section above is for obsolete settings. Do not add anything there. */
|
/** The section above is for obsolete settings. Do not add anything there. */
|
||||||
|
|
||||||
|
@ -69,6 +69,12 @@ IMPLEMENT_SETTING_ENUM(DistributedProductMode, ErrorCodes::UNKNOWN_DISTRIBUTED_P
|
|||||||
{"allow", DistributedProductMode::ALLOW}})
|
{"allow", DistributedProductMode::ALLOW}})
|
||||||
|
|
||||||
|
|
||||||
|
IMPLEMENT_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling, ErrorCodes::BAD_ARGUMENTS,
|
||||||
|
{{"throw", QueryCacheNondeterministicFunctionHandling::Throw},
|
||||||
|
{"save", QueryCacheNondeterministicFunctionHandling::Save},
|
||||||
|
{"ignore", QueryCacheNondeterministicFunctionHandling::Ignore}})
|
||||||
|
|
||||||
|
|
||||||
IMPLEMENT_SETTING_ENUM(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS,
|
IMPLEMENT_SETTING_ENUM(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS,
|
||||||
{{"basic", FormatSettings::DateTimeInputFormat::Basic},
|
{{"basic", FormatSettings::DateTimeInputFormat::Basic},
|
||||||
{"best_effort", FormatSettings::DateTimeInputFormat::BestEffort},
|
{"best_effort", FormatSettings::DateTimeInputFormat::BestEffort},
|
||||||
|
@ -70,6 +70,16 @@ enum class DistributedProductMode
|
|||||||
|
|
||||||
DECLARE_SETTING_ENUM(DistributedProductMode)
|
DECLARE_SETTING_ENUM(DistributedProductMode)
|
||||||
|
|
||||||
|
/// How the query cache handles queries with non-deterministic functions, e.g. now()
|
||||||
|
enum class QueryCacheNondeterministicFunctionHandling
|
||||||
|
{
|
||||||
|
Throw,
|
||||||
|
Save,
|
||||||
|
Ignore
|
||||||
|
};
|
||||||
|
|
||||||
|
DECLARE_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling)
|
||||||
|
|
||||||
|
|
||||||
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeInputFormat)
|
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeInputFormat)
|
||||||
|
|
||||||
|
@ -96,7 +96,7 @@ namespace DB
|
|||||||
|
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS;
|
extern const int QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS;
|
||||||
extern const int INTO_OUTFILE_NOT_ALLOWED;
|
extern const int INTO_OUTFILE_NOT_ALLOWED;
|
||||||
extern const int INVALID_TRANSACTION;
|
extern const int INVALID_TRANSACTION;
|
||||||
extern const int LOGICAL_ERROR;
|
extern const int LOGICAL_ERROR;
|
||||||
@ -1106,32 +1106,41 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
|||||||
/// top of the pipeline which stores the result in the query cache.
|
/// top of the pipeline which stores the result in the query cache.
|
||||||
if (can_use_query_cache && settings.enable_writes_to_query_cache)
|
if (can_use_query_cache && settings.enable_writes_to_query_cache)
|
||||||
{
|
{
|
||||||
if (astContainsNonDeterministicFunctions(ast, context) && !settings.query_cache_store_results_of_queries_with_nondeterministic_functions)
|
const bool ast_contains_nondeterministic_functions = astContainsNonDeterministicFunctions(ast, context);
|
||||||
throw Exception(ErrorCodes::CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS,
|
const QueryCacheNondeterministicFunctionHandling nondeterministic_function_handling = settings.query_cache_nondeterministic_function_handling;
|
||||||
"Unable to cache the query result because the query contains a non-deterministic function. Use setting `query_cache_store_results_of_queries_with_nondeterministic_functions = 1` to cache the query result regardless");
|
|
||||||
|
|
||||||
QueryCache::Key key(
|
if (ast_contains_nondeterministic_functions && nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Throw)
|
||||||
ast, res.pipeline.getHeader(),
|
throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS,
|
||||||
context->getUserName(), settings.query_cache_share_between_users,
|
"The query result was not cached because the query contains a non-deterministic function."
|
||||||
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl),
|
" Use setting `query_cache_nondeterministic_function_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching");
|
||||||
settings.query_cache_compress_entries);
|
|
||||||
|
|
||||||
const size_t num_query_runs = query_cache->recordQueryRun(key);
|
if (!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save)
|
||||||
if (num_query_runs <= settings.query_cache_min_query_runs)
|
|
||||||
{
|
{
|
||||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert because the query ran {} times but the minimum required number of query runs to cache the query result is {}", num_query_runs, settings.query_cache_min_query_runs);
|
QueryCache::Key key(
|
||||||
}
|
ast, res.pipeline.getHeader(),
|
||||||
else
|
context->getUserName(), settings.query_cache_share_between_users,
|
||||||
{
|
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl),
|
||||||
auto query_cache_writer = std::make_shared<QueryCache::Writer>(query_cache->createWriter(
|
settings.query_cache_compress_entries);
|
||||||
key,
|
|
||||||
std::chrono::milliseconds(settings.query_cache_min_query_duration.totalMilliseconds()),
|
const size_t num_query_runs = query_cache->recordQueryRun(key);
|
||||||
settings.query_cache_squash_partial_results,
|
if (num_query_runs <= settings.query_cache_min_query_runs)
|
||||||
settings.max_block_size,
|
{
|
||||||
settings.query_cache_max_size_in_bytes,
|
LOG_TRACE(&Poco::Logger::get("QueryCache"),
|
||||||
settings.query_cache_max_entries));
|
"Skipped insert because the query ran {} times but the minimum required number of query runs to cache the query result is {}",
|
||||||
res.pipeline.writeResultIntoQueryCache(query_cache_writer);
|
num_query_runs, settings.query_cache_min_query_runs);
|
||||||
query_cache_usage = QueryCache::Usage::Write;
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto query_cache_writer = std::make_shared<QueryCache::Writer>(query_cache->createWriter(
|
||||||
|
key,
|
||||||
|
std::chrono::milliseconds(settings.query_cache_min_query_duration.totalMilliseconds()),
|
||||||
|
settings.query_cache_squash_partial_results,
|
||||||
|
settings.max_block_size,
|
||||||
|
settings.query_cache_max_size_in_bytes,
|
||||||
|
settings.query_cache_max_entries));
|
||||||
|
res.pipeline.writeResultIntoQueryCache(query_cache_writer);
|
||||||
|
query_cache_usage = QueryCache::Usage::Write;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,4 +1,8 @@
|
|||||||
|
-- query_cache_nondeterministic_function_handling = throw
|
||||||
0
|
0
|
||||||
---
|
-- query_cache_nondeterministic_function_handling = save
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
|
-- query_cache_nondeterministic_function_handling = ignore
|
||||||
|
1
|
||||||
|
0
|
||||||
|
@ -3,14 +3,21 @@
|
|||||||
|
|
||||||
SYSTEM DROP QUERY CACHE;
|
SYSTEM DROP QUERY CACHE;
|
||||||
|
|
||||||
-- rand() is non-deterministic, the query is rejected by default
|
SELECT '-- query_cache_nondeterministic_function_handling = throw';
|
||||||
SELECT COUNT(rand(1)) SETTINGS use_query_cache = true; -- { serverError CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS }
|
SELECT count(now()) SETTINGS use_query_cache = true; -- { serverError QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS }
|
||||||
SELECT COUNT(*) FROM system.query_cache;
|
SELECT count(now()) SETTINGS use_query_cache = true, query_cache_nondeterministic_function_handling = 'throw'; -- { serverError QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS }
|
||||||
|
SELECT count(*) FROM system.query_cache;
|
||||||
SELECT '---';
|
|
||||||
|
SYSTEM DROP QUERY CACHE;
|
||||||
-- Force caching using a setting
|
|
||||||
SELECT COUNT(RAND(1)) SETTINGS use_query_cache = true, query_cache_store_results_of_queries_with_nondeterministic_functions = true;
|
SELECT '-- query_cache_nondeterministic_function_handling = save';
|
||||||
SELECT COUNT(*) FROM system.query_cache;
|
SELECT count(now()) SETTINGS use_query_cache = true, query_cache_nondeterministic_function_handling = 'save';
|
||||||
|
SELECT count(*) FROM system.query_cache;
|
||||||
|
|
||||||
|
SYSTEM DROP QUERY CACHE;
|
||||||
|
|
||||||
|
SELECT '-- query_cache_nondeterministic_function_handling = ignore';
|
||||||
|
SELECT count(now()) SETTINGS use_query_cache = true, query_cache_nondeterministic_function_handling = 'ignore';
|
||||||
|
SELECT count(*) FROM system.query_cache;
|
||||||
|
|
||||||
SYSTEM DROP QUERY CACHE;
|
SYSTEM DROP QUERY CACHE;
|
||||||
|
@ -42,6 +42,7 @@ optimize_duplicate_order_by_and_distinct
|
|||||||
optimize_fuse_sum_count_avg
|
optimize_fuse_sum_count_avg
|
||||||
parallel_replicas_min_number_of_granules_to_enable
|
parallel_replicas_min_number_of_granules_to_enable
|
||||||
partial_merge_join_optimizations
|
partial_merge_join_optimizations
|
||||||
|
query_cache_store_results_of_queries_with_nondeterministic_functions
|
||||||
query_plan_optimize_projection
|
query_plan_optimize_projection
|
||||||
replication_alter_columns_timeout
|
replication_alter_columns_timeout
|
||||||
restore_threads
|
restore_threads
|
||||||
|
Loading…
Reference in New Issue
Block a user