mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge pull request #62376 from rschu1ze/qc-system-tables
Query cache: ignore results of queries against system tables
This commit is contained in:
commit
99cef9deee
@ -67,8 +67,7 @@ SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
|
||||
|
||||
For maximum control, it is generally recommended to provide settings `use_query_cache`, `enable_writes_to_query_cache` and
|
||||
`enable_reads_from_query_cache` only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET
|
||||
use_query_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to system tables
|
||||
may return cached results then.
|
||||
use_query_cache = true`) but one should keep in mind that all `SELECT` queries may return cached results then.
|
||||
|
||||
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
|
||||
[system.query_cache](system-tables/query_cache.md). The number of query cache hits and misses since database start are shown as events
|
||||
@ -175,6 +174,10 @@ Also, results of queries with non-deterministic functions are not cached by defa
|
||||
To force caching of results of queries with non-deterministic functions regardless, use setting
|
||||
[query_cache_nondeterministic_function_handling](settings/settings.md#query-cache-nondeterministic-function-handling).
|
||||
|
||||
Results of queries that involve system tables, e.g. `system.processes` or `information_schema.tables`, are not cached by default. To force
|
||||
caching of results of queries with system tables regardless, use setting
|
||||
[query_cache_system_table_handling](settings/settings.md#query-cache-system-table-handling).
|
||||
|
||||
:::note
|
||||
Prior to ClickHouse v23.11, setting 'query_cache_store_results_of_queries_with_nondeterministic_functions = 0 / 1' controlled whether
|
||||
results of queries with non-deterministic results were cached. In newer ClickHouse versions, this setting is obsolete and has no effect.
|
||||
|
@ -1689,6 +1689,18 @@ Possible values:
|
||||
|
||||
Default value: `throw`.
|
||||
|
||||
## query_cache_system_table_handling {#query-cache-system-table-handling}
|
||||
|
||||
Controls how the [query cache](../query-cache.md) handles `SELECT` queries against system tables, i.e. tables in databases `system.*` and `information_schema.*`.
|
||||
|
||||
Possible values:
|
||||
|
||||
- `'throw'` - Throw an exception and don't cache the query result.
|
||||
- `'save'` - Cache the query result.
|
||||
- `'ignore'` - Don't cache the query result and don't throw an exception.
|
||||
|
||||
Default value: `throw`.
|
||||
|
||||
## query_cache_min_query_runs {#query-cache-min-query-runs}
|
||||
|
||||
Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md).
|
||||
|
@ -597,6 +597,7 @@
|
||||
M(716, CANNOT_FORGET_PARTITION) \
|
||||
M(717, EXPERIMENTAL_FEATURE_ERROR) \
|
||||
M(718, TOO_SLOW_PARSING) \
|
||||
M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \
|
||||
\
|
||||
M(900, DISTRIBUTED_CACHE_ERROR) \
|
||||
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \
|
||||
|
@ -669,6 +669,7 @@ class IColumn;
|
||||
M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \
|
||||
M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \
|
||||
M(QueryCacheNondeterministicFunctionHandling, query_cache_nondeterministic_function_handling, QueryCacheNondeterministicFunctionHandling::Throw, "How the query cache handles queries with non-deterministic functions, e.g. now()", 0) \
|
||||
M(QueryCacheSystemTableHandling, query_cache_system_table_handling, QueryCacheSystemTableHandling::Throw, "How the query cache handles queries against system tables, i.e. tables in databases 'system.*' and 'information_schema.*'", 0) \
|
||||
M(UInt64, query_cache_max_size_in_bytes, 0, "The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited. ", 0) \
|
||||
M(UInt64, query_cache_max_entries, 0, "The maximum number of query results the current user may store in the query cache. 0 means unlimited.", 0) \
|
||||
M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \
|
||||
|
@ -87,6 +87,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{
|
||||
{"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"},
|
||||
{"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"},
|
||||
{"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"},
|
||||
}},
|
||||
{"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"},
|
||||
{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"},
|
||||
|
@ -87,6 +87,10 @@ IMPLEMENT_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling, ErrorCodes::B
|
||||
{"save", QueryCacheNondeterministicFunctionHandling::Save},
|
||||
{"ignore", QueryCacheNondeterministicFunctionHandling::Ignore}})
|
||||
|
||||
/// Textual values of setting 'query_cache_system_table_handling' paired with the enum values they stand for.
IMPLEMENT_SETTING_ENUM(
    QueryCacheSystemTableHandling,
    ErrorCodes::BAD_ARGUMENTS,
    {{"throw", QueryCacheSystemTableHandling::Throw},
     {"save", QueryCacheSystemTableHandling::Save},
     {"ignore", QueryCacheSystemTableHandling::Ignore}})
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS,
|
||||
{{"basic", FormatSettings::DateTimeInputFormat::Basic},
|
||||
|
@ -184,6 +184,15 @@ enum class QueryCacheNondeterministicFunctionHandling
|
||||
|
||||
DECLARE_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling)
|
||||
|
||||
/// Policy of the query cache for SELECT queries against system tables,
/// i.e. tables in databases 'system.*' and 'information_schema.*'.
enum class QueryCacheSystemTableHandling
{
    Throw,  /// Throw an exception and don't cache the query result.
    Save,   /// Cache the query result.
    Ignore, /// Don't cache the query result and don't throw an exception.
};
|
||||
|
||||
DECLARE_SETTING_ENUM(QueryCacheSystemTableHandling)
|
||||
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeInputFormat)
|
||||
|
||||
|
@ -2,11 +2,17 @@
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/IParser.h>
|
||||
#include <Parsers/TokenIterator.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/parseDatabaseAndTableName.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/TTLCachePolicy.h>
|
||||
@ -52,7 +58,54 @@ struct HasNonDeterministicFunctionsMatcher
|
||||
}
|
||||
};
|
||||
|
||||
struct HasSystemTablesMatcher
|
||||
{
|
||||
struct Data
|
||||
{
|
||||
const ContextPtr context;
|
||||
bool has_system_tables = false;
|
||||
};
|
||||
|
||||
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
|
||||
|
||||
static void visit(const ASTPtr & node, Data & data)
|
||||
{
|
||||
if (data.has_system_tables)
|
||||
return;
|
||||
|
||||
String database_table; /// or whatever else we get, e.g. just a table
|
||||
|
||||
/// SELECT [...] FROM <table>
|
||||
if (const auto * table_identifier = node->as<ASTTableIdentifier>())
|
||||
{
|
||||
database_table = table_identifier->name();
|
||||
}
|
||||
/// SELECT [...] FROM clusterAllReplicas(<cluster>, <table>)
|
||||
else if (const auto * identifier = node->as<ASTIdentifier>())
|
||||
{
|
||||
database_table = identifier->name();
|
||||
}
|
||||
/// Handle SELECT [...] FROM clusterAllReplicas(<cluster>, '<table>')
|
||||
else if (const auto * literal = node->as<ASTLiteral>())
|
||||
{
|
||||
const auto & value = literal->value;
|
||||
database_table = toString(value);
|
||||
}
|
||||
|
||||
Tokens tokens(database_table.c_str(), database_table.c_str() + database_table.size(), /*max_query_size*/ 2048, /*skip_insignificant*/ true);
|
||||
IParser::Pos pos(tokens, /*max_depth*/ 42, /*max_backtracks*/ 42);
|
||||
Expected expected;
|
||||
String database;
|
||||
String table;
|
||||
bool successfully_parsed = parseDatabaseAndTableName(pos, expected, database, table);
|
||||
if (successfully_parsed)
|
||||
if (DatabaseCatalog::isPredefinedDatabase(database))
|
||||
data.has_system_tables = true;
|
||||
}
|
||||
};
|
||||
|
||||
using HasNonDeterministicFunctionsVisitor = InDepthNodeVisitor<HasNonDeterministicFunctionsMatcher, true>;
|
||||
using HasSystemTablesVisitor = InDepthNodeVisitor<HasSystemTablesMatcher, true>;
|
||||
|
||||
}
|
||||
|
||||
@ -63,6 +116,13 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context)
|
||||
return finder_data.has_non_deterministic_functions;
|
||||
}
|
||||
|
||||
/// Runs HasSystemTablesVisitor over 'ast' and returns the flag it collected, i.e. whether HasSystemTablesMatcher found
/// a reference to a system table somewhere in the tree.
bool astContainsSystemTables(ASTPtr ast, ContextPtr context)
{
    HasSystemTablesMatcher::Data search_state{context};

    HasSystemTablesVisitor visitor(search_state);
    visitor.visit(ast);

    return search_state.has_system_tables;
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
|
@ -17,6 +17,9 @@ namespace DB
|
||||
/// Does AST contain non-deterministic functions like rand() and now()?
|
||||
bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context);
|
||||
|
||||
/// Does AST contain system tables like "system.processes"?
|
||||
bool astContainsSystemTables(ASTPtr ast, ContextPtr context);
|
||||
|
||||
/// Maps queries to query results. Useful to avoid repeated query calculation.
|
||||
///
|
||||
/// The cache does not aim to be transactionally consistent (which is difficult to get right). For example, the cache is not invalidated
|
||||
|
@ -97,6 +97,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS;
|
||||
extern const int QUERY_CACHE_USED_WITH_SYSTEM_TABLE;
|
||||
extern const int INTO_OUTFILE_NOT_ALLOWED;
|
||||
extern const int INVALID_TRANSACTION;
|
||||
extern const int LOGICAL_ERROR;
|
||||
@ -1187,15 +1188,26 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
/// top of the pipeline which stores the result in the query cache.
|
||||
if (can_use_query_cache && settings.enable_writes_to_query_cache)
|
||||
{
|
||||
/// Only use the query cache if the query does not contain non-deterministic functions or system tables (which are typically non-deterministic)
|
||||
|
||||
const bool ast_contains_nondeterministic_functions = astContainsNonDeterministicFunctions(ast, context);
|
||||
const bool ast_contains_system_tables = astContainsSystemTables(ast, context);
|
||||
|
||||
const QueryCacheNondeterministicFunctionHandling nondeterministic_function_handling = settings.query_cache_nondeterministic_function_handling;
|
||||
const QueryCacheSystemTableHandling system_table_handling = settings.query_cache_system_table_handling;
|
||||
|
||||
if (ast_contains_nondeterministic_functions && nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Throw)
|
||||
throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS,
|
||||
"The query result was not cached because the query contains a non-deterministic function."
|
||||
" Use setting `query_cache_nondeterministic_function_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching");
|
||||
|
||||
if (!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save)
|
||||
if (ast_contains_system_tables && system_table_handling == QueryCacheSystemTableHandling::Throw)
|
||||
throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_SYSTEM_TABLE,
|
||||
"The query result was not cached because the query contains a system table."
|
||||
" Use setting `query_cache_system_table_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching");
|
||||
|
||||
if ((!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save)
|
||||
&& (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save))
|
||||
{
|
||||
QueryCache::Key key(
|
||||
ast, res.pipeline.getHeader(),
|
||||
|
@ -7,6 +7,7 @@ DROP TABLE IF EXISTS eligible_test2;
|
||||
|
||||
-- enable query cache session-wide but also force it individually in each of below statements
|
||||
SET use_query_cache = true;
|
||||
SET query_cache_system_table_handling = 'save';
|
||||
|
||||
-- check that SELECT statements create entries in the query cache ...
|
||||
SELECT 1 SETTINGS use_query_cache = true;
|
||||
|
@ -2,6 +2,7 @@
|
||||
-- Tag no-parallel: Messes with internal cache
|
||||
|
||||
SET allow_experimental_analyzer = 1;
|
||||
SET query_cache_system_table_handling = 'save';
|
||||
|
||||
SYSTEM DROP QUERY CACHE;
|
||||
|
||||
|
@ -12,11 +12,10 @@ SYSTEM STOP MERGES t_cache_sparse;
|
||||
INSERT INTO t_cache_sparse SELECT number, number FROM numbers(10000);
|
||||
INSERT INTO t_cache_sparse SELECT number, 0 FROM numbers(10000);
|
||||
|
||||
SET use_query_cache = 1;
|
||||
SET max_threads = 1;
|
||||
|
||||
SELECT v FROM t_cache_sparse FORMAT Null;
|
||||
SELECT v FROM t_cache_sparse FORMAT Null;
|
||||
SELECT v FROM t_cache_sparse SETTINGS use_query_cache = 1, max_threads = 1 FORMAT Null;
|
||||
SELECT v FROM t_cache_sparse SETTINGS use_query_cache = 1, max_threads = 1 FORMAT Null;
|
||||
SELECT count() FROM system.query_cache WHERE query LIKE 'SELECT v FROM t_cache_sparse%';
|
||||
|
||||
DROP TABLE t_cache_sparse;
|
||||
|
@ -0,0 +1,13 @@
|
||||
The Default for query_cache_system_table_handling is = throw
|
||||
0
|
||||
Check behavior of query_cache_system_table_handling = throw
|
||||
0
|
||||
Check behavior of query_cache_system_table_handling = save
|
||||
0
|
||||
1
|
||||
Check behavior of query_cache_system_table_handling = ignore
|
||||
0
|
||||
0
|
||||
Other tests
|
||||
0
|
||||
0
|
@ -0,0 +1,64 @@
|
||||
-- Tags: no-parallel
|
||||
-- Tag no-parallel: Messes with internal cache
|
||||
|
||||
SYSTEM DROP QUERY CACHE;
|
||||
|
||||
SELECT 'The Default for query_cache_system_table_handling is = throw';
|
||||
-- Test that the query cache rejects queries that involve system tables.
|
||||
SELECT * FROM system.one SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
|
||||
SELECT count(*) FROM system.query_cache;
|
||||
|
||||
SYSTEM DROP QUERY CACHE;
|
||||
|
||||
SELECT 'Check behavior of query_cache_system_table_handling = throw';
|
||||
-- Test that the query cache rejects queries that involve system tables.
|
||||
SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'throw'; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
|
||||
SELECT count(*) FROM system.query_cache;
|
||||
|
||||
SYSTEM DROP QUERY CACHE;
|
||||
|
||||
SELECT 'Check behavior of query_cache_system_table_handling = save';
|
||||
-- Test that the query cache saves the result of queries that involve system tables.
|
||||
SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'save';
|
||||
SELECT count(*) FROM system.query_cache;
|
||||
|
||||
SYSTEM DROP QUERY CACHE;
|
||||
|
||||
SELECT 'Check behavior of query_cache_system_table_handling = ignore';
|
||||
-- Test that the query cache ignores the result of queries that involve system tables.
|
||||
SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'ignore';
|
||||
SELECT count(*) FROM system.query_cache;
|
||||
|
||||
SYSTEM DROP QUERY CACHE;
|
||||
|
||||
SELECT 'Other tests';
|
||||
|
||||
-- Edge case which does not work well for conceptual reasons (the query cache is AST-based). Test it anyway to have the behavior documented.
|
||||
USE system;
|
||||
SELECT * FROM one SETTINGS use_query_cache = 1; -- doesn't throw but should
|
||||
|
||||
-- This query uses system.zero internally. Since the query cache works at AST level, it does not "see" system.zero and must not complain.
|
||||
SELECT * SETTINGS use_query_cache = 1;
|
||||
|
||||
-- information_schema is also treated as a system table
|
||||
SELECT * FROM information_schema.tables SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
|
||||
SELECT * FROM INFORMATION_SCHEMA.TABLES SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
|
||||
|
||||
-- System tables can be "hidden" inside e.g. table functions
|
||||
SELECT * FROM clusterAllReplicas('test_shard_localhost', system.one) SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
|
||||
SELECT * FROM clusterAllReplicas('test_shard_localhost', 'system.one') SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
|
||||
|
||||
-- Criminal edge case that a user creates a table named "system". The query cache must not reject queries against it.
-- Note: 'USE system' from above is still in effect, the table is referenced without database qualifier on purpose.
DROP TABLE IF EXISTS system;
CREATE TABLE system (c UInt64) ENGINE = Memory;
SELECT * FROM system SETTINGS use_query_cache = 1;
DROP TABLE system;

-- But queries against system.system are rejected.
DROP TABLE IF EXISTS system.system;
CREATE TABLE system.system (c UInt64) ENGINE = Memory;
SELECT * FROM system.system SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
DROP TABLE system.system;
|
||||
|
||||
-- Cleanup
|
||||
SYSTEM DROP QUERY CACHE;
|
Loading…
Reference in New Issue
Block a user