Merge pull request #68235 from sakulali/query_cache_tag

QueryCache: Add tagging
This commit is contained in:
Robert Schulze 2024-08-13 10:44:10 +00:00 committed by GitHub
commit 99282e526a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 101 additions and 9 deletions

View File

@ -143,6 +143,18 @@ value can be specified at session, profile or query level using setting [query_c
Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads
from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).
Sometimes it is useful to keep multiple results for the same query cached. This can be achieved using setting
[query_cache_tag](settings/settings.md#query-cache-tag) that acts as a label (or namespace) for query cache entries. The query cache
considers results of the same query with different tags different.
Example for creating three different query cache entries for the same query:
```sql
SELECT 1 SETTINGS use_query_cache = true; -- query_cache_tag is implicitly '' (empty string)
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1';
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2';
```
ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation,
etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks

View File

@ -1800,6 +1800,17 @@ Possible values:
Default value: `0`.
## query_cache_tag {#query-cache-tag}
A string which acts as a label for [query cache](../query-cache.md) entries.
The same queries with different tags are considered different by the query cache.
Possible values:
- Any string
Default value: `''`.
## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.

View File

@ -9,6 +9,7 @@ Columns:
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry.
- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Tag of the query cache entry.
- `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale.
- `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users.
- `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed.
@ -26,6 +27,7 @@ Row 1:
──────
query: SELECT 1 SETTINGS use_query_cache = 1
result_size: 128
tag:
stale: 0
shared: 0
compressed: 1

View File

@ -676,6 +676,7 @@ class IColumn;
M(Bool, query_cache_squash_partial_results, true, "Squash partial result blocks to blocks of size 'max_block_size'. Reduces performance of inserts into the query cache but improves the compressability of cache entries.", 0) \
M(Seconds, query_cache_ttl, 60, "After this time in seconds entries in the query cache become stale", 0) \
M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \
M(String, query_cache_tag, "", "A string which acts as a label for query cache entries. The same queries with different tags are considered different by the query cache.", 0) \
M(Bool, enable_sharing_sets_for_mutations, true, "Allow sharing set objects build for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption", 0) \
\
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \

View File

@ -83,6 +83,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"use_hive_partitioning", false, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines."},
{"allow_experimental_kafka_offsets_storage_in_keeper", false, false, "Allow the usage of experimental Kafka storage engine that stores the committed offsets in ClickHouse Keeper"},
{"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
{"query_cache_tag", "", "", "New setting for labeling query cache settings."},
{"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"},
{"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
{"optimize_functions_to_subcolumns", false, true, "Enabled settings by default"},

View File

@ -128,7 +128,7 @@ namespace
/// Tells whether a setting name is treated as query-cache-related: the name starts with "query_cache_" or ends with "_query_cache".
/// NOTE(review): this hunk replaces one line (7 lines before, 7 lines after), so both versions of the `return` statement are shown:
/// the first is presumably the pre-change line, the second its replacement. The replacement additionally reports "query_cache_tag"
/// as NOT query-cache-related, although the name carries the "query_cache_" prefix.
bool isQueryCacheRelatedSetting(const String & setting_name)
{
return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache");
return (setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache")) && setting_name != "query_cache_tag";
}
class RemoveQueryCacheSettingsMatcher
@ -242,11 +242,18 @@ QueryCache::Key::Key(
, expires_at(expires_at_)
, is_compressed(is_compressed_)
, query_string(queryStringFromAST(ast_))
, tag(settings.query_cache_tag)
{
}
QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
: QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles
/// Short Key ctor: takes only the AST, the current database, the settings and the user id / roles, and delegates to the
/// nine-argument Key ctor. The remaining arguments are filled with placeholders ({}, false, a time point built from time_t 1, false).
QueryCache::Key::Key(
ASTPtr ast_,
const String & current_database,
const Settings & settings,
std::optional<UUID> user_id_,
const std::vector<UUID> & current_user_roles_)
: QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false)
/// ^^ dummy values for everything != AST, current database, user name/roles
{
}

View File

@ -88,6 +88,11 @@ public:
/// SYSTEM.QUERY_CACHE.
const String query_string;
/// A tag (namespace) to distinguish multiple entries of the same query.
/// This member currently has no use other than letting SYSTEM.QUERY_CACHE populate the 'tag' column conveniently without having to
/// compute the tag from the query AST.
const String tag;
/// Ctor to construct a Key for writing into query cache.
Key(ASTPtr ast_,
const String & current_database,
@ -99,7 +104,10 @@ public:
bool is_compressed);
/// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name).
Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
Key(ASTPtr ast_,
const String & current_database,
const Settings & settings,
std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
bool operator==(const Key & other) const;
};

View File

@ -1,6 +1,7 @@
#include "StorageSystemQueryCache.h"
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/Cache/QueryCache.h>
#include <Interpreters/Context.h>
@ -15,6 +16,7 @@ ColumnsDescription StorageSystemQueryCache::getColumnsDescription()
{
{"query", std::make_shared<DataTypeString>(), "Query string."},
{"result_size", std::make_shared<DataTypeUInt64>(), "Size of the query cache entry."},
{"tag", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Tag of the query cache entry."},
{"stale", std::make_shared<DataTypeUInt8>(), "If the query cache entry is stale."},
{"shared", std::make_shared<DataTypeUInt8>(), "If the query cache entry is shared between multiple users."},
{"compressed", std::make_shared<DataTypeUInt8>(), "If the query cache entry is compressed."},
@ -51,11 +53,12 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr
res_columns[0]->insert(key.query_string); /// approximates the original query string
res_columns[1]->insert(QueryCache::QueryCacheEntryWeight()(*query_result));
res_columns[2]->insert(key.expires_at < std::chrono::system_clock::now());
res_columns[3]->insert(key.is_shared);
res_columns[4]->insert(key.is_compressed);
res_columns[5]->insert(std::chrono::system_clock::to_time_t(key.expires_at));
res_columns[6]->insert(key.ast_hash.low64); /// query cache considers aliases (issue #56258)
res_columns[2]->insert(key.tag);
res_columns[3]->insert(key.expires_at < std::chrono::system_clock::now());
res_columns[4]->insert(key.is_shared);
res_columns[5]->insert(key.is_compressed);
res_columns[6]->insert(std::chrono::system_clock::to_time_t(key.expires_at));
res_columns[7]->insert(key.ast_hash.low64); /// query cache considers aliases (issue #56258)
}
}

View File

@ -0,0 +1,12 @@
1
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = \'abc\' abc
---
1
1
SELECT 1 SETTINGS use_query_cache = true
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = \'abc\' abc
---
1
1
SELECT 1 SETTINGS use_query_cache = true abc
SELECT 1 SETTINGS use_query_cache = true def

View File

@ -0,0 +1,34 @@
-- Tags: no-parallel
-- Tag no-parallel: Messes with internal cache
SYSTEM DROP QUERY CACHE;
-- Store the result of a single query with a tag in the query cache and check that the system table knows about the tag
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc';
SELECT query, tag FROM system.query_cache;
SELECT '---';
SYSTEM DROP QUERY CACHE;
-- Store the result of the same query with two different tags. The cache should store two entries.
SELECT 1 SETTINGS use_query_cache = true; -- default query_cache_tag = ''
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc';
SELECT query, tag FROM system.query_cache ORDER BY ALL;
SELECT '---';
SYSTEM DROP QUERY CACHE;
-- Like before but the tag is set standalone.
SET query_cache_tag = 'abc';
SELECT 1 SETTINGS use_query_cache = true;
SET query_cache_tag = 'def';
SELECT 1 SETTINGS use_query_cache = true;
SELECT query, tag FROM system.query_cache ORDER BY ALL;
SYSTEM DROP QUERY CACHE;

View File

@ -2117,6 +2117,7 @@ namenode
namepassword
nameprofile
namequota
namespace
namespaces
natively
nats