Allow sharing of cache entries

This commit is contained in:
Robert Schulze 2022-12-17 18:04:18 +00:00
parent 20a7af926e
commit bc6bcdfd7b
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
7 changed files with 39 additions and 14 deletions

View File

@ -73,5 +73,9 @@ To specify the validity period after which cache entries become stale, use setti
Results of queries with non-deterministic functions such as `rand()` and `now()` are not cached by default. This behavior can be overruled using setting [query_result_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-result-cache-store-results-of-queries-with-nondeterministic-functions).
Query cache entries are not shared between users for security reasons. For example, user A must not be able to bypass a row policy on a
table by running the same query as another user B for whom no such policy exists. If nevertheless necessary, cache entries can be marked
accessible by other users (i.e. shared) using setting [query_result_cache_share_between_users](settings/settings.md#query-result-cache-share-between-users).
Finally, it is sometimes useful to cache query results of the same query multiple times with different validity periods. To identify
different entries for the same query, users may pass configuration [query_result_cache_partition_key](settings/settings.md#query-result-cache-partition-key).

View File

@ -1233,6 +1233,17 @@ Possible values:
Default value: `60`
## query_result_cache_share_between_users {#query-result-cache-share-between-users}
If turned on, the results of SELECT queries cached in the [query result cache](../query-result-cache.md) are accessible to other users (shared).
Possible values:
- 0 - Disabled
- 1 - Enabled
Default value: `0`.
## query_result_cache_partition_key {#query-result-cache-partition-key}
Represents a partition of the [query result cache](../query-result-cache.md).

View File

@ -681,6 +681,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, query_result_cache_max_entry_records, (1ull << 20), "Maximum number of records SELECT query results may have to be saved in the query result cache", 0) \
M(Milliseconds, query_result_cache_min_query_duration, 0, "Minimum time in milliseconds for a query to run for its result to be cached in the query result cache.", 0) \
M(Seconds, query_result_cache_keep_seconds_alive, 60, "After this time in seconds entries in the query result cache become stale", 0) \
M(Bool, query_result_cache_share_between_users, false, "Allow other users to access entry in the query result cache", 0) \
M(String, query_result_cache_partition_key, "", "Represents a partition of the query result cache", 0) \
M(UInt64, insert_keeper_max_retries, 0, "Max retries for keeper operations during insert", 0) \
M(UInt64, insert_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for keeper operations during insert", 0) \

View File

@ -99,12 +99,13 @@ ASTPtr removeQueryResultCacheSettings(ASTPtr ast)
}
QueryResultCache::Key::Key(
ASTPtr ast_, String username_, String partition_key_,
Block header_, std::chrono::time_point<std::chrono::system_clock> expires_at_)
ASTPtr ast_, String partition_key_,
Block header_, const std::optional<String> & username_,
std::chrono::time_point<std::chrono::system_clock> expires_at_)
: ast(removeQueryResultCacheSettings(ast_))
, username(username_)
, partition_key(partition_key_)
, header(header_)
, username(username_)
, expires_at(expires_at_)
{
}
@ -112,7 +113,6 @@ QueryResultCache::Key::Key(
bool QueryResultCache::Key::operator==(const Key & other) const
{
return ast->getTreeHash() == other.ast->getTreeHash()
&& username == other.username
&& partition_key == other.partition_key;
}
@ -128,7 +128,6 @@ size_t QueryResultCache::KeyHasher::operator()(const Key & key) const
{
SipHash hash;
hash.update(key.ast->getTreeHash());
hash.update(key.username);
hash.update(key.partition_key);
auto res = hash.get64();
return res;
@ -258,6 +257,12 @@ QueryResultCache::Reader::Reader(const Cache & cache_, const Key & key)
return;
}
if (it->first.username.has_value() && it->first.username != key.username)
{
LOG_DEBUG(&Poco::Logger::get("QueryResultCache"), "Inaccessible entry found for query {}", key.queryStringFromAst());
return;
}
if (it->first.expires_at < std::chrono::system_clock::now())
{
LOG_DEBUG(&Poco::Logger::get("QueryResultCache"), "Stale entry found for query {}", key.queryStringFromAst());

View File

@ -30,11 +30,6 @@ public:
/// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select)
const ASTPtr ast;
/// It is unlikely that different users pose the same queries. More importantly, sharing query results between users potentially
/// breaches security. E.g. User A must not be able to bypass row policies on some table by running the same queries as user B for
/// whom no row policies exist.
const String username;
/// Identifies a (virtual) cache partition. Can be used to cache the same query multiple times with different timeouts.
const String partition_key;
@ -47,10 +42,17 @@ public:
/// For constructing the pipe.
const Block header;
/// std::nullopt means that the associated entry can be read by other users. In general, sharing is a bad idea: First, it is
/// unlikely that different users pose the same queries. Second, sharing potentially breaches security. E.g. User A should not be
/// able to bypass row policies on some table by running the same queries as user B for whom no row policies exist.
const std::optional<String> username;
/// When does the entry expire?
const std::chrono::time_point<std::chrono::system_clock> expires_at;
Key(ASTPtr ast_, String username_, String partition_key_, Block header_, std::chrono::time_point<std::chrono::system_clock> expires_at_);
Key(ASTPtr ast_, String partition_key_,
Block header_, const std::optional<String> & username_,
std::chrono::time_point<std::chrono::system_clock> expires_at_);
bool operator==(const Key & other) const;
String queryStringFromAst() const;

View File

@ -693,7 +693,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
&& query_result_cache != nullptr && res.pipeline.pulling())
{
QueryResultCache::Key key(
ast, context->getUserName(), settings.query_result_cache_partition_key, res.pipeline.getHeader(),
ast, settings.query_result_cache_partition_key, res.pipeline.getHeader(),
std::make_optional<String>(context->getUserName()),
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_result_cache_keep_seconds_alive));
QueryResultCache::Reader reader = query_result_cache->createReader(key);
if (reader.hasCacheEntryForKey())
@ -704,7 +705,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
&& (settings.query_result_cache_store_results_of_queries_with_nondeterministic_functions || !astContainsNonDeterministicFunctions(ast, context)))
{
QueryResultCache::Key key(
ast, context->getUserName(), settings.query_result_cache_partition_key, res.pipeline.getHeader(),
ast, settings.query_result_cache_partition_key, res.pipeline.getHeader(),
settings.query_result_cache_share_between_users ? std::nullopt : std::make_optional<String>(context->getUserName()),
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_result_cache_keep_seconds_alive));
const size_t num_query_runs = query_result_cache->recordQueryRun(key);

View File

@ -38,7 +38,7 @@ void StorageSystemQueryResultCache::fillData(MutableColumns & res_columns, Conte
for (const auto & [key, entry] : query_result_cache->cache)
{
/// Showing other users' queries is considered a security risk
if (key.username != context->getUserName())
if (key.username.has_value() && key.username != context->getUserName())
continue;
res_columns[0]->insert(key.queryStringFromAst()); /// approximates the original query string