Merge pull request #61104 from ClickHouse/use-global-scalars-cache-analyzer

Use global scalars cache with analyzer
This commit is contained in:
Nikolai Kochetov 2024-03-11 18:11:10 +01:00 committed by GitHub
commit 8cfe0e4df7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 203 additions and 6 deletions

View File

@ -86,6 +86,7 @@
/// Profile event counters used by the scalar subquery cache logic in this file.
/// They are only declared here (`extern`).
namespace ProfileEvents
{
/// Incremented when a scalar subquery result is taken from the global scalars cache,
/// or from the scalars stored in the query context.
extern const Event ScalarSubqueriesGlobalCacheHit;
/// Incremented when a scalar subquery result is taken from the local scalars cache,
/// i.e. when the global cache cannot be used for that subquery.
extern const Event ScalarSubqueriesLocalCacheHit;
/// NOTE(review): presumably incremented when the scalar subquery has to be evaluated
/// because no cache holds its value; the increment site is not visible here, please confirm.
extern const Event ScalarSubqueriesCacheMiss;
}
@ -1444,7 +1445,8 @@ private:
std::unordered_map<QueryTreeNodePtr, size_t> node_to_tree_size;
/// Global scalar subquery to scalar value map
std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value;
std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value_local;
std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value_global;
const bool only_analyze;
};
@ -1951,6 +1953,24 @@ QueryTreeNodePtr QueryAnalyzer::tryGetLambdaFromSQLUserDefinedFunctions(const st
return result_node;
}
/// Recursive worker for subtreeHasViewSource.
/// Returns true if `node` or any of its descendants is a TableNode whose storage
/// full name (not quoted) equals `view_source_name`. A null `node` yields false.
static bool subtreeHasTableNamed(const IQueryTreeNode * node, const std::string & view_source_name)
{
    if (!node)
        return false;

    if (const auto * table_node = node->as<TableNode>())
    {
        if (table_node->getStorageID().getFullNameNotQuoted() == view_source_name)
            return true;
    }

    for (const auto & child : node->getChildren())
        if (subtreeHasTableNamed(child.get(), view_source_name))
            return true;

    return false;
}

/// Checks whether the query tree rooted at `node` references the view source storage of `context`.
///
/// @param node     Root of the subtree to inspect; may be null.
/// @param context  Context whose view source (if any) is searched for.
/// @return true if some TableNode in the subtree has the same full storage name as the
///         context's view source; false if `node` is null, if the context has no view
///         source, or if no such table node exists.
bool subtreeHasViewSource(const IQueryTreeNode * node, const Context & context)
{
    if (!node)
        return false;

    /// Without a view source there is nothing to match. Checking here keeps the function
    /// safe even when the caller did not test getViewSource() beforehand.
    const auto view_source = context.getViewSource();
    if (!view_source)
        return false;

    /// The view source name does not change during the traversal, so build it once
    /// instead of once per visited table node.
    const std::string view_source_name = view_source->getStorageID().getFullNameNotQuoted();
    return subtreeHasTableNamed(node, view_source_name);
}
/// Evaluate scalar subquery and perform constant folding if scalar subquery does not have constant value
void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, IdentifierResolveScope & scope)
{
@ -1970,12 +1990,26 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
node_without_alias->removeAlias();
QueryTreeNodePtrWithHash node_with_hash(node_without_alias);
auto scalar_value_it = scalar_subquery_to_scalar_value.find(node_with_hash);
auto str_hash = DB::toString(node_with_hash.hash);
if (scalar_value_it != scalar_subquery_to_scalar_value.end())
bool can_use_global_scalars = !only_analyze && !(context->getViewSource() && subtreeHasViewSource(node_without_alias.get(), *context));
auto & scalars_cache = can_use_global_scalars ? scalar_subquery_to_scalar_value_global : scalar_subquery_to_scalar_value_local;
if (scalars_cache.contains(node_with_hash))
{
if (can_use_global_scalars)
ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit);
else
ProfileEvents::increment(ProfileEvents::ScalarSubqueriesLocalCacheHit);
scalar_block = scalars_cache.at(node_with_hash);
}
else if (context->hasQueryContext() && can_use_global_scalars && context->getQueryContext()->hasScalar(str_hash))
{
scalar_block = context->getQueryContext()->getScalar(str_hash);
scalar_subquery_to_scalar_value_global.emplace(node_with_hash, scalar_block);
ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit);
scalar_block = scalar_value_it->second;
}
else
{
@ -2087,7 +2121,9 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
}
}
scalar_subquery_to_scalar_value.emplace(node_with_hash, scalar_block);
scalars_cache.emplace(node_with_hash, scalar_block);
if (can_use_global_scalars && context->hasQueryContext())
context->getQueryContext()->addScalar(str_hash, scalar_block);
}
const auto & scalar_column_with_type = scalar_block.safeGetByPosition(0);

View File

@ -10,7 +10,6 @@
01761_cast_to_enum_nullable
01925_join_materialized_columns
01952_optimize_distributed_group_by_sharding_key
02174_cte_scalar_cache_mv
02354_annoy
# Check after constants refactoring
02901_parallel_replicas_rollup

View File

@ -19,6 +19,48 @@
94 94 94 94 5
99 99 99 99 5
02177_MV 7 80 22
4 4 4 4 5
9 9 9 9 5
14 14 14 14 5
19 19 19 19 5
24 24 24 24 5
29 29 29 29 5
34 34 34 34 5
39 39 39 39 5
44 44 44 44 5
49 49 49 49 5
54 54 54 54 5
59 59 59 59 5
64 64 64 64 5
69 69 69 69 5
74 74 74 74 5
79 79 79 79 5
84 84 84 84 5
89 89 89 89 5
94 94 94 94 5
99 99 99 99 5
02177_MV 0 0 22
10
40
70
100
130
160
190
220
250
280
310
340
370
400
430
460
490
520
550
580
02177_MV_2 0 0 21
10
40
70
@ -61,3 +103,24 @@
188
198
02177_MV_3 20 0 1
8
18
28
38
48
58
68
78
88
98
108
118
128
138
148
158
168
178
188
198
02177_MV_3 19 0 2

View File

@ -14,6 +14,8 @@ CREATE MATERIALIZED VIEW mv1 TO t2 AS
FROM t1
LIMIT 5;
set allow_experimental_analyzer = 0;
-- FIRST INSERT
INSERT INTO t1
WITH
@ -58,8 +60,48 @@ WHERE
AND query LIKE '-- FIRST INSERT\nINSERT INTO t1\n%'
AND event_date >= yesterday() AND event_time > now() - interval 10 minute;
truncate table t2;
set allow_experimental_analyzer = 1;
-- FIRST INSERT ANALYZER
INSERT INTO t1
WITH
(SELECT max(i) FROM t1) AS t1
SELECT
number as i,
t1 + t1 + t1 AS j -- Using global cache
FROM system.numbers
LIMIT 100
SETTINGS
min_insert_block_size_rows=5,
max_insert_block_size=5,
min_insert_block_size_rows_for_materialized_views=5,
max_block_size=5,
max_threads=1;
SELECT k, l, m, n, count()
FROM t2
GROUP BY k, l, m, n
ORDER BY k, l, m, n;
SYSTEM FLUSH LOGS;
SELECT
'02177_MV',
ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit,
ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit,
ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss
FROM system.query_log
WHERE
current_database = currentDatabase()
AND type = 'QueryFinish'
AND query LIKE '-- FIRST INSERT ANALYZER\nINSERT INTO t1\n%'
AND event_date >= yesterday() AND event_time > now() - interval 10 minute;
DROP TABLE mv1;
set allow_experimental_analyzer = 0;
CREATE TABLE t3 (z Int64) ENGINE = Memory;
CREATE MATERIALIZED VIEW mv2 TO t3 AS
SELECT
@ -91,8 +133,36 @@ WHERE
AND query LIKE '-- SECOND INSERT\nINSERT INTO t1%'
AND event_date >= yesterday() AND event_time > now() - interval 10 minute;
truncate table t3;
set allow_experimental_analyzer = 1;
-- SECOND INSERT ANALYZER
INSERT INTO t1
SELECT 0 as i, number as j from numbers(100)
SETTINGS
min_insert_block_size_rows=5,
max_insert_block_size=5,
min_insert_block_size_rows_for_materialized_views=5,
max_block_size=5,
max_threads=1;
SELECT * FROM t3 ORDER BY z ASC;
SYSTEM FLUSH LOGS;
SELECT
'02177_MV_2',
ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit,
ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit,
ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss
FROM system.query_log
WHERE
current_database = currentDatabase()
AND type = 'QueryFinish'
AND query LIKE '-- SECOND INSERT ANALYZER\nINSERT INTO t1%'
AND event_date >= yesterday() AND event_time > now() - interval 10 minute;
DROP TABLE mv2;
set allow_experimental_analyzer = 0;
CREATE TABLE t4 (z Int64) ENGINE = Memory;
CREATE MATERIALIZED VIEW mv3 TO t4 AS
@ -126,6 +196,35 @@ WHERE
AND query LIKE '-- THIRD INSERT\nINSERT INTO t1%'
AND event_date >= yesterday() AND event_time > now() - interval 10 minute;
truncate table t4;
set allow_experimental_analyzer = 1;
-- THIRD INSERT ANALYZER
INSERT INTO t1
SELECT number as i, number as j from numbers(100)
SETTINGS
min_insert_block_size_rows=5,
max_insert_block_size=5,
min_insert_block_size_rows_for_materialized_views=5,
max_block_size=5,
max_threads=1;
SYSTEM FLUSH LOGS;
SELECT * FROM t4 ORDER BY z ASC;
SELECT
'02177_MV_3',
ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit,
ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit,
ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss
FROM system.query_log
WHERE
current_database = currentDatabase()
AND type = 'QueryFinish'
AND query LIKE '-- THIRD INSERT ANALYZER\nINSERT INTO t1%'
AND event_date >= yesterday() AND event_time > now() - interval 10 minute;
DROP TABLE mv3;
DROP TABLE t1;
DROP TABLE t2;