From b125fd87189d490cbb9a9101e0a49006a97d534a Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Tue, 18 Oct 2022 09:34:53 -0400 Subject: [PATCH 1/4] Update uk-price-paid.md --- docs/en/getting-started/example-datasets/uk-price-paid.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index ef20c03883f..2a89bfda2e7 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -101,7 +101,7 @@ SELECT count() FROM uk_price_paid ``` -At the time this query was executed, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse: +At the time this query was run, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse: ```sql SELECT formatReadableSize(total_bytes) @@ -342,7 +342,7 @@ The result looks like: ## Let's Speed Up Queries Using Projections {#speedup-with-projections} -[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At execution time, ClickHouse will use your projection if it thinks the projection can improve the performance fo the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful). +[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. 
At query time, ClickHouse will use your projection if it thinks the projection can improve the performance of the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful). ### Build a Projection {#build-projection} From 2033ea12169e5d511e32acb6b951606bb275f36a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 21 Oct 2022 14:53:23 +0200 Subject: [PATCH 2/4] Support subqueries in row policy filters. --- src/Interpreters/InterpreterSelectQuery.cpp | 11 ++++++--- tests/integration/test_row_policy/test.py | 27 +++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 79deb38317c..4139685c379 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -117,7 +117,8 @@ FilterDAGInfoPtr generateFilterActions( const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot, const StorageMetadataPtr & metadata_snapshot, - Names & prerequisite_columns) + Names & prerequisite_columns, + PreparedSetsPtr prepared_sets) { auto filter_info = std::make_shared(); @@ -155,7 +156,7 @@ FilterDAGInfoPtr generateFilterActions( /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = TreeRewriter(context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, storage_snapshot)); - SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot); + SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot, {}, false, {}, prepared_sets); filter_info->actions = analyzer.simpleSelectActions(); filter_info->column_name = expr_list->children.at(0)->getColumnName(); @@ -615,7 +616,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (row_policy_filter) { filter_info = generateFilterActions( - table_id, row_policy_filter, context, 
storage, storage_snapshot, metadata_snapshot, required_columns); + table_id, row_policy_filter, context, storage, storage_snapshot, metadata_snapshot, required_columns, + prepared_sets); query_info.filter_asts.push_back(row_policy_filter); } @@ -623,7 +625,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (query_info.additional_filter_ast) { additional_filter_info = generateFilterActions( - table_id, query_info.additional_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns); + table_id, query_info.additional_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns, + prepared_sets); additional_filter_info->do_remove_column = true; diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 2e696be4988..1933823f5d2 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -867,3 +867,30 @@ def test_policy_on_distributed_table_via_role(): assert node.query( "SELECT * FROM dist_tbl SETTINGS prefer_localhost_replica=0", user="user1" ) == TSV([[0], [2], [4], [6], [8], [0], [2], [4], [6], [8]]) + + +def test_row_policy_filter_with_subquery(): + copy_policy_xml("no_filters.xml") + assert node.query("SHOW POLICIES") == "" + + node.query("DROP ROW POLICY IF EXISTS filter_1 ON table_1") + node.query("DROP TABLE IF EXISTS table_1") + node.query("DROP TABLE IF EXISTS table_2") + + node.query( + "CREATE TABLE table_1 (x int, y int) ENGINE = MergeTree ORDER BY tuple()" + ) + node.query("INSERT INTO table_1 SELECT number, number * number FROM numbers(10)") + + node.query("CREATE TABLE table_2 (a int) ENGINE=MergeTree ORDER BY tuple()") + node.query("INSERT INTO table_2 VALUES (3), (5)") + + node.query( + "CREATE ROW POLICY filter_1 ON table_1 USING x IN (SELECT a FROM table_2) TO ALL" + ) + + assert node.query("SELECT * FROM table_1") == TSV([[3, 9], [5, 25]]) + + node.query("DROP ROW POLICY filter_1 ON table_1") +
node.query("DROP TABLE table_1") + node.query("DROP TABLE table_2") From 06d46ca51bfffd95fcab5dcb678c85566d373a1f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 22 Oct 2022 13:28:22 +0000 Subject: [PATCH 3/4] Clear thread::id when ThreadFromGlobalPool exits. --- src/Common/ThreadPool.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 76ada9e0d75..cfeb8deaac5 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -178,7 +178,10 @@ public: func = std::forward(func), args = std::make_tuple(std::forward(args)...)]() mutable /// mutable is needed to destroy capture { - SCOPE_EXIT(state->event.set()); + SCOPE_EXIT( + state->thread_id = std::thread::id(); + state->event.set(); + ); state->thread_id = std::this_thread::get_id(); From f0aed688938281a1782f5acf4803126b6343897b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Oct 2022 20:53:18 +0200 Subject: [PATCH 4/4] ci/stress: fix memory limits overrides for hung check Signed-off-by: Azat Khuzhin --- docker/test/stress/stress | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 7f3f38bd8f5..a0ec86f7fbe 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -286,9 +286,7 @@ if __name__ == "__main__": # But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY. "--client-option", "max_untracked_memory=1Gi", - "--client-option", "max_memory_usage_for_user=0", - "--client-option", "memory_profiler_step=1Gi", # Use system database to avoid CREATE/DROP DATABASE queries "--database=system",