From 68a08e5a5e52f37ab1cdb614b7c4a00d285d88fb Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 22 May 2024 12:50:00 +0000 Subject: [PATCH 01/36] add test for #37090 --- .../03158_unkn_col_distributed_table_with_alias.reference | 1 + .../03158_unkn_col_distributed_table_with_alias.sql | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.reference create mode 100644 tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql diff --git a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.reference b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.reference new file mode 100644 index 00000000000..e965047ad7c --- /dev/null +++ b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.reference @@ -0,0 +1 @@ +Hello diff --git a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql new file mode 100644 index 00000000000..d8ccd5b1827 --- /dev/null +++ b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql @@ -0,0 +1,6 @@ +drop table IF EXISTS local; +drop table IF EXISTS dist; +CREATE TABLE local(`dummy` UInt8) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE dist AS default.local ENGINE = Distributed(localhost_cluster, currentDatabase(), local); +SET prefer_localhost_replica = 1; +WITH 'Hello' AS `alias` SELECT `alias` FROM default.dist GROUP BY `alias`; From 5048b2e9a5671de4671ac6d3ef9a33fe9c3ba09b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 22 May 2024 15:36:57 +0200 Subject: [PATCH 02/36] Update 03158_unkn_col_distributed_table_with_alias.sql --- .../0_stateless/03158_unkn_col_distributed_table_with_alias.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql index d8ccd5b1827..2d6a60d8a90 100644 --- a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql +++ b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql @@ -1,6 +1,6 @@ drop table IF EXISTS local; drop table IF EXISTS dist; CREATE TABLE local(`dummy` UInt8) ENGINE = MergeTree ORDER BY tuple(); -CREATE TABLE dist AS default.local ENGINE = Distributed(localhost_cluster, currentDatabase(), local); +CREATE TABLE dist AS default.local ENGINE = Distributed(localhost_cluster, currentDatabase(), local) where current_database = currentDatabase(); SET prefer_localhost_replica = 1; WITH 'Hello' AS `alias` SELECT `alias` FROM default.dist GROUP BY `alias`; From d6cc4c353d03f101332e3a454bb8f9f6702a00b1 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 22 May 2024 16:08:57 +0200 Subject: [PATCH 03/36] Update 03158_unkn_col_distributed_table_with_alias.sql --- .../0_stateless/03158_unkn_col_distributed_table_with_alias.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql index 2d6a60d8a90..33427a8a674 100644 --- a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql +++ 
b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql @@ -1,6 +1,6 @@ drop table IF EXISTS local; drop table IF EXISTS dist; CREATE TABLE local(`dummy` UInt8) ENGINE = MergeTree ORDER BY tuple(); -CREATE TABLE dist AS default.local ENGINE = Distributed(localhost_cluster, currentDatabase(), local) where current_database = currentDatabase(); +CREATE TABLE dist AS local ENGINE = Distributed(localhost_cluster, currentDatabase(), local) where current_database = currentDatabase(); SET prefer_localhost_replica = 1; WITH 'Hello' AS `alias` SELECT `alias` FROM default.dist GROUP BY `alias`; From 0f887eef4c52aa3837014327304624f58b7e293c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 22 May 2024 16:09:21 +0200 Subject: [PATCH 04/36] Update 03158_unkn_col_distributed_table_with_alias.sql --- .../0_stateless/03158_unkn_col_distributed_table_with_alias.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql index 33427a8a674..5b017d2631c 100644 --- a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql +++ b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql @@ -1,6 +1,6 @@ drop table IF EXISTS local; drop table IF EXISTS dist; CREATE TABLE local(`dummy` UInt8) ENGINE = MergeTree ORDER BY tuple(); -CREATE TABLE dist AS local ENGINE = Distributed(localhost_cluster, currentDatabase(), local) where current_database = currentDatabase(); +CREATE TABLE dist AS local ENGINE = Distributed(localhost_cluster, currentDatabase(), local); SET prefer_localhost_replica = 1; WITH 'Hello' AS `alias` SELECT `alias` FROM default.dist GROUP BY `alias`; From f58cd8ae72b293e2dbf8afd5eae788168bc88ec4 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 22 May 2024 17:02:09 +0200 Subject: [PATCH 05/36] integration test instead of stateless --- .../__init__.py | 0 .../configs/clusters.xml | 12 +++++++ .../test.py | 34 +++++++++++++++++++ ...col_distributed_table_with_alias.reference | 1 - ..._unkn_col_distributed_table_with_alias.sql | 6 ---- 5 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 tests/integration/test_unknown_column_dist_table_with_alias/__init__.py create mode 100644 tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml create mode 100644 tests/integration/test_unknown_column_dist_table_with_alias/test.py delete mode 100644 tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.reference delete mode 100644 tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/__init__.py b/tests/integration/test_unknown_column_dist_table_with_alias/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml b/tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml new file mode 100644 index 00000000000..754d765f23f --- /dev/null +++ b/tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml @@ -0,0 +1,12 @@ + + + + + + localhost + 9000 + + + + + diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py 
new file mode 100644 index 00000000000..bb939ccac85 --- /dev/null +++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py @@ -0,0 +1,34 @@ +import pytest +from helpers.cluster import ClickHouseCluster +import logging + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", main_configs=["configs/clusters.xml"] +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_distributed_table_with_alias(start_cluster): + node.query("") + node.query( + """ + drop table IF EXISTS local; + drop table IF EXISTS dist; + CREATE TABLE local(`dummy` UInt8) ENGINE = MergeTree ORDER BY tuple(); + CREATE TABLE dist AS local ENGINE = Distributed(localhost_cluster, currentDatabase(), local); + SET prefer_localhost_replica = 1; + """ + ) + assert str(node.query("WITH 'Hello' AS `alias` SELECT `alias` FROM default.dist GROUP BY `alias`;")) == 'Hello' diff --git a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.reference b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.reference deleted file mode 100644 index e965047ad7c..00000000000 --- a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.reference +++ /dev/null @@ -1 +0,0 @@ -Hello diff --git a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql b/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql deleted file mode 100644 index 5b017d2631c..00000000000 --- a/tests/queries/0_stateless/03158_unkn_col_distributed_table_with_alias.sql +++ /dev/null @@ -1,6 +0,0 @@ -drop table IF EXISTS local; -drop table IF EXISTS dist; -CREATE TABLE local(`dummy` UInt8) ENGINE = MergeTree ORDER BY tuple(); -CREATE TABLE dist AS local ENGINE = Distributed(localhost_cluster, currentDatabase(), local); -SET prefer_localhost_replica = 1; -WITH 'Hello' AS `alias` SELECT `alias` FROM default.dist GROUP BY `alias`; From e4812c76dfcab7ec5ca29fbe9f852445a5ef5e28 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 22 May 2024 19:28:01 +0200 Subject: [PATCH 06/36] black check fix --- .../test.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py index bb939ccac85..2907f352f40 100644 --- a/tests/integration/test_unknown_column_dist_table_with_alias/test.py +++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py @@ -3,9 +3,7 @@ from helpers.cluster import ClickHouseCluster import logging cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", main_configs=["configs/clusters.xml"] -) +node = cluster.add_instance("node", main_configs=["configs/clusters.xml"]) @pytest.fixture(scope="module") @@ -31,4 +29,11 @@ def test_distributed_table_with_alias(start_cluster): SET prefer_localhost_replica = 1; """ ) - assert str(node.query("WITH 'Hello' AS `alias` SELECT `alias` FROM default.dist GROUP BY `alias`;")) == 'Hello' + assert ( + str( + node.query( + "WITH 'Hello' AS `alias` SELECT `alias` FROM default.dist GROUP BY `alias`;" + ) + ) + == "Hello" + ) From a4d303a8e618b460bb9f06bb9634b1697b669793 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 23 May 2024 13:35:09 +0200 
Subject: [PATCH 07/36] Update test.py --- .../test_unknown_column_dist_table_with_alias/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py index 2907f352f40..884a9f72077 100644 --- a/tests/integration/test_unknown_column_dist_table_with_alias/test.py +++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py @@ -32,7 +32,7 @@ def test_distributed_table_with_alias(start_cluster): assert ( str( node.query( - "WITH 'Hello' AS `alias` SELECT `alias` FROM default.dist GROUP BY `alias`;" + "WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;" ) ) == "Hello" From 6298d23a2feb697b524bbfdf783015c676aac2b6 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 28 May 2024 16:41:59 +0200 Subject: [PATCH 08/36] Check that query doesn't fail --- .../test.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py index 884a9f72077..33be5ccd82c 100644 --- a/tests/integration/test_unknown_column_dist_table_with_alias/test.py +++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py @@ -29,11 +29,12 @@ def test_distributed_table_with_alias(start_cluster): SET prefer_localhost_replica = 1; """ ) - assert ( - str( - node.query( - "WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;" - ) + try: + # Attempt to execute the query + node.query( + "WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;" ) - == "Hello" - ) + except QueryRuntimeException as e: + # If an exception occurs, fail the test + pytest.fail(f"Query raised an exception: {e}") + From c572290e50886af3ec3aa9e9366c2460bd02f423 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 28 May 2024 16:53:14 +0200 Subject: [PATCH 09/36] black check --- .../test_unknown_column_dist_table_with_alias/test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py index 33be5ccd82c..eed4ca84b46 100644 --- a/tests/integration/test_unknown_column_dist_table_with_alias/test.py +++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py @@ -31,10 +31,7 @@ def test_distributed_table_with_alias(start_cluster): ) try: # Attempt to execute the query - node.query( - "WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;" - ) + node.query("WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;") except QueryRuntimeException as e: # If an exception occurs, fail the test pytest.fail(f"Query raised an exception: {e}") - From 3055aa4deb53e34df6f4ddadd5c8e27b379bed39 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 29 May 2024 13:43:40 +0000 Subject: [PATCH 10/36] Store first analysis result in ReadFromMergeTree --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 8 +++++--- src/Processors/QueryPlan/ReadFromMergeTree.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 503031eb04b..23a37a4ab37 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ 
b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1851,10 +1851,12 @@ bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() return output_each_partition_through_separate_port = true; } -ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const +ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() { - auto result_ptr = analyzed_result_ptr ? analyzed_result_ptr : selectRangesToRead(); - return *result_ptr; + if (!analyzed_result_ptr) + analyzed_result_ptr = selectRangesToRead(); + + return *analyzed_result_ptr; } bool ReadFromMergeTree::isQueryWithSampling() const diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 5d7879e8dee..55ee1305b3f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -272,7 +272,7 @@ private: Pipe spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); - ReadFromMergeTree::AnalysisResult getAnalysisResult() const; + ReadFromMergeTree::AnalysisResult getAnalysisResult(); AnalysisResultPtr analyzed_result_ptr; VirtualFields shared_virtual_fields; From 871b91a7c58eedcbdeaf6cf298aecf3fb95c7e06 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 29 May 2024 20:02:55 +0200 Subject: [PATCH 11/36] empty commit From fd67fce7fe1ee956926e9093d2c50f2ae8008a46 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 29 May 2024 20:46:12 +0000 Subject: [PATCH 12/36] Fix build --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +- src/Processors/QueryPlan/ReadFromMergeTree.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 23a37a4ab37..048b0d807d3 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1851,7 +1851,7 @@ bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() return output_each_partition_through_separate_port = true; } -ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() +ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const { if (!analyzed_result_ptr) analyzed_result_ptr = selectRangesToRead(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 55ee1305b3f..2ce9605d7a4 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -272,9 +272,9 @@ private: Pipe spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); - ReadFromMergeTree::AnalysisResult getAnalysisResult(); + ReadFromMergeTree::AnalysisResult getAnalysisResult() const; - AnalysisResultPtr analyzed_result_ptr; + mutable AnalysisResultPtr analyzed_result_ptr; VirtualFields shared_virtual_fields; bool is_parallel_reading_from_replicas; From b0c955e9c924c6dcb9ce05bdd1d24ecaaf0adcd3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 29 May 2024 19:45:56 +0000 Subject: [PATCH 13/36] Various stuff --- docs/en/sql-reference/data-types/boolean.md | 2 +- docs/en/sql-reference/data-types/map.md | 63 ++-- .../functions/tuple-map-functions.md | 303 +++++++++--------- 
src/Client/QueryFuzzer.cpp | 2 +- src/DataTypes/DataTypeMap.cpp | 9 +- src/DataTypes/DataTypeMap.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 2 +- .../0_stateless/00825_protobuf_format_map.sh | 2 - .../0_stateless/00900_long_parquet_load.sh | 1 - .../00900_orc_arrow_parquet_maps.sh | 1 - .../01318_map_add_map_subtract.sql | 24 +- ...map_add_map_subtract_on_map_type.reference | 1 + ...01318_map_add_map_subtract_on_map_type.sql | 19 +- .../0_stateless/01475_read_subcolumns.sql | 1 - .../0_stateless/01475_read_subcolumns_3.sql | 1 - .../01475_read_subcolumns_storages.sh | 2 +- .../0_stateless/01550_create_map_type.sql | 2 - .../0_stateless/01550_type_map_formats.sql | 1 - .../01550_type_map_formats_input.sh | 2 +- .../queries/0_stateless/01592_length_map.sql | 2 - .../0_stateless/01651_map_functions.reference | 10 +- .../0_stateless/01651_map_functions.sql | 43 ++- .../01715_tuple_insert_null_as_default.sql | 2 - .../0_stateless/01720_type_map_and_casts.sql | 2 - .../01763_support_map_lowcardinality_type.sql | 1 - .../0_stateless/01803_const_nullable_map.sql | 2 - .../0_stateless/01852_map_combinator.sql | 1 - .../01872_functions_to_subcolumns.sql | 1 - .../0_stateless/01905_to_json_string.sql | 1 - ...01925_map_populate_series_on_map.reference | 1 - .../01925_map_populate_series_on_map.sql | 1 - .../0_stateless/02002_parse_map_int_key.sql | 2 - ...2030_function_mapContainsKeyLike.reference | 1 + .../02030_function_mapContainsKeyLike.sql | 1 + .../0_stateless/02169_map_functions.reference | 21 ++ .../0_stateless/02169_map_functions.sql | 11 + .../0_stateless/02524_fuzz_and_fuss_2.sql | 2 +- .../0_stateless/02995_baseline_23_12_1.tsv | 1 - 38 files changed, 297 insertions(+), 249 deletions(-) diff --git a/docs/en/sql-reference/data-types/boolean.md b/docs/en/sql-reference/data-types/boolean.md index 4c59bd947de..6fcbc218c5d 100644 --- a/docs/en/sql-reference/data-types/boolean.md +++ b/docs/en/sql-reference/data-types/boolean.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/data-types/boolean sidebar_position: 22 -sidebar_label: Boolean +sidebar_label: Bool --- # Bool diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index 18c7816f811..df981e80e45 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -7,100 +7,103 @@ sidebar_label: Map(K, V) # Map(K, V) `Map(K, V)` data type stores `key:value` pairs. -The Map datatype is implemented as `Array(Tuple(key T1, value T2))`, which means that the order of keys in each map does not change, i.e., this data type maintains insertion order. + +Maps are internally implemented as `Array(Tuple(key T1, value T2))`. +As a result, maps maintain the order in which keys are inserted. **Parameters** -- `key` — The key part of the pair. Arbitrary type, except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md) types. -- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). +- `K` — The type of the Map keys. Arbitrary type except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md) types. +- `V` — The type of the Map values. Arbitrary type. 
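To make the key-type restriction above concrete, here is a minimal sketch (table and column names are illustrative, not part of the patch); the expected failure corresponds to the `BAD_ARGUMENTS` error message that this series introduces in `DataTypeMap.cpp`:

```sql
-- Accepted: String is a valid Map key type.
CREATE TABLE map_ok (m Map(String, UInt64)) ENGINE = Memory;

-- Rejected: Nullable keys (and LowCardinality(Nullable) keys) are not allowed,
-- e.g. "Map cannot have a key of type Nullable(String)".
CREATE TABLE map_bad (m Map(Nullable(String), UInt64)) ENGINE = Memory; -- error
```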
-To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity. +You can use use syntax `m[k]` to obtain the value for key `k` in map `m`. **Examples** Consider the table: ``` sql -CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory; -INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); +CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory; +INSERT INTO tab VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); ``` -Select all `key2` values: +To select all `key2` values: ```sql -SELECT a['key2'] FROM table_map; +SELECT m['key2'] FROM tab; ``` + Result: ```text -┌─arrayElement(a, 'key2')─┐ +┌─arrayElement(m, 'key2')─┐ │ 10 │ │ 20 │ │ 30 │ └─────────────────────────┘ ``` -If there's no such `key` in the `Map()` column, the query returns zeros for numerical values, empty strings or empty arrays. +If the map does not contain the requested key, `m[k]` returns the value type's default value, e.g. `0` for integer types, `''` for string types or `[]` for Array types. ```sql -INSERT INTO table_map VALUES ({'key3':100}), ({}); -SELECT a['key3'] FROM table_map; +CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory; +INSERT INTO tab VALUES ({'key1':100}), ({}); +SELECT m['key1'] FROM tab; ``` Result: ```text -┌─arrayElement(a, 'key3')─┐ +┌─arrayElement(m, 'key1')─┐ │ 100 │ │ 0 │ └─────────────────────────┘ -┌─arrayElement(a, 'key3')─┐ -│ 0 │ -│ 0 │ -│ 0 │ -└─────────────────────────┘ ``` -## Convert Tuple to Map Type +## Converting Tuple to Map -You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function: +Values of type `Tuple()` can be casted to values of type `Map()` using function [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast): + +**Example** + +Query: ``` sql SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map; ``` +Result: + ``` text ┌─map───────────────────────────┐ │ {1:'Ready',2:'Steady',3:'Go'} │ └───────────────────────────────┘ ``` -## Map.keys and Map.values Subcolumns +## Reading subcolumns of Map -To optimize `Map` column processing, in some cases you can use the `keys` and `values` subcolumns instead of reading the whole column. +To avoid reading the entire map, you can use subcolumsn `keys` and `values` in some cases. **Example** Query: ``` sql -CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory; +CREATE TABLE tab (m Map(String, UInt64)) ENGINE = Memory; +INSERT INTO tab VALUES (map('key1', 1, 'key2', 2, 'key3', 3)); -INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3)); - -SELECT a.keys FROM t_map; - -SELECT a.values FROM t_map; +SELECT m.keys FROM tab; +SELECT m.values FROM tab; ``` Result: ``` text -┌─a.keys─────────────────┐ +┌─m.keys─────────────────┐ │ ['key1','key2','key3'] │ └────────────────────────┘ -┌─a.values─┐ +┌─m.values─┐ │ [1,2,3] │ └──────────┘ ``` diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index d9c18e2a0a2..0436791dd8f 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -6,7 +6,7 @@ sidebar_label: Maps ## map -Arranges `key:value` pairs into [Map(key, value)](../data-types/map.md) data type. 
+Creates a value of type [Map(key, value)](../data-types/map.md) from key-value pairs. **Syntax** @@ -16,12 +16,12 @@ map(key1, value1[, key2, value2, ...]) **Arguments** -- `key` — The key part of the pair. Arbitrary type, except [Nullable](../data-types/nullable.md) and [LowCardinality](../data-types/lowcardinality.md) nested with [Nullable](../data-types/nullable.md). -- `value` — The value part of the pair. Arbitrary type, including [Map](../data-types/map.md) and [Array](../data-types/array.md). +- `key_n` — The keys of the map entries. Any type supported as key type of [Map](../data-types/map.md). +- `value_n` — The values of the map entries. Any type supported as value type of [Map](../data-types/map.md). **Returned value** -- Data structure as `key:value` pairs. [Map(key, value)](../data-types/map.md). +- A map containing `key:value` pairs. [Map(key, value)](../data-types/map.md). **Examples** @@ -41,35 +41,13 @@ Result: └──────────────────────────────────────────────────┘ ``` -Query: - -```sql -CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a; -INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); -SELECT a['key2'] FROM table_map; -``` - -Result: - -```text -┌─arrayElement(a, 'key2')─┐ -│ 0 │ -│ 2 │ -│ 4 │ -└─────────────────────────┘ -``` - -**See Also** - -- [Map(key, value)](../data-types/map.md) data type - ## mapFromArrays -Merges an [Array](../data-types/array.md) of keys and an [Array](../data-types/array.md) of values into a [Map(key, value)](../data-types/map.md). Notice that the second argument could also be a [Map](../data-types/map.md), thus it is casted to an Array when executing. - - -The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. +Creates a map from an array of keys and an array of values. +The second argument can also be a map, it will be casted during execution to an array. +The function is a convenient alternative to syntax `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. +For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. **Syntax** @@ -81,12 +59,12 @@ Alias: `MAP_FROM_ARRAYS(keys, values)` **Arguments** -- `keys` — Given key array to create a map from. The nested type of array must be: [String](../data-types/string.md), [Integer](../data-types/int-uint.md), [LowCardinality](../data-types/lowcardinality.md), [FixedString](../data-types/fixedstring.md), [UUID](../data-types/uuid.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [Date32](../data-types/date32.md), [Enum](../data-types/enum.md) -- `values` - Given value array or map to create a map from. +- `keys` — Array of keys to create the map from. [Array(T)](../data-types/array.md) where `T` can be any type supported by [Map](../data-types/map.md) as key type. +- `values` - Array or map of values to create the map from. [Array](../data-types/array.md) or [Map](../data-types/map.md). **Returned value** -- A map whose keys and values are constructed from the key array and value array/map. +- A map with keys and values constructed from the key array and value array/map. 
**Example** @@ -94,14 +72,25 @@ Query: ```sql select mapFromArrays(['a', 'b', 'c'], [1, 2, 3]) +``` +Result: +``` ┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐ │ {'a':1,'b':2,'c':3} │ └───────────────────────────────────────────┘ +``` +Query + +```sql SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3)) +``` +Result: + +``` ┌─mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))─┐ │ {1:('a',1),2:('b',2),3:('c',3)} │ └───────────────────────────────────────────────────────┘ @@ -109,9 +98,11 @@ SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3)) ## extractKeyValuePairs -Extracts key-value pairs, i.e. a [Map(String, String)](../data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files). - -A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key value pairs must be separated by `pair_delimiter`. Quoted keys and values are also supported. +Creates a value of type [Map(String, String)](../data-types/map.md) from key-value pairs in a string. +Parsing is robust towards noise (e.g. log files). +A key-value pair consists of a key, followed by a key-value delimiter, and a value. +Key value pairs are separated by `pair_delimiter`. +Keys and values can also be quoted. **Syntax** @@ -126,17 +117,17 @@ Alias: **Arguments** - `data` - String to extract key-value pairs from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `key_value_delimiter` - Single character delimiting keys and values. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `pair_delimiters` - Set of character delimiting pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `quoting_character` - Single character used as quoting character. Defaults to `"`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned values** -- A [Map(String, String)](../data-types/map.md) of key-value pairs. +- A of key-value pairs. Type: [Map(String, String)](../data-types/map.md) **Examples** -Simple case: +Query ``` sql SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv @@ -150,7 +141,7 @@ Result: └─────────────────────────────────────────────────────────────────────────┘ ``` -Single quote as quoting character: +With a single quote `'`as quoting character: ``` sql SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv @@ -180,7 +171,7 @@ Result: ## extractKeyValuePairsWithEscaping -Same as `extractKeyValuePairs` but with escaping support. +Same as `extractKeyValuePairs` but supports escaping. Supported escape sequences: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`. 
Non standard escape sequences are returned as it is (including the backslash) unless they are one of the following: @@ -229,20 +220,6 @@ Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tu **Example** -Query with a tuple: - -```sql -SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type; -``` - -Result: - -```text -┌─res───────────┬─type───────────────────────────────┐ -│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │ -└───────────────┴────────────────────────────────────┘ -``` - Query with `Map` type: ```sql @@ -257,6 +234,20 @@ Result: └──────────────────────────────┘ ``` +Query with a tuple: + +```sql +SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type; +``` + +Result: + +```text +┌─res───────────┬─type───────────────────────────────┐ +│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │ +└───────────────┴────────────────────────────────────┘ +``` + ## mapSubtract Collect all the keys and subtract corresponding values. @@ -277,20 +268,6 @@ Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tu **Example** -Query with a tuple map: - -```sql -SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type; -``` - -Result: - -```text -┌─res────────────┬─type──────────────────────────────┐ -│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │ -└────────────────┴───────────────────────────────────┘ -``` - Query with `Map` type: ```sql @@ -305,55 +282,57 @@ Result: └───────────────────────────────────┘ ``` -## mapPopulateSeries - -Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array. - -**Syntax** +Query with a tuple map: ```sql -mapPopulateSeries(keys, values[, max]) -mapPopulateSeries(map[, max]) -``` - -Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from the map with a step size of one, and corresponding values. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key. - -For array arguments the number of elements in `keys` and `values` must be the same for each row. - -**Arguments** - -Arguments are [maps](../data-types/map.md) or two [arrays](../data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key. - -Mapped arrays: - -- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). -- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). -- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges). - -or - -- `map` — Map with integer keys. [Map](../data-types/map.md). - -**Returned value** - -- Depending on the arguments returns a [map](../data-types/map.md) or a [tuple](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. 
- -**Example** - -Query with mapped arrays: - -```sql -SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type; +SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type; ``` Result: ```text -┌─res──────────────────────────┬─type──────────────────────────────┐ -│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │ -└──────────────────────────────┴───────────────────────────────────┘ +┌─res────────────┬─type──────────────────────────────┐ +│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │ +└────────────────┴───────────────────────────────────┘ ``` +## mapPopulateSeries + +Fills missing key-value pairs in a map with integer keys. +To support extending the keys beyond the largest value, a maximum key can be specified. +More specificaly, the function returns a map in which the the keys form a series from the smallest to the largest key (or `max` argument if it specified) with step size of 1, and corresponding values. +If no value is specified for a key, a default value is used as value. +In case keys repeat, only the first value (in order of appearance) is associated with the key. + +**Syntax** + +```sql +mapPopulateSeries(map[, max]) +mapPopulateSeries(keys, values[, max]) +``` + +For array arguments the number of elements in `keys` and `values` must be the same for each row. + +**Arguments** + +Arguments are [Maps](../data-types/map.md) or two [Arrays](../data-types/array.md#data-type-array), where the first and second array contains keys and values for the each key. + +Mapped arrays: + +- `map` — Map with integer keys. [Map](../data-types/map.md). + +or + +- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). +- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). +- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges). + +**Returned value** + +- Depending on the arguments a [Map](../data-types/map.md) or a [Tuple](../data-types/tuple.md#tuplet1-t2) of two [Arrays](../data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. + +**Example** + Query with `Map` type: ```sql @@ -368,9 +347,23 @@ Result: └─────────────────────────────────────────┘ ``` +Query with mapped arrays: + +```sql +SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type; +``` + +Result: + +```text +┌─res──────────────────────────┬─type──────────────────────────────┐ +│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │ +└──────────────────────────────┴───────────────────────────────────┘ +``` + ## mapContains -Determines whether the `map` contains the `key` parameter. +Returns if a given key is contained in a given map. **Syntax** @@ -381,7 +374,7 @@ mapContains(map, key) **Arguments** - `map` — Map. [Map](../data-types/map.md). -- `key` — Key. Type matches the type of keys of `map` parameter. +- `key` — Key. Type must match the key type of `map`. 
**Returned value** @@ -392,11 +385,11 @@ mapContains(map, key) Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); +INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); -SELECT mapContains(a, 'name') FROM test; +SELECT mapContains(a, 'name') FROM tab; ``` @@ -411,9 +404,11 @@ Result: ## mapKeys -Returns all keys from the `map` parameter. +Returns the keys of a given map. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`. +This function can be optimized by enabling setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). +With enabled setting, the function only reads the [keys](../data-types/map.md#map-subcolumns) subcolumn instead the whole map. +The query `SELECT mapKeys(m) FROM table` is transformed to `SELECT m.keys FROM table`. **Syntax** @@ -434,11 +429,11 @@ mapKeys(map) Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); +INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); -SELECT mapKeys(a) FROM test; +SELECT mapKeys(a) FROM tab; ``` Result: @@ -452,9 +447,11 @@ Result: ## mapValues -Returns all values from the `map` parameter. +Returns the values of a given map. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`. +This function can be optimized by enabling setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). +With enabled setting, the function only reads the [values](../data-types/map.md#map-subcolumns) subcolumn instead the whole map. +The query `SELECT mapValues(m) FROM table` is transformed to `SELECT m.values FROM table`. 
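As a hedged illustration of the rewrite described above (assuming a table `tab` with a map column `m`, as in the surrounding examples, and assuming `EXPLAIN SYNTAX` reflects the rewritten query, the approach used by the existing test `01872_functions_to_subcolumns.sql` touched by this series):

```sql
SET optimize_functions_to_subcolumns = 1;

-- With the setting enabled, the call is expected to be rewritten into a
-- read of the `values` subcolumn only:
EXPLAIN SYNTAX SELECT mapValues(m) FROM tab; -- expected: SELECT m.values FROM tab
```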
**Syntax** @@ -475,11 +472,11 @@ mapValues(map) Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); +INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); -SELECT mapValues(a) FROM test; +SELECT mapValues(a) FROM tab; ``` Result: @@ -512,11 +509,11 @@ mapContainsKeyLike(map, pattern) Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'}); +INSERT INTO tab VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'}); -SELECT mapContainsKeyLike(a, 'a%') FROM test; +SELECT mapContainsKeyLike(a, 'a%') FROM tab; ``` Result: @@ -530,6 +527,8 @@ Result: ## mapExtractKeyLike +Give a map with string keys and a LIKE pattern, this function returns a map with elements where the key matches the pattern. + **Syntax** ```sql @@ -543,18 +542,18 @@ mapExtractKeyLike(map, pattern) **Returned value** -- A map contained elements the key of which matches the specified pattern. If there are no elements matched the pattern, it will return an empty map. +- A map containing elements the key matching the specified pattern. If no elements match the pattern, an empty map is returned. **Example** Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'}); +INSERT INTO tab VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'}); -SELECT mapExtractKeyLike(a, 'a%') FROM test; +SELECT mapExtractKeyLike(a, 'a%') FROM tab; ``` Result: @@ -568,6 +567,8 @@ Result: ## mapApply +Applies a function to each element of a map. + **Syntax** ```sql @@ -608,6 +609,8 @@ Result: ## mapFilter +Filters a map by applying a function to each map element. + **Syntax** ```sql @@ -623,7 +626,6 @@ mapFilter(func, map) - Returns a map containing only the elements in `map` for which `func(map1[i], ..., mapN[i])` returns something other than 0. - **Example** Query: @@ -647,7 +649,6 @@ Result: └─────────────────────┘ ``` - ## mapUpdate **Syntax** @@ -683,6 +684,9 @@ Result: ## mapConcat +Concatenates multiple maps based on the equality of their keys. +If elements with the same key exist in more than one input map, all elements are added to the result map, but only the first one is accessible via operator `[]` + **Syntax** ```sql @@ -691,11 +695,11 @@ mapConcat(maps) **Arguments** -- `maps` – Arbitrary number of arguments of [Map](../data-types/map.md) type. +- `maps` – Arbitrarily many [Maps](../data-types/map.md). **Returned value** -- Returns a map with concatenated maps passed as arguments. If there are same keys in two or more maps, all of them are added to the result map, but only the first one is accessible via operator `[]` +- Returns a map with concatenated maps passed as arguments. **Examples** @@ -729,9 +733,12 @@ Result: ## mapExists(\[func,\], map) -Returns 1 if there is at least one key-value pair in `map` for which `func(key, value)` returns something other than 0. Otherwise, it returns 0. +Returns 1 if at least one key-value pair in `map` exists for which `func(key, value)` returns something other than 0. Otherwise, it returns 0. 
-Note that the `mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +:::note +`mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). +You can pass a lambda function to it as the first argument. +::: **Example** @@ -743,7 +750,7 @@ SELECT mapExists((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res Result: -```text +``` ┌─res─┐ │ 1 │ └─────┘ @@ -753,7 +760,10 @@ Result: Returns 1 if `func(key, value)` returns something other than 0 for all key-value pairs in `map`. Otherwise, it returns 0. -Note that the `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +:::note +Note that the `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). +You can pass a lambda function to it as the first argument. +::: **Example** @@ -765,7 +775,7 @@ SELECT mapAll((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res Result: -```text +``` ┌─res─┐ │ 0 │ └─────┘ @@ -773,7 +783,8 @@ Result: ## mapSort(\[func,\], map) -Sorts the elements of the `map` in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map. +Sorts the elements of a map in ascending order. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. **Examples** @@ -801,8 +812,8 @@ For more details see the [reference](../../sql-reference/functions/array-functio ## mapReverseSort(\[func,\], map) -Sorts the elements of the `map` in descending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map. - +Sorts the elements of a map in descending order. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. **Examples** @@ -826,4 +837,4 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; └──────────────────────────────┘ ``` -For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort) for `arrayReverseSort` function. +For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort). 
diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 03730fcedaa..f5b700ea529 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -598,7 +598,7 @@ DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type) { auto key_type = fuzzDataType(type_map->getKeyType()); auto value_type = fuzzDataType(type_map->getValueType()); - if (!DataTypeMap::checkKeyType(key_type)) + if (!DataTypeMap::isValidKeyType(key_type)) key_type = type_map->getKeyType(); return std::make_shared(key_type, value_type); diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 4d7ab63f966..ea46927344a 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -66,11 +66,8 @@ DataTypeMap::DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & valu void DataTypeMap::assertKeyType() const { - if (!checkKeyType(key_type)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Type of Map key must be a type, that can be represented by integer " - "or String or FixedString (possibly LowCardinality) or UUID or IPv6," - " but {} given", key_type->getName()); + if (!isValidKeyType(key_type)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Map cannot have a key of type {}", key_type->getName()); } @@ -116,7 +113,7 @@ bool DataTypeMap::equals(const IDataType & rhs) const return nested->equals(*rhs_map.nested); } -bool DataTypeMap::checkKeyType(DataTypePtr key_type) +bool DataTypeMap::isValidKeyType(DataTypePtr key_type) { return !isNullableOrLowCardinalityNullable(key_type); } diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 4866c3e78cc..c506591ba79 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -52,7 +52,7 @@ public: SerializationPtr doGetDefaultSerialization() const override; - static bool checkKeyType(DataTypePtr key_type); + static bool isValidKeyType(DataTypePtr key_type); void forEachChild(const ChildCallback & callback) const override; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 02c0aa6dd77..6cbcae2bebe 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1222,7 +1222,7 @@ namespace return nullptr; auto key_type = removeNullable(key_types.back()); - if (!DataTypeMap::checkKeyType(key_type)) + if (!DataTypeMap::isValidKeyType(key_type)) return nullptr; return std::make_shared(key_type, value_types.back()); diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.sh b/tests/queries/0_stateless/00825_protobuf_format_map.sh index 81d1cf2e305..82ebcc6959a 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_map.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_map.sh @@ -10,8 +10,6 @@ set -eo pipefail # Run the client. 
$CLICKHOUSE_CLIENT --multiquery < k = array(1,2), map(array(1,2), 4, array(3,4), 5)); SELECT mapExists((k, v) -> k = map(1,2), map(map(1,2), 4, map(3,4), 5)); SELECT mapExists((k, v) -> k = tuple(1,2), map(tuple(1,2), 4, tuple(3,4), 5)); +SELECT mapAll((k, v) -> k = 0.1::Float32, map(0.1::Float32, 4, 0.2::Float32, 5)); +SELECT mapAll((k, v) -> k = 0.1::Float64, map(0.1::Float64, 4, 0.2::Float64, 5)); +SELECT mapAll((k, v) -> k = array(1,2), map(array(1,2), 4, array(3,4), 5)); +SELECT mapAll((k, v) -> k = map(1,2), map(map(1,2), 4, map(3,4), 5)); +SELECT mapAll((k, v) -> k = tuple(1,2), map(tuple(1,2), 4, tuple(3,4), 5)); + SELECT mapSort((k, v) -> k, map(0.1::Float32, 4, 0.2::Float32, 5)); SELECT mapSort((k, v) -> k, map(0.1::Float64, 4, 0.2::Float64, 5)); SELECT mapSort((k, v) -> k, map(array(1,2), 4, array(3,4), 5)); @@ -41,6 +47,9 @@ SELECT mapConcat(map(tuple(1,2), 4), map(tuple(3,4), 5)); SELECT mapExists((k, v) -> k LIKE '%3', col) FROM table_map ORDER BY id; SELECT mapExists((k, v) -> k LIKE '%2' AND v < 1000, col) FROM table_map ORDER BY id; +SELECT mapAll((k, v) -> k LIKE '%3', col) FROM table_map ORDER BY id; +SELECT mapAll((k, v) -> k LIKE '%2' AND v < 1000, col) FROM table_map ORDER BY id; + SELECT mapSort(col) FROM table_map ORDER BY id; SELECT mapSort((k, v) -> v, col) FROM table_map ORDER BY id; SELECT mapPartialSort((k, v) -> k, 2, col) FROM table_map ORDER BY id; @@ -50,6 +59,8 @@ SELECT mapApply((x, y) -> (x, x + 1), map(1, 0, 2, 0)); SELECT mapApply((x, y) -> (x, x + 1), materialize(map(1, 0, 2, 0))); SELECT mapApply((x, y) -> ('x', 'y'), map(1, 0, 2, 0)); SELECT mapApply((x, y) -> ('x', 'y'), materialize(map(1, 0, 2, 0))); +SELECT mapApply((x, y) -> (x, x + 1), map(1.0, 0, 2.0, 0)); +SELECT mapApply((x, y) -> (x, x + 1), materialize(map(1.0, 0, 2.0, 0))); SELECT mapUpdate(map('k1', 1, 'k2', 2), map('k1', 11, 'k2', 22)); SELECT mapUpdate(materialize(map('k1', 1, 'k2', 2)), map('k1', 11, 'k2', 22)); diff --git a/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql b/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql index 9988eef0ad3..7b49378d4da 100644 --- a/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql +++ b/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql @@ -9,6 +9,6 @@ ENGINE = Memory; INSERT INTO data_a_02187 SELECT * FROM system.one -SETTINGS max_block_size = '1', min_insert_block_size_rows = '65536', min_insert_block_size_bytes = '0', max_insert_threads = '0', max_threads = '3', receive_timeout = '10', receive_data_timeout_ms = '10000', connections_with_failover_max_tries = '0', extremes = '1', use_uncompressed_cache = '0', optimize_move_to_prewhere = '1', optimize_move_to_prewhere_if_final = '0', replication_alter_partitions_sync = '2', totals_mode = 'before_having', allow_suspicious_low_cardinality_types = '1', compile_expressions = '1', min_count_to_compile_expression = '0', group_by_two_level_threshold = '100', distributed_aggregation_memory_efficient = '0', distributed_group_by_no_merge = '1', optimize_distributed_group_by_sharding_key = '1', optimize_skip_unused_shards = '1', optimize_skip_unused_shards_rewrite_in = '1', force_optimize_skip_unused_shards = '2', optimize_skip_unused_shards_nesting = '1', force_optimize_skip_unused_shards_nesting = '2', merge_tree_min_rows_for_concurrent_read = '10000', force_primary_key = '1', network_compression_method = 'ZSTD', network_zstd_compression_level = '7', log_queries = '0', log_queries_min_type = 'QUERY_FINISH', distributed_product_mode = 'local', insert_quorum = '2', insert_quorum_timeout = '0', 
insert_quorum_parallel = '0', select_sequential_consistency = '1', join_use_nulls = '1', any_join_distinct_right_table_keys = '1', preferred_max_column_in_block_size_bytes = '32', distributed_foreground_insert = '1', insert_allow_materialized_columns = '1', use_index_for_in_with_subqueries = '1', joined_subquery_requires_alias = '0', empty_result_for_aggregation_by_empty_set = '1', allow_suspicious_codecs = '1', query_profiler_real_time_period_ns = '0', query_profiler_cpu_time_period_ns = '0', opentelemetry_start_trace_probability = '1', max_rows_to_read = '1000000', read_overflow_mode = 'break', max_rows_to_group_by = '10', group_by_overflow_mode = 'any', max_rows_to_sort = '100', sort_overflow_mode = 'break', max_result_rows = '10', max_execution_time = '3', max_execution_speed = '1', max_bytes_in_join = '100', join_algorithm = 'partial_merge', max_memory_usage = '1099511627776', log_query_threads = '1', send_logs_level = 'fatal', enable_optimize_predicate_expression = '1', prefer_localhost_replica = '1', optimize_read_in_order = '1', optimize_aggregation_in_order = '1', read_in_order_two_level_merge_threshold = '1', allow_introspection_functions = '1', check_query_single_value_result = '1', allow_experimental_live_view = '1', default_table_engine = 'Memory', mutations_sync = '2', convert_query_to_cnf = '0', optimize_arithmetic_operations_in_aggregate_functions = '1', optimize_duplicate_order_by_and_distinct = '0', optimize_multiif_to_if = '0', optimize_functions_to_subcolumns = '1', optimize_using_constraints = '1', optimize_substitute_columns = '1', optimize_append_index = '1', transform_null_in = '1', data_type_default_nullable = '1', cast_keep_nullable = '1', cast_ipv4_ipv6_default_on_conversion_error = '0', system_events_show_zero_values = '1', enable_global_with_statement = '1', optimize_on_insert = '0', optimize_rewrite_sum_if_to_count_if = '1', distributed_ddl_output_mode = 'throw', union_default_mode = 'ALL', optimize_aggregators_of_group_by_keys = '1', optimize_group_by_function_keys = '1', short_circuit_function_evaluation = 'enable', async_insert = '1', enable_filesystem_cache = '0', allow_deprecated_database_ordinary = '1', allow_deprecated_syntax_for_merge_tree = '1', allow_experimental_nlp_functions = '1', allow_experimental_object_type = '1', allow_experimental_map_type = '1', optimize_use_projections = '1', input_format_null_as_default = '1', input_format_ipv4_default_on_conversion_error = '0', input_format_ipv6_default_on_conversion_error = '0', output_format_json_named_tuples_as_objects = '1', output_format_write_statistics = '0', output_format_pretty_row_numbers = '1';
+SETTINGS max_block_size = '1', min_insert_block_size_rows = '65536', min_insert_block_size_bytes = '0', max_insert_threads = '0', max_threads = '3', receive_timeout = '10', receive_data_timeout_ms = '10000', connections_with_failover_max_tries = '0', extremes = '1', use_uncompressed_cache = '0', optimize_move_to_prewhere = '1', optimize_move_to_prewhere_if_final = '0', replication_alter_partitions_sync = '2', totals_mode = 'before_having', allow_suspicious_low_cardinality_types = '1', compile_expressions = '1', min_count_to_compile_expression = '0', group_by_two_level_threshold = '100', distributed_aggregation_memory_efficient = '0', distributed_group_by_no_merge = '1', optimize_distributed_group_by_sharding_key = '1', optimize_skip_unused_shards = '1', optimize_skip_unused_shards_rewrite_in = '1', force_optimize_skip_unused_shards = '2', optimize_skip_unused_shards_nesting = '1', force_optimize_skip_unused_shards_nesting = '2', merge_tree_min_rows_for_concurrent_read = '10000', force_primary_key = '1', network_compression_method = 'ZSTD', network_zstd_compression_level = '7', log_queries = '0', log_queries_min_type = 'QUERY_FINISH', distributed_product_mode = 'local', insert_quorum = '2', insert_quorum_timeout = '0', insert_quorum_parallel = '0', select_sequential_consistency = '1', join_use_nulls = '1', any_join_distinct_right_table_keys = '1', preferred_max_column_in_block_size_bytes = '32', distributed_foreground_insert = '1', insert_allow_materialized_columns = '1', use_index_for_in_with_subqueries = '1', joined_subquery_requires_alias = '0', empty_result_for_aggregation_by_empty_set = '1', allow_suspicious_codecs = '1', query_profiler_real_time_period_ns = '0', query_profiler_cpu_time_period_ns = '0', opentelemetry_start_trace_probability = '1', max_rows_to_read = '1000000', read_overflow_mode = 'break', max_rows_to_group_by = '10', group_by_overflow_mode = 'any', max_rows_to_sort = '100', sort_overflow_mode = 'break', max_result_rows = '10', max_execution_time = '3', max_execution_speed = '1', max_bytes_in_join = '100', join_algorithm = 'partial_merge', max_memory_usage = '1099511627776', log_query_threads = '1', send_logs_level = 'fatal', enable_optimize_predicate_expression = '1', prefer_localhost_replica = '1', optimize_read_in_order = '1', optimize_aggregation_in_order = '1', read_in_order_two_level_merge_threshold = '1', allow_introspection_functions = '1', check_query_single_value_result = '1', allow_experimental_live_view = '1', default_table_engine = 'Memory', mutations_sync = '2', convert_query_to_cnf = '0', optimize_arithmetic_operations_in_aggregate_functions = '1', optimize_duplicate_order_by_and_distinct = '0', optimize_multiif_to_if = '0', optimize_functions_to_subcolumns = '1', optimize_using_constraints = '1', optimize_substitute_columns = '1', optimize_append_index = '1', transform_null_in = '1', data_type_default_nullable = '1', cast_keep_nullable = '1', cast_ipv4_ipv6_default_on_conversion_error = '0', system_events_show_zero_values = '1', enable_global_with_statement = '1', optimize_on_insert = '0', optimize_rewrite_sum_if_to_count_if = '1', distributed_ddl_output_mode = 'throw', union_default_mode = 'ALL', optimize_aggregators_of_group_by_keys = '1', optimize_group_by_function_keys = '1', short_circuit_function_evaluation = 'enable', async_insert = '1', enable_filesystem_cache = '0', allow_deprecated_database_ordinary = '1', allow_deprecated_syntax_for_merge_tree = '1', allow_experimental_nlp_functions = '1', allow_experimental_object_type = '1', optimize_use_projections = '1', input_format_null_as_default = '1', input_format_ipv4_default_on_conversion_error = '0', input_format_ipv6_default_on_conversion_error = '0', output_format_json_named_tuples_as_objects = '1', output_format_write_statistics = '0', output_format_pretty_row_numbers = '1';
 DROP TABLE data_a_02187;
diff --git a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv
index 4c0c9125b46..7b001180abf 100644
--- a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv
+++ b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv
@@ -30,7 +30,6 @@ allow_experimental_hash_functions 0
 allow_experimental_inverted_index 0
 allow_experimental_lightweight_delete 1
 allow_experimental_live_view 0
-allow_experimental_map_type 1
 allow_experimental_materialized_postgresql_table 0
 allow_experimental_nlp_functions 0
 allow_experimental_object_type 0

From 33fd812a306139efd57ed761dc6509f00779c3ae Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 30 May 2024 10:08:17 +0000
Subject: [PATCH 14/36] Fix spelling

---
 docs/en/sql-reference/data-types/map.md                | 2 +-
 docs/en/sql-reference/functions/tuple-map-functions.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index df981e80e45..ce249b338a0 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -82,7 +82,7 @@ Result:
 
 ## Reading subcolumns of Map
 
-To avoid reading the entire map, you can use subcolumsn `keys` and `values` in some cases.
+To avoid reading the entire map, you can use subcolumns `keys` and `values` in some cases.
 
 **Example**
 
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 0436791dd8f..0bc5ef38f89 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -300,7 +300,7 @@ Result:
 
 Fills missing key-value pairs in a map with integer keys.
 To support extending the keys beyond the largest value, a maximum key can be specified.
-More specificaly, the function returns a map in which the the keys form a series from the smallest to the largest key (or `max` argument if it specified) with step size of 1, and corresponding values.
+More specifically, the function returns a map in which the the keys form a series from the smallest to the largest key (or `max` argument if it specified) with step size of 1, and corresponding values.
 If no value is specified for a key, a default value is used as value.
 In case keys repeat, only the first value (in order of appearance) is associated with the key.
 

From 14b2584980ce4f98deb05a2612e24f6d203ddb65 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 30 May 2024 10:41:14 +0000
Subject: [PATCH 15/36] Incorporate review feedback

---
 docs/en/sql-reference/data-types/map.md | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index ce249b338a0..4c264f2d7b4 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -6,28 +6,30 @@ sidebar_label: Map(K, V)
 
 # Map(K, V)
 
-`Map(K, V)` data type stores `key:value` pairs.
+Data type `Map(K, V)` stores key-value pairs.
 
-Maps are internally implemented as `Array(Tuple(key T1, value T2))`.
-As a result, maps maintain the order in which keys are inserted.
+Unlike other databases, maps are not unique in ClickHouse, i.e. a map can contain two elements with the same key.
+(The reason for that is that maps are internally implemented as `Array(Tuple(K, V))`.)
+
+You can use use syntax `m[k]` to obtain the value for key `k` in map `m`.
+If more than one element with key `k` exists, the value for the first element is returned.
+Also, `m[k]` scans the map, i.e. the runtime of the operation is linear in the size of the map.
 
 **Parameters**
 
 - `K` — The type of the Map keys. Arbitrary type except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md) types.
 - `V` — The type of the Map values. Arbitrary type.
 
-You can use use syntax `m[k]` to obtain the value for key `k` in map `m`.
-
 **Examples**
 
-Consider the table:
+Create a table with a column of type map:
 
 ``` sql
 CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory;
 INSERT INTO tab VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
 ```
 
-To select all `key2` values:
+To select `key2` values:
 
 ```sql
 SELECT m['key2'] FROM tab;
 ```
 
 Result:
 
 ``` text
 ┌─arrayElement(m, 'key2')─┐
 │                      10 │
 │                      20 │
 │                      30 │
 └─────────────────────────┘
 ```
 
-If the map does not contain the requested key, `m[k]` returns the value type's default value, e.g. `0` for integer types, `''` for string types or `[]` for Array types.
+If the requested key `k` is not contained in the map, `m[k]` returns the value type's default value, e.g. `0` for integer types and `''` for string types.
+To check whether a key exists in a map, you can use function [mapContains](../../sql-reference/functions/tuple-map-functions#mapcontains).
 
 ```sql
 CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory;
@@ -92,8 +95,8 @@ Query:
 
 CREATE TABLE tab (m Map(String, UInt64)) ENGINE = Memory;
 INSERT INTO tab VALUES (map('key1', 1, 'key2', 2, 'key3', 3));
 
-SELECT m.keys FROM tab;
-SELECT m.values FROM tab;
+SELECT m.keys FROM tab; -- same as mapKeys(m)
+SELECT m.values FROM tab; -- same as mapValues(m)
 ```
 
 Result:
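The documentation change above pins down the semantics worth remembering when reviewing the rest of this series: a ClickHouse map is stored as `Array(Tuple(K, V))`, duplicate keys are permitted, and `m[k]` is a linear scan that returns the value of the first matching element. A minimal, self-contained C++ sketch of that lookup model follows; the types and function here are illustrative stand-ins, not the actual ClickHouse column implementation.

```cpp
#include <optional>
#include <string>
#include <utility>
#include <vector>

/// A map modeled as an array of (key, value) tuples, as the Map docs describe.
/// Duplicate keys are allowed; lookup returns the value of the first match.
template <typename K, typename V>
std::optional<V> mapAt(const std::vector<std::pair<K, V>> & pairs, const K & key)
{
    for (const auto & [k, v] : pairs) /// O(n) scan, linear in the map size
        if (k == key)
            return v;
    return std::nullopt;              /// caller substitutes the value type's default
}

int main()
{
    std::vector<std::pair<std::string, int>> m{{"key1", 1}, {"key1", 2}, {"key2", 10}};
    /// First match wins: returns 1, not 2, even though "key1" occurs twice.
    return mapAt(m, std::string("key1")).value_or(0) == 1 ? 0 : 1;
}
```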
From c1fae43abf8c90071a72ed6dfc1d15acec6a0893 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 30 May 2024 12:37:47 +0000
Subject: [PATCH 16/36] Fix FastTest

---
 tests/queries/0_stateless/02995_baseline_23_12_1.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv
index 7b001180abf..4c0c9125b46 100644
--- a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv
+++ b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv
@@ -30,6 +30,7 @@ allow_experimental_hash_functions 0
 allow_experimental_inverted_index 0
 allow_experimental_lightweight_delete 1
 allow_experimental_live_view 0
+allow_experimental_map_type 1
 allow_experimental_materialized_postgresql_table 0
 allow_experimental_nlp_functions 0
 allow_experimental_object_type 0

From 0012f97c3bb6e6c64774190346d0ebf2d262111b Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 30 May 2024 22:01:49 +0000
Subject: [PATCH 17/36] Add very simple test for unusual map types

---
 ...dls_and_merges_with_unusual_maps.reference |  8 +++++
 ...3034_ddls_and_merges_with_unusual_maps.sql | 32 +++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference
 create mode 100644 tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql

diff --git a/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference
new file mode 100644
index 00000000000..9dc0605fd5a
--- /dev/null
+++ b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference
@@ -0,0 +1,8 @@
+Map(Nothing, ...) is non-comparable --> not usable as primary key
+But Map(Nothing, ...) can be a non-primary-key, it is quite useless though ...
+Map(Float32, ...) and Map(LC(String)) are okay as primary key
+{1:'a'} {'b':'b'}
+{2:'aa'} {'bb':'bb'}
+Map(Float32, ...) and Map(LC(String)) as non-primary-key
+{1:'a'} {'b':'b'}
+{3:'aaa'} {'bb':'bb'}
diff --git a/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql
new file mode 100644
index 00000000000..74a13eb7a28
--- /dev/null
+++ b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql
@@ -0,0 +1,32 @@
+-- Tests maps with "unusual" key types (Float32, Nothing, LowCardinality(String))
+
+SET mutations_sync = 2;
+
+DROP TABLE IF EXISTS tab;
+
+SELECT 'Map(Nothing, ...) is non-comparable --> not usable as primary key';
+CREATE TABLE tab (m1 Map(Nothing, String)) ENGINE = MergeTree ORDER BY m1; -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY }
+
+SELECT 'But Map(Nothing, ...) can be a non-primary-key, it is quite useless though ...';
+CREATE TABLE tab (m3 Map(Nothing, String)) ENGINE = MergeTree ORDER BY tuple();
+-- INSERT INTO tab VALUES (map('', 'd')); -- { serverError NOT_IMPLEMENTED } -- <-- for some weird reason the test won't let me set an expected error
+DROP TABLE tab;
+
+SELECT 'Map(Float32, ...) and Map(LC(String)) are okay as primary key';
+CREATE TABLE tab (m1 Map(Float32, String), m2 Map(LowCardinality(String), String)) ENGINE = MergeTree ORDER BY (m1, m2);
+INSERT INTO tab VALUES (map(1.0, 'a'), map('b', 'b'));
+INSERT INTO tab VALUES (map(2.0, 'aa'), map('bb', 'bb'));
+
+-- Test merge
+OPTIMIZE TABLE tab FINAL;
+SELECT * FROM tab ORDER BY m1, m2;
+
+DROP TABLE tab;
+
+SELECT 'Map(Float32, ...) and Map(LC(String)) as non-primary-key';
+CREATE TABLE tab (m1 Map(Float32, String), m2 Map(LowCardinality(String), String)) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO tab VALUES (map(1.0, 'a'), map('b', 'b')), (map(2.0, 'aa'), map('bb', 'bb'));
+ALTER TABLE tab UPDATE m1 = map(3.0, 'aaa') WHERE m1 = map(2.0, 'aa');
+SELECT * FROM tab ORDER BY m1, m2;
+
+DROP TABLE tab;
From 61b3640b3bcb7987a6bb1ae19efeccf7eb496943 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 30 May 2024 22:23:55 +0000
Subject: [PATCH 18/36] docs fixes

---
 docs/en/sql-reference/functions/tuple-map-functions.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 0bc5ef38f89..d537963f15d 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -44,10 +44,9 @@ Result:
 ## mapFromArrays
 
 Creates a map from an array of keys and an array of values.
-The second argument can also be a map, it will be casted during execution to an array.
 
 The function is a convenient alternative to syntax `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`.
-For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
+For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')` or `CAST([('aa',4), ('bb',5)], 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
 
 **Syntax**
 

From 85e5175a5933e62ea7862d11b4cd688ba119e3ab Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Fri, 31 May 2024 11:17:22 +0000
Subject: [PATCH 19/36] Doc updates

---
 docs/en/sql-reference/data-types/map.md       |  1 -
 .../functions/tuple-map-functions.md          | 42 +++++++++++++++----
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index 4c264f2d7b4..9f82c2f093a 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -12,7 +12,6 @@ Unlike other databases, maps are not unique in ClickHouse, i.e. a map can contai
 (The reason for that is that maps are internally implemented as `Array(Tuple(K, V))`.)
 
 You can use use syntax `m[k]` to obtain the value for key `k` in map `m`.
-If more than one element with key `k` exists, the value for the first element is returned.
 Also, `m[k]` scans the map, i.e. the runtime of the operation is linear in the size of the map.
 
 **Parameters**
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index d537963f15d..1665c912b8b 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -45,8 +45,12 @@ Result:
 
 Creates a map from an array of keys and an array of values.
 
-The function is a convenient alternative to syntax `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`.
-For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')` or `CAST([('aa',4), ('bb',5)], 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
+The function is a convenient alternative to syntax `CAST([...], 'Map(key_type, value_type)')`.
+For example, instead of writing
+- `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, or
+- `CAST([('aa',4), ('bb',5)], 'Map(String, UInt32)')`
+
+you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
 
 **Syntax**
 
@@ -81,7 +85,7 @@ Result:
 └───────────────────────────────────────────┘
 ```
 
-Query
+`mapFromArrays` also accepts arguments of type [Map](../data-types/map.md). These are casted to array of tuples during execution.
 
 ```sql
 SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))
@@ -97,11 +101,11 @@ Result:
 
 ## extractKeyValuePairs
 
-Creates a value of type [Map(String, String)](../data-types/map.md) from key-value pairs in a string.
-Parsing is robust towards noise (e.g. log files).
-A key-value pair consists of a key, followed by a key-value delimiter, and a value.
-Key value pairs are separated by `pair_delimiter`.
-Keys and values can also be quoted.
+Convertes a string of key-value pairs to a [Map(String, String)](../data-types/map.md).
+Parsing is tolerant towards noise (e.g. log files).
+Key-value pairs in the input string consist of a key, followed by a key-value delimiter, and a value.
+Key value pairs are separated by a pair delimiter.
+Keys and values can be quoted.
 
 **Syntax**
 
@@ -140,7 +144,7 @@ Result:
 └─────────────────────────────────────────────────────────────────────────┘
 ```
 
-With a single quote `'`as quoting character:
+With a single quote `'` as quoting character:
 
 ``` sql
 SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
@@ -168,6 +172,26 @@ Result:
 └────────────────────────┘
 ```
 
+To restore a map string key-value pairs serialized with `toString`:
+
+```sql
+SELECT
+    map('John', '33', 'Paula', '31') AS m,
+    toString(m) as map_serialized,
+    extractKeyValuePairs(map_serialized, ':', ',', '\'') AS map_restored
+FORMAT Vertical;
+```
+
+Result:
+
+```
+Row 1:
+──────
+m:              {'John':'33','Paula':'31'}
+map_serialized: {'John':'33','Paula':'31'}
+map_restored:   {'John':'33','Paula':'31'}
+```
+
 ## extractKeyValuePairsWithEscaping
 
 Same as `extractKeyValuePairs` but supports escaping.

From 29f57acbc2c67a089188565fe52a23962eaadbd4 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Fri, 31 May 2024 12:26:18 +0000
Subject: [PATCH 20/36] Fix spelling

---
 docs/en/sql-reference/functions/tuple-map-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 1665c912b8b..ad40725d680 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -101,7 +101,7 @@ Result:
 
 ## extractKeyValuePairs
 
-Convertes a string of key-value pairs to a [Map(String, String)](../data-types/map.md).
+Converts a string of key-value pairs to a [Map(String, String)](../data-types/map.md).
 Parsing is tolerant towards noise (e.g. log files).
 Key-value pairs in the input string consist of a key, followed by a key-value delimiter, and a value.
 Key value pairs are separated by a pair delimiter.

From 1e01adda0419d9231e360e2eb35915c312f72893 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Mon, 3 Jun 2024 17:33:09 +0200
Subject: [PATCH 21/36] Do not try to write columns.txt if it does not exist
 for write-once storages

Remember that write-once storage does not allow moves:

[local] 2024.06.03 17:21:22.520165 [ 10032 ] {c923ec81-c802-407f-8078-a5b46cec4d21} DiskObjectStorageTransaction: An error occurred while executing transaction's operation #0 (PureMetadataObjectStorageOperation): Code: 48. DB::Exception: Operation is not implemented. (NOT_IMPLEMENTED), Stack trace (when copying this message, always include the lines below):

0. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/exception:141: Poco::Exception::Exception(String const&, int) @ 0x00000000143113d6
1. /src/ch/clickhouse/src/Common/Exception.cpp:101: DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000ae2fd6c
2. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/string:1499: DB::Exception::Exception(PreformattedMessage&&, int) @ 0x0000000006133a61
3. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/vector:434: DB::Exception::Exception<>(int, FormatStringHelperImpl<>) @ 0x000000000613f0dc
4. /src/ch/clickhouse/src/Disks/ObjectStorages/IMetadataStorage.h:164: DB::IMetadataTransaction::throwNotImplemented() @ 0x000000000f895364
7. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:701: DB::(anonymous namespace)::PureMetadataObjectStorageOperation::execute(std::shared_ptr) @ 0x000000000f84ddde
8. /src/ch/clickhouse/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp:946: DB::DiskObjectStorageTransaction::commit() @ 0x000000000f84d74f
9. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:701: DB::DiskObjectStorage::moveFile(String const&, String const&, bool) @ 0x000000000f83c472
10. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/string:1499: DB::DataPartStorageOnDiskFull::moveFile(String const&, String const&) @ 0x00000000111b541a
11. /src/ch/clickhouse/src/Storages/MergeTree/IMergeTreeDataPart.cpp:1053: DB::IMergeTreeDataPart::writeColumns(DB::NamesAndTypesList const&, DB::WriteSettings const&) @ 0x00000000111f28be
12. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:701: DB::IMergeTreeDataPart::loadColumns(bool) @ 0x00000000111e9883
13. /src/ch/clickhouse/src/Storages/MergeTree/IMergeTreeDataPart.cpp:714: DB::IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool, bool) @ 0x00000000111e8b12
14. /src/ch/clickhouse/src/Storages/MergeTree/MergeTreeData.cpp:0: DB::MergeTreeData::loadDataPart(DB::MergeTreePartInfo const&, String const&, std::shared_ptr const&, DB::MergeTreeDataPartState, std::mutex&) @ 0x00000000112849d0

Note that DataPartStorageOnDiskBase::isReadonly() is used only in loadColumns(), for calling writeColumns() in case columns.txt does not exist.

Signed-off-by: Azat Khuzhin

---
 src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
index 82af6c1fbe8..4ea3cfed099 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
@@ -229,7 +229,7 @@ bool DataPartStorageOnDiskBase::isBroken() const
 
 bool DataPartStorageOnDiskBase::isReadonly() const
 {
-    return volume->getDisk()->isReadOnly();
+    return volume->getDisk()->isReadOnly() || volume->getDisk()->isWriteOnce();
 }
 
 void DataPartStorageOnDiskBase::syncRevision(UInt64 revision) const
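The one-line fix above works because "read-only" is the signal `loadColumns()` uses to decide whether it may regenerate a missing `columns.txt`, and that regeneration ends in a file move that write-once storage cannot perform. A minimal C++ sketch of the guard pattern follows; `IDisk`, `PartStorage`, and `loadColumnsSketch` are simplified hypothetical stand-ins, not the real ClickHouse classes.

```cpp
#include <memory>

/// Simplified stand-in for the disk interface referenced in the patch.
struct IDisk
{
    virtual ~IDisk() = default;
    virtual bool isReadOnly() const = 0;  /// no writes at all
    virtual bool isWriteOnce() const = 0; /// writes allowed, but no rename/move
};

struct PartStorage
{
    std::shared_ptr<IDisk> disk;

    /// After the fix, "read-only" here means "do not attempt rewrite-style repairs":
    /// a write-once disk can create files but cannot do the write-tmp-then-move
    /// dance that regenerating columns.txt relies on, so treat it as read-only too.
    bool isReadonly() const { return disk->isReadOnly() || disk->isWriteOnce(); }
};

/// Sketch of the caller: only regenerate a missing metadata file when the
/// storage actually supports the move that finalizes it.
void loadColumnsSketch(const PartStorage & storage, bool columns_file_exists)
{
    if (!columns_file_exists && !storage.isReadonly())
    {
        /// writeColumns(): write columns.tmp, then move it onto columns.txt
    }
    /// otherwise: reconstruct the columns list in memory and continue
}

int main() { return 0; }
```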
From d45956440883b686415732ba3df5cbb23e22f753 Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Mon, 3 Jun 2024 19:30:01 +0200
Subject: [PATCH 22/36] review changes

---
 .../test.py | 20 ++++++++-----------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
index eed4ca84b46..dd332e0d984 100644
--- a/tests/integration/test_unknown_column_dist_table_with_alias/test.py
+++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
@@ -17,21 +17,17 @@ def start_cluster():
     finally:
         cluster.shutdown()
 
-
-def test_distributed_table_with_alias(start_cluster):
-    node.query("")
+@pytest.mark.parametrize("prefer_localhost_replica", [0, 1])
+def test_distributed_table_with_alias(start_cluster, prefer_localhost_replica):
     node.query(
         """
-        drop table IF EXISTS local;
-        drop table IF EXISTS dist;
+        DROP TABLE IF EXISTS local;
+        DROP TABLE IF EXISTS dist;
         CREATE TABLE local(`dummy` UInt8) ENGINE = MergeTree ORDER BY tuple();
         CREATE TABLE dist AS local ENGINE = Distributed(localhost_cluster, currentDatabase(), local);
-        SET prefer_localhost_replica = 1;
         """
     )
-    try:
-        # Attempt to execute the query
-        node.query("WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;")
-    except QueryRuntimeException as e:
-        # If an exception occurs, fail the test
-        pytest.fail(f"Query raised an exception: {e}")
+
+    node.query(f"SET prefer_localhost_replica = {prefer_localhost_replica};")
+
+    node.query("WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;")

From 0e2efda93a7b7755de04002304f80402e167eb35 Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Mon, 3 Jun 2024 19:39:41 +0200
Subject: [PATCH 23/36] black check

---
 .../test_unknown_column_dist_table_with_alias/test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
index dd332e0d984..0a0d3dbb092 100644
--- a/tests/integration/test_unknown_column_dist_table_with_alias/test.py
+++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
@@ -17,6 +17,7 @@ def start_cluster():
     finally:
         cluster.shutdown()
 
+
 @pytest.mark.parametrize("prefer_localhost_replica", [0, 1])
 def test_distributed_table_with_alias(start_cluster, prefer_localhost_replica):
     node.query(

From 9d30a7f056ea68cc2a11974bcecf93da95bbc753 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Tue, 4 Jun 2024 11:30:22 +0200
Subject: [PATCH 24/36] Fix Keeper snapshot size in mntr

---
 src/Coordination/FourLetterCommand.cpp        |  2 +-
 src/Coordination/KeeperSnapshotManager.cpp    |  8 ++--
 src/Coordination/KeeperSnapshotManager.h      |  1 +
 src/Coordination/KeeperSnapshotManagerS3.cpp  |  2 +-
 src/Coordination/KeeperStateMachine.cpp       | 14 +++---
 src/Coordination/KeeperStateMachine.h         |  2 +-
 .../integration/test_keeper_snapshots/test.py | 44 ++++++++++++++++++-
 7 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp
index 28902bc8591..8de9f8dfa1c 100644
--- a/src/Coordination/FourLetterCommand.cpp
+++ b/src/Coordination/FourLetterCommand.cpp
@@ -305,7 +305,7 @@ String MonitorCommand::run()
     print(ret, "ephemerals_count", state_machine.getTotalEphemeralNodesCount());
     print(ret, "approximate_data_size", state_machine.getApproximateDataSize());
     print(ret, "key_arena_size", state_machine.getKeyArenaSize());
-    print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotBufSize());
+    print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotSize());
 
 #if defined(OS_LINUX) || defined(OS_DARWIN)
     print(ret, "open_file_descriptor_count", getCurrentProcessFDCount());
diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp
index f25ccab86b1..b8fab410daf 100644
--- a/src/Coordination/KeeperSnapshotManager.cpp
+++ b/src/Coordination/KeeperSnapshotManager.cpp
@@ -690,7 +690,7 @@ nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBuff
     }
     catch (const DB::Exception &)
     {
-        const auto & [path, disk] = latest_itr->second;
+        const auto & [path, disk, size] = latest_itr->second;
         disk->removeFile(path);
         existing_snapshots.erase(latest_itr->first);
         tryLogCurrentException(__PRETTY_FUNCTION__);
@@ -702,7 +702,7 @@ nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBuff
 
 nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const
 {
-    const auto & [snapshot_path, snapshot_disk] = existing_snapshots.at(up_to_log_idx);
+    const auto & [snapshot_path, snapshot_disk, size] = existing_snapshots.at(up_to_log_idx);
     WriteBufferFromNuraftBuffer writer;
     auto reader = snapshot_disk->readFile(snapshot_path);
     copyData(*reader, writer);
@@ -817,7 +817,7 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx)
     auto itr = existing_snapshots.find(log_idx);
     if (itr == existing_snapshots.end())
         throw Exception(ErrorCodes::UNKNOWN_SNAPSHOT, "Unknown snapshot with log index {}", log_idx);
-    const auto & [path, disk] = itr->second;
+    const auto & [path, disk, size] = itr->second;
     disk->removeFileIfExists(path);
     existing_snapshots.erase(itr);
 }
@@ -873,7 +873,7 @@ SnapshotFileInfo KeeperSnapshotManager::getLatestSnapshotInfo() const
 {
     if (!existing_snapshots.empty())
     {
-        const auto & [path, disk] = existing_snapshots.at(getLatestSnapshotIndex());
+        const auto & [path, disk, size] = existing_snapshots.at(getLatestSnapshotIndex());
 
         try
         {
diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h
index 8ba0f92a564..c875fb65075 100644
--- a/src/Coordination/KeeperSnapshotManager.h
+++ b/src/Coordination/KeeperSnapshotManager.h
@@ -95,6 +95,7 @@ struct SnapshotFileInfo
 {
     std::string path;
     DiskPtr disk;
+    mutable std::optional<size_t> size = std::nullopt;
 };
 
 using KeeperStorageSnapshotPtr = std::shared_ptr<KeeperStorageSnapshot>;
diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp
index b984b8ad18e..611fb345262 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.cpp
+++ b/src/Coordination/KeeperSnapshotManagerS3.cpp
@@ -147,7 +147,7 @@ std::shared_ptr KeeperSnapshotManagerS
 
 void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapshot_file_info)
 {
-    const auto & [snapshot_path, snapshot_disk] = snapshot_file_info;
+    const auto & [snapshot_path, snapshot_disk, snapshot_size] = snapshot_file_info;
     try
     {
         auto s3_client = getSnapshotS3Client();
diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp
index 3dbdb329b93..c5f40cea7d9 100644
--- a/src/Coordination/KeeperStateMachine.cpp
+++ b/src/Coordination/KeeperStateMachine.cpp
@@ -733,7 +733,7 @@ int KeeperStateMachine::read_logical_snp_obj(
         return -1;
     }
 
-    const auto & [path, disk] = latest_snapshot_info;
+    const auto & [path, disk, size] = latest_snapshot_info;
     if (isLocalDisk(*disk))
     {
         auto full_path = fs::path(disk->getPath()) / path;
@@ -862,12 +862,16 @@ uint64_t KeeperStateMachine::getKeyArenaSize() const
     return storage->getArenaDataSize();
 }
 
-uint64_t KeeperStateMachine::getLatestSnapshotBufSize() const
+uint64_t KeeperStateMachine::getLatestSnapshotSize() const
 {
     std::lock_guard lock(snapshots_lock);
-    if (latest_snapshot_buf)
-        return latest_snapshot_buf->size();
-    return 0;
+    if (latest_snapshot_info.disk == nullptr)
+        return 0;
+
+    if (!latest_snapshot_info.size.has_value())
+        latest_snapshot_info.size = latest_snapshot_info.disk->getFileSize(latest_snapshot_info.path);
+
+    return *latest_snapshot_info.size;
 }
 
 ClusterConfigPtr KeeperStateMachine::getClusterConfig() const
diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h
index 3727beadb98..c4d47f9aa61 100644
--- a/src/Coordination/KeeperStateMachine.h
+++ b/src/Coordination/KeeperStateMachine.h
@@ -124,7 +124,7 @@ public:
     uint64_t getTotalEphemeralNodesCount() const;
     uint64_t getApproximateDataSize() const;
     uint64_t getKeyArenaSize() const;
-    uint64_t getLatestSnapshotBufSize() const;
+    uint64_t getLatestSnapshotSize() const;
 
     void recalculateStorageStats();
 
diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py
index f6f746c892e..6dfb2078559 100644
--- a/tests/integration/test_keeper_snapshots/test.py
+++ b/tests/integration/test_keeper_snapshots/test.py
@@ -17,7 +17,6 @@ node = cluster.add_instance(
     "node",
     main_configs=["configs/enable_keeper.xml"],
     stay_alive=True,
-    with_zookeeper=True,
 )
 
 
@@ -211,3 +210,46 @@ def test_invalid_snapshot(started_cluster):
             node_zk.close()
         except:
             pass
+
+
+def test_snapshot_size(started_cluster):
+    keeper_utils.wait_until_connected(started_cluster, node)
+    node_zk = None
+    try:
+        node_zk = get_connection_zk("node")
+
+        node_zk.create("/test_state_size", b"somevalue")
+        strs = []
+        for i in range(100):
+            strs.append(random_string(123).encode())
+            node_zk.create("/test_state_size/node" + str(i), strs[i])
+
+        node_zk.stop()
+        node_zk.close()
+
+        keeper_utils.send_4lw_cmd(started_cluster, node, "csnp")
+        node.wait_for_log_line("Created persistent snapshot")
+
+        def get_snapshot_size():
+            return int(
+                next(
+                    filter(
+                        lambda line: "zk_latest_snapshot_size" in line,
+                        keeper_utils.send_4lw_cmd(started_cluster, node, "mntr").split(
+                            "\n"
+                        ),
+                    )
+                ).split("\t")[1]
+            )
+
+        assert get_snapshot_size() != 0
+        restart_clickhouse()
+        assert get_snapshot_size() != 0
+    finally:
+        try:
+            if node_zk is not None:
+                node_zk.stop()
+                node_zk.close()
+
+        except:
+            pass
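The core idea of the patch above is to report the snapshot's on-disk file size instead of the in-memory buffer size, which is empty right after a restart, and to compute that file size lazily and cache it. A compact C++ sketch of the same lazy-stat-and-cache pattern follows; `Disk`, `SnapshotFileInfo`, and the stubbed size are hypothetical stand-ins for illustration only.

```cpp
#include <optional>
#include <string>

/// Hypothetical stand-ins: a "disk" that can stat a file, and a snapshot record
/// that caches the file size the first time somebody asks for it.
struct Disk
{
    size_t getFileSize(const std::string & /*path*/) const { return 12345; } /// stub
};

struct SnapshotFileInfo
{
    std::string path;
    const Disk * disk = nullptr;
    mutable std::optional<size_t> size; /// filled on first use; mutable so a const getter can cache
};

/// mntr-style getter: works even right after a restart, when no snapshot buffer
/// is held in memory, because it consults the file on disk instead.
size_t getLatestSnapshotSize(const SnapshotFileInfo & info)
{
    if (info.disk == nullptr)
        return 0;
    if (!info.size.has_value())
        info.size = info.disk->getFileSize(info.path); /// lazy stat, cached afterwards
    return *info.size;
}

int main()
{
    Disk d;
    SnapshotFileInfo info{"snapshot_100.bin.zstd", &d, std::nullopt};
    return getLatestSnapshotSize(info) == 12345 ? 0 : 1;
}
```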
From 5c9aac484d5cc24a6dc2d41c8e971cc160b02544 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Tue, 4 Jun 2024 14:38:47 +0200
Subject: [PATCH 25/36] Avoid taking lock

---
 programs/keeper-converter/KeeperConverter.cpp |  2 +-
 src/Common/CopyableAtomic.h                   | 10 +++--
 src/Coordination/KeeperDispatcher.cpp         |  3 +-
 src/Coordination/KeeperSnapshotManager.cpp    | 42 ++++++++++---------
 src/Coordination/KeeperSnapshotManager.h      | 20 ++++++---
 src/Coordination/KeeperSnapshotManagerS3.cpp  | 14 +++----
 src/Coordination/KeeperSnapshotManagerS3.h    |  8 ++--
 src/Coordination/KeeperStateMachine.cpp       | 41 ++++++++++++------
 src/Coordination/KeeperStateMachine.h         |  2 +-
 9 files changed, 85 insertions(+), 57 deletions(-)

diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp
index 7518227a070..639e0957f49 100644
--- a/programs/keeper-converter/KeeperConverter.cpp
+++ b/programs/keeper-converter/KeeperConverter.cpp
@@ -57,7 +57,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
         DB::KeeperSnapshotManager manager(1, keeper_context);
         auto snp = manager.serializeSnapshotToBuffer(snapshot);
         auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID());
-        std::cout << "Snapshot serialized to path:" << fs::path(file_info.disk->getPath()) / file_info.path << std::endl;
+        std::cout << "Snapshot serialized to path:" << fs::path(file_info->disk->getPath()) / file_info->path << std::endl;
     }
     catch (...)
     {
diff --git a/src/Common/CopyableAtomic.h b/src/Common/CopyableAtomic.h
index 227fffe927f..199f0cac134 100644
--- a/src/Common/CopyableAtomic.h
+++ b/src/Common/CopyableAtomic.h
@@ -13,8 +13,9 @@ struct CopyableAtomic
         : value(other.value.load())
     {}
 
-    explicit CopyableAtomic(T && value_)
-        : value(std::forward<T>(value_))
+    template <std::convertible_to<T> U>
+    explicit CopyableAtomic(U && value_)
+        : value(std::forward<U>(value_))
    {}
 
     CopyableAtomic & operator=(const CopyableAtomic & other)
@@ -23,9 +24,10 @@ struct CopyableAtomic
         return *this;
     }
 
-    CopyableAtomic & operator=(bool value_)
+    template <std::convertible_to<T> U>
+    CopyableAtomic & operator=(U && value_)
     {
-        value = value_;
+        value = std::forward<U>(value_);
         return *this;
     }
 
diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp
index 84e1632e9b1..b4389da082d 100644
--- a/src/Coordination/KeeperDispatcher.cpp
+++ b/src/Coordination/KeeperDispatcher.cpp
@@ -332,9 +332,10 @@ void KeeperDispatcher::snapshotThread()
             if (shutdown_called)
                 break;
 
-            if (snapshot_file_info.path.empty())
+            if (!snapshot_file_info)
                 continue;
 
+            chassert(snapshot_file_info->disk != nullptr);
             if (isLeader())
                 snapshot_s3.uploadSnapshot(snapshot_file_info);
         }
diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp
index b8fab410daf..8bf42b434cd 100644
--- a/src/Coordination/KeeperSnapshotManager.cpp
+++ b/src/Coordination/KeeperSnapshotManager.cpp
@@ -618,7 +618,7 @@ KeeperSnapshotManager::KeeperSnapshotManager(
                 LOG_TRACE(log, "Found {} on {}", snapshot_file, disk->getName());
                 size_t snapshot_up_to = getSnapshotPathUpToLogIdx(snapshot_file);
-                auto [_, inserted] = existing_snapshots.insert_or_assign(snapshot_up_to, SnapshotFileInfo{snapshot_file, disk});
+                auto [_, inserted] = existing_snapshots.insert_or_assign(snapshot_up_to, std::make_shared<SnapshotFileInfo>(snapshot_file, disk));
 
                 if (!inserted)
                     LOG_WARNING(
@@ -651,7 +651,7 @@ KeeperSnapshotManager::KeeperSnapshotManager(
     moveSnapshotsIfNeeded();
 }
 
-SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx)
+SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx)
 {
     ReadBufferFromNuraftBuffer reader(buffer);
 
@@ -672,11 +672,12 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::bu
 
     disk->removeFile(tmp_snapshot_file_name);
 
-    existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk});
+    auto snapshot_file_info = std::make_shared<SnapshotFileInfo>(snapshot_file_name, disk);
+    existing_snapshots.emplace(up_to_log_idx, snapshot_file_info);
     removeOutdatedSnapshotsIfNeeded();
     moveSnapshotsIfNeeded();
 
-    return {snapshot_file_name, disk};
+    return snapshot_file_info;
 }
 
 nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk()
@@ -690,7 +690,7 @@ nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBuff
         }
         catch (const DB::Exception &)
         {
-            const auto & [path, disk, size] = latest_itr->second;
+            const auto & [path, disk, size] = *latest_itr->second;
             disk->removeFile(path);
             existing_snapshots.erase(latest_itr->first);
             tryLogCurrentException(__PRETTY_FUNCTION__);
@@ -702,7 +703,7 @@ nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBuff
 
 nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const
 {
-    const auto & [snapshot_path, snapshot_disk, size] = existing_snapshots.at(up_to_log_idx);
+    const auto & [snapshot_path, snapshot_disk, size] = *existing_snapshots.at(up_to_log_idx);
     WriteBufferFromNuraftBuffer writer;
     auto reader = snapshot_disk->readFile(snapshot_path);
     copyData(*reader, writer);
@@ -794,18 +795,18 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded()
     {
         if (idx == latest_snapshot_idx)
         {
-            if (file_info.disk != latest_snapshot_disk)
+            if (file_info->disk != latest_snapshot_disk)
             {
-                moveSnapshotBetweenDisks(file_info.disk, file_info.path, latest_snapshot_disk, file_info.path, keeper_context);
-                file_info.disk = latest_snapshot_disk;
+                moveSnapshotBetweenDisks(file_info->disk, file_info->path, latest_snapshot_disk, file_info->path, keeper_context);
+                file_info->disk = latest_snapshot_disk;
             }
         }
         else
        {
-            if (file_info.disk != disk)
+            if (file_info->disk != disk)
             {
-                moveSnapshotBetweenDisks(file_info.disk, file_info.path, disk, file_info.path, keeper_context);
-                file_info.disk = disk;
+                moveSnapshotBetweenDisks(file_info->disk, file_info->path, disk, file_info->path, keeper_context);
+                file_info->disk = disk;
             }
         }
     }
@@ -817,12 +818,12 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx)
     auto itr = existing_snapshots.find(log_idx);
     if (itr == existing_snapshots.end())
         throw Exception(ErrorCodes::UNKNOWN_SNAPSHOT, "Unknown snapshot with log index {}", log_idx);
-    const auto & [path, disk, size] = itr->second;
+    const auto & [path, disk, size] = *itr->second;
     disk->removeFileIfExists(path);
     existing_snapshots.erase(itr);
 }
 
-SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot)
+SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot)
 {
     auto up_to_log_idx = snapshot.snapshot_meta->get_last_log_idx();
     auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd);
@@ -847,7 +848,8 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor
 
     disk->removeFile(tmp_snapshot_file_name);
 
-    existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk});
+    auto snapshot_file_info = std::make_shared<SnapshotFileInfo>(snapshot_file_name, disk);
+    existing_snapshots.emplace(up_to_log_idx, snapshot_file_info);
 
     try
     {
@@ -859,7 +861,7 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor
         tryLogCurrentException(log, "Failed to cleanup and/or move older snapshots");
     }
 
-    return {snapshot_file_name, disk};
+    return snapshot_file_info;
 }
 
 size_t KeeperSnapshotManager::getLatestSnapshotIndex() const
@@ -869,23 +871,23 @@ size_t KeeperSnapshotManager::getLatestSnapshotIndex() const
     return 0;
 }
 
-SnapshotFileInfo KeeperSnapshotManager::getLatestSnapshotInfo() const
+SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const
 {
     if (!existing_snapshots.empty())
     {
-        const auto & [path, disk, size] = existing_snapshots.at(getLatestSnapshotIndex());
+        const auto & [path, disk, size] = *existing_snapshots.at(getLatestSnapshotIndex());
 
         try
         {
             if (disk->exists(path))
-                return {path, disk};
+                return std::make_shared<SnapshotFileInfo>(path, disk);
         }
         catch (...)
        {
             tryLogCurrentException(log);
         }
     }
-    return {"", nullptr};
+    return nullptr;
 }
 
 }
diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h
index 8ba0f92a564..36bf6c8775c 100644
--- a/src/Coordination/KeeperSnapshotManager.h
+++ b/src/Coordination/KeeperSnapshotManager.h
@@ -1,5 +1,6 @@
 #pragma once
 #include
+#include
 #include
 
 namespace DB
@@ -93,13 +94,20 @@ public:
 
 struct SnapshotFileInfo
 {
+    SnapshotFileInfo(std::string path_, DiskPtr disk_)
+        : path(std::move(path_))
+        , disk(std::move(disk_))
+    {}
+
     std::string path;
     DiskPtr disk;
-    mutable std::optional<size_t> size = std::nullopt;
+    mutable std::atomic<size_t> size{0};
 };
 
+using SnapshotFileInfoPtr = std::shared_ptr<SnapshotFileInfo>;
+
 using KeeperStorageSnapshotPtr = std::shared_ptr<KeeperStorageSnapshot>;
-using CreateSnapshotCallback = std::function<SnapshotFileInfo(KeeperStorageSnapshotPtr &&, bool)>;
+using CreateSnapshotCallback = std::function<SnapshotFileInfoPtr(KeeperStorageSnapshotPtr &&, bool)>;
 
 using SnapshotMetaAndStorage = std::pair<SnapshotMetadataPtr, KeeperStoragePtr>;
 
@@ -122,10 +130,10 @@ public:
     nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const;
 
     /// Serialize already compressed snapshot to disk (return path)
-    SnapshotFileInfo serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx);
+    SnapshotFileInfoPtr serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx);
 
     /// Serialize snapshot directly to disk
-    SnapshotFileInfo serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot);
+    SnapshotFileInfoPtr serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot);
 
     SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;
 
@@ -144,7 +152,7 @@ public:
     /// The most fresh snapshot log index we have
     size_t getLatestSnapshotIndex() const;
 
-    SnapshotFileInfo getLatestSnapshotInfo() const;
+    SnapshotFileInfoPtr getLatestSnapshotInfo() const;
 
 private:
     void removeOutdatedSnapshotsIfNeeded();
@@ -160,7 +168,7 @@ private:
     /// How many snapshots to keep before remove
     const size_t snapshots_to_keep;
     /// All existing snapshots in our path (log_index -> path)
-    std::map<uint64_t, SnapshotFileInfo> existing_snapshots;
+    std::map<uint64_t, SnapshotFileInfoPtr> existing_snapshots;
     /// Compress snapshots in common ZSTD format instead of custom ClickHouse block LZ4 format
     const bool compress_snapshots_zstd;
     /// Superdigest for deserialization of storage
diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp
index 611fb345262..c3b390465be 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.cpp
+++ b/src/Coordination/KeeperSnapshotManagerS3.cpp
@@ -145,9 +145,9 @@ std::shared_ptr KeeperSnapshotManagerS
     return snapshot_s3_client;
 }
 
-void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapshot_file_info)
+void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfoPtr & snapshot_file_info)
 {
-    const auto & [snapshot_path, snapshot_disk, snapshot_size] = snapshot_file_info;
+    const auto & [snapshot_path, snapshot_disk, snapshot_size] = *snapshot_file_info;
     try
     {
         auto s3_client = getSnapshotS3Client();
@@ -169,9 +169,9 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
             );
         };
 
-        LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_file_info.path);
+        LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path);
 
-        auto snapshot_file = snapshot_disk->readFile(snapshot_file_info.path);
+        auto snapshot_file = snapshot_disk->readFile(snapshot_path);
 
         auto snapshot_name = fs::path(snapshot_path).filename().string();
         auto lock_file = fmt::format(".{}_LOCK", snapshot_name);
@@ -261,7 +261,7 @@ void KeeperSnapshotManagerS3::snapshotS3Thread()
     while (!shutdown_called)
     {
-        SnapshotFileInfo snapshot_file_info;
+        SnapshotFileInfoPtr snapshot_file_info;
         if (!snapshots_s3_queue.pop(snapshot_file_info))
             break;
 
@@ -272,7 +272,7 @@ void KeeperSnapshotManagerS3::snapshotS3Thread()
     }
 }
 
-void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfo & file_info, bool async_upload)
+void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfoPtr & file_info, bool async_upload)
 {
     if (getSnapshotS3Client() == nullptr)
         return;
@@ -280,7 +280,7 @@ void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfo & file_info,
     if (async_upload)
     {
         if (!snapshots_s3_queue.push(file_info))
-            LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", file_info.path);
+            LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", file_info->path);
         return;
     }
 
diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h
index d03deb60c1a..bf58afc4b12 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.h
+++ b/src/Coordination/KeeperSnapshotManagerS3.h
@@ -12,8 +12,6 @@
 #include
 #include
-
-#include
 #endif
 
 namespace DB
@@ -27,13 +25,13 @@ public:
     /// 'macros' are used to substitute macros in endpoint of disks
     void updateS3Configuration(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros);
-    void uploadSnapshot(const SnapshotFileInfo & file_info, bool async_upload = true);
+    void uploadSnapshot(const SnapshotFileInfoPtr & file_info, bool async_upload = true);
 
     /// 'macros' are used to substitute macros in endpoint of disks
     void startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros);
     void shutdown();
 
 private:
-    using SnapshotS3Queue = ConcurrentBoundedQueue<SnapshotFileInfo>;
+    using SnapshotS3Queue = ConcurrentBoundedQueue<SnapshotFileInfoPtr>;
     SnapshotS3Queue snapshots_s3_queue;
 
     /// Upload new snapshots to S3
@@ -51,7 +49,7 @@ private:
 
     std::shared_ptr getSnapshotS3Client() const;
 
-    void uploadSnapshotImpl(const SnapshotFileInfo & snapshot_file_info);
+    void uploadSnapshotImpl(const SnapshotFileInfoPtr & snapshot_file_info);
 
     /// Thread upload snapshots to S3 in the background
     void snapshotS3Thread();
diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp
index c5f40cea7d9..e4d661dfe17 100644
--- a/src/Coordination/KeeperStateMachine.cpp
+++ b/src/Coordination/KeeperStateMachine.cpp
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -90,8 +91,9 @@ void KeeperStateMachine::init()
             latest_snapshot_buf = snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index);
             auto snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf);
             latest_snapshot_info = snapshot_manager.getLatestSnapshotInfo();
+            chassert(latest_snapshot_info);
 
-            if (isLocalDisk(*latest_snapshot_info.disk))
+            if (isLocalDisk(*latest_snapshot_info->disk))
                 latest_snapshot_buf = nullptr;
 
             storage = std::move(snapshot_deserialization_result.storage);
@@ -603,7 +605,11 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
                 }
 
                 ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreations);
-                LOG_DEBUG(log, "Created persistent snapshot {} with path {}", latest_snapshot_meta->get_last_log_idx(), latest_snapshot_info.path);
+                LOG_DEBUG(
+                    log,
+                    "Created persistent snapshot {} with path {}",
+                    latest_snapshot_meta->get_last_log_idx(),
+                    latest_snapshot_info->path);
             }
         }
     }
@@ -627,7 +633,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
 
         when_done(ret, exception);
 
-        return ret ? latest_snapshot_info : SnapshotFileInfo{};
+        return ret ? latest_snapshot_info : nullptr;
     };
 
     if (keeper_context->getServerState() == KeeperContext::Phase::SHUTDOWN)
@@ -635,9 +641,9 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
         LOG_INFO(log, "Creating a snapshot during shutdown because 'create_snapshot_on_exit' is enabled.");
         auto snapshot_file_info = snapshot_task.create_snapshot(std::move(snapshot_task.snapshot), /*execute_only_cleanup=*/false);
 
-        if (!snapshot_file_info.path.empty() && snapshot_manager_s3)
+        if (snapshot_file_info && snapshot_manager_s3)
         {
-            LOG_INFO(log, "Uploading snapshot {} during shutdown because 'upload_snapshot_on_exit' is enabled.", snapshot_file_info.path);
+            LOG_INFO(log, "Uploading snapshot {} during shutdown because 'upload_snapshot_on_exit' is enabled.", snapshot_file_info->path);
             snapshot_manager_s3->uploadSnapshot(snapshot_file_info, /* asnyc_upload */ false);
         }
 
@@ -672,7 +678,7 @@ void KeeperStateMachine::save_logical_snp_obj(
         latest_snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(data, s.get_last_log_idx());
         latest_snapshot_meta = cloned_meta;
         latest_snapshot_buf = std::move(cloned_buffer);
-        LOG_DEBUG(log, "Saved snapshot {} to path {}", s.get_last_log_idx(), latest_snapshot_info.path);
+        LOG_DEBUG(log, "Saved snapshot {} to path {}", s.get_last_log_idx(), latest_snapshot_info->path);
         obj_id++;
         ProfileEvents::increment(ProfileEvents::KeeperSaveSnapshot);
     }
@@ -733,7 +739,7 @@ int KeeperStateMachine::read_logical_snp_obj(
         return -1;
     }
 
-    const auto & [path, disk, size] = latest_snapshot_info;
+    const auto & [path, disk, size] = *latest_snapshot_info;
     if (isLocalDisk(*disk))
     {
         auto full_path = fs::path(disk->getPath()) / path;
@@ -864,14 +870,25 @@ uint64_t KeeperStateMachine::getKeyArenaSize() const
 
 uint64_t KeeperStateMachine::getLatestSnapshotSize() const
 {
-    std::lock_guard lock(snapshots_lock);
-    if (latest_snapshot_info.disk == nullptr)
+    auto snapshot_info = [&]
+    {
+        std::lock_guard lock(snapshots_lock);
+        return latest_snapshot_info;
+    }();
+
+    if (snapshot_info == nullptr || snapshot_info->disk == nullptr)
         return 0;
 
-    if (!latest_snapshot_info.size.has_value())
-        latest_snapshot_info.size = latest_snapshot_info.disk->getFileSize(latest_snapshot_info.path);
+    /// there is a possibility multiple threads can try to get size
+    /// this can happen in rare cases while it's not a heavy operation
+    size_t size = snapshot_info->size.load(std::memory_order_relaxed);
+    if (size == 0)
+    {
+        size = snapshot_info->disk->getFileSize(snapshot_info->path);
+        snapshot_info->size.store(size, std::memory_order_relaxed);
+    }
 
-    return *latest_snapshot_info.size;
+    return size;
 }
 
 ClusterConfigPtr KeeperStateMachine::getClusterConfig() const
diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h
index c4d47f9aa61..ee6109f0a17 100644
--- a/src/Coordination/KeeperStateMachine.h
+++ b/src/Coordination/KeeperStateMachine.h
@@ -135,7 +135,7 @@ private:
     /// In our state machine we always have a single snapshot which is stored
     /// in memory in compressed (serialized) format.
     SnapshotMetadataPtr latest_snapshot_meta = nullptr;
-    SnapshotFileInfo latest_snapshot_info;
+    std::shared_ptr<SnapshotFileInfo> latest_snapshot_info;
     nuraft::ptr<nuraft::buffer> latest_snapshot_buf = nullptr;
 
     /// Main state machine logic
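The follow-up above removes the need to hold `snapshots_lock` during slow work: snapshot info becomes a `shared_ptr` that is copied under the lock and used after the lock is released, while the cached size becomes an atomic that tolerates a rare duplicated stat. A compact C++ sketch of that copy-under-lock pattern follows; `StateMachine` and `statFile` are hypothetical illustrations of the idea, not the real Keeper code.

```cpp
#include <atomic>
#include <memory>
#include <mutex>
#include <string>

struct SnapshotFileInfo
{
    std::string path;
    std::atomic<size_t> size{0}; /// 0 means "not computed yet"; benign to race on
};

class StateMachine
{
public:
    size_t getLatestSnapshotSize() const
    {
        /// Copy the pointer under the lock, then release the lock before any
        /// slow work: other threads can keep publishing new snapshots meanwhile.
        std::shared_ptr<SnapshotFileInfo> info = [&]
        {
            std::lock_guard lock(mutex);
            return latest;
        }();

        if (!info)
            return 0;

        size_t size = info->size.load(std::memory_order_relaxed);
        if (size == 0) /// a rare duplicated stat is acceptable; last writer wins
        {
            size = statFile(info->path);
            info->size.store(size, std::memory_order_relaxed);
        }
        return size;
    }

private:
    static size_t statFile(const std::string &) { return 42; } /// stub for disk->getFileSize()

    mutable std::mutex mutex;
    std::shared_ptr<SnapshotFileInfo> latest = std::make_shared<SnapshotFileInfo>();
};

int main()
{
    StateMachine sm;
    return sm.getLatestSnapshotSize() == 42 ? 0 : 1;
}
```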
From f61c40dbbf0504c84eb24bbd2d47754d18a09c0d Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Tue, 4 Jun 2024 16:08:01 +0200
Subject: [PATCH 26/36] do not apply settings explicitly

---
 .../test.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
index 0a0d3dbb092..48d6e264182 100644
--- a/tests/integration/test_unknown_column_dist_table_with_alias/test.py
+++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
@@ -29,6 +29,4 @@ def test_distributed_table_with_alias(start_cluster, prefer_localhost_replica):
         """
     )
 
-    node.query(f"SET prefer_localhost_replica = {prefer_localhost_replica};")
-
-    node.query("WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;")
+    node.query("WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;", settings={"prefer_localhost_replica": prefer_localhost_replica})

From 0ae901c479e1568dbea6e79103dfe3f24de6709a Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Tue, 4 Jun 2024 16:13:30 +0200
Subject: [PATCH 27/36] Better

---
 src/Coordination/KeeperSnapshotManagerS3.cpp | 10 ++++++----
 src/Coordination/KeeperSnapshotManagerS3.h   |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp
index c3b390465be..9991bef7be5 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.cpp
+++ b/src/Coordination/KeeperSnapshotManagerS3.cpp
@@ -145,9 +145,9 @@ std::shared_ptr KeeperSnapshotManagerS
     return snapshot_s3_client;
 }
 
-void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfoPtr & snapshot_file_info)
+void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapshot_file_info)
 {
-    const auto & [snapshot_path, snapshot_disk, snapshot_size] = *snapshot_file_info;
+    const auto & [snapshot_path, snapshot_disk, snapshot_size] = snapshot_file_info;
     try
     {
         auto s3_client = getSnapshotS3Client();
@@ -268,12 +268,14 @@ void KeeperSnapshotManagerS3::snapshotS3Thread()
         if (shutdown_called)
             break;
 
-        uploadSnapshotImpl(snapshot_file_info);
+        uploadSnapshotImpl(*snapshot_file_info);
     }
 }
 
 void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfoPtr & file_info, bool async_upload)
 {
+    chassert(file_info);
+
     if (getSnapshotS3Client() == nullptr)
         return;
 
@@ -285,7 +287,7 @@ void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfoPtr & file_in
         return;
     }
 
-    uploadSnapshotImpl(file_info);
+    uploadSnapshotImpl(*file_info);
 }
 
 void KeeperSnapshotManagerS3::startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros)
diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h
index bf58afc4b12..be7a9d4bd32 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.h
+++ b/src/Coordination/KeeperSnapshotManagerS3.h
@@ -49,7 +49,7 @@ private:
 
     std::shared_ptr getSnapshotS3Client() const;
 
-    void uploadSnapshotImpl(const SnapshotFileInfoPtr & snapshot_file_info);
+    void uploadSnapshotImpl(const SnapshotFileInfo & snapshot_file_info);
 
     /// Thread upload snapshots to S3 in the background
     void snapshotS3Thread();
@@ -61,7 +61,7 @@ public:
     KeeperSnapshotManagerS3() = default;
 
     void updateS3Configuration(const Poco::Util::AbstractConfiguration &, const MultiVersion::Version &) {}
-    void uploadSnapshot(const SnapshotFileInfo &, [[maybe_unused]] bool async_upload = true) {}
+    void uploadSnapshot(const SnapshotFileInfoPtr &, [[maybe_unused]] bool async_upload = true) {}
 
     void startup(const Poco::Util::AbstractConfiguration &, const MultiVersion::Version &) {}
 

From 0f362394b3c0c66bc596e0068ca32d47d492cbaa Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Tue, 4 Jun 2024 18:29:07 +0200
Subject: [PATCH 28/36] black check

---
 .../test_unknown_column_dist_table_with_alias/test.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
index 48d6e264182..0d3890f3e09 100644
--- a/tests/integration/test_unknown_column_dist_table_with_alias/test.py
+++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
@@ -29,4 +29,7 @@ def test_distributed_table_with_alias(start_cluster, prefer_localhost_replica):
         """
     )
 
-    node.query("WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;", settings={"prefer_localhost_replica": prefer_localhost_replica})
+    node.query(
+        "WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;",
+        settings={"prefer_localhost_replica": prefer_localhost_replica},
+    )

From c05d4e52d332bdadc887ad568d8313bea55ada03 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Tue, 4 Jun 2024 19:26:12 +0200
Subject: [PATCH 29/36] Fix memory leak in slru

---
 src/Interpreters/Cache/SLRUFileCachePriority.cpp | 7 +++++--
 src/Interpreters/Cache/SLRUFileCachePriority.h   | 5 ++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index bad8cb18525..7a3fdf5160e 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -325,7 +325,7 @@ void SLRUFileCachePriority::downgrade(IteratorPtr iterator, const CachePriorityG
             candidate_it->getEntry()->toString());
     }
 
-    const size_t entry_size = candidate_it->entry->size;
+    const size_t entry_size = candidate_it->getEntry()->size;
     if (!probationary_queue.canFit(entry_size, 1, lock))
     {
         throw Exception(ErrorCodes::LOGICAL_ERROR,
@@ -483,7 +483,10 @@ SLRUFileCachePriority::SLRUIterator::SLRUIterator(
 
 SLRUFileCachePriority::EntryPtr SLRUFileCachePriority::SLRUIterator::getEntry() const
 {
-    return entry;
+    auto entry_ptr = entry.lock();
+    if (!entry_ptr)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry pointer expired");
+    return entry_ptr;
 }
 
 size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CachePriorityGuard::Lock & lock)
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index ee3cafe322d..2102a0ec558 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -125,7 +125,10 @@ private:
         SLRUFileCachePriority * cache_priority;
         LRUFileCachePriority::LRUIterator lru_iterator;
-        const EntryPtr entry;
+        /// Entry itself is stored by lru_iterator.entry.
+        /// We have it as a separate field to use entry without requiring any lock
+        /// (which will be required if we wanted to get entry from lru_iterator.getEntry()).
+        const std::weak_ptr<Entry> entry;
         /// Atomic,
         /// but needed only in order to do FileSegment::getInfo() without any lock,
         /// which is done for system tables and logging.
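The leak fix above turns the iterator's owning `shared_ptr` into a non-owning `weak_ptr`, so iterators no longer keep evicted cache entries alive; `getEntry()` promotes the pointer and treats expiry as a logical error. A self-contained C++ sketch of that ownership pattern follows; `Entry` and `Iterator` are simplified hypothetical types, not the actual SLRU cache classes.

```cpp
#include <cassert>
#include <memory>
#include <stdexcept>

struct Entry { size_t size = 0; };
using EntryPtr = std::shared_ptr<Entry>;

/// The queue owns entries; iterators only observe them. Holding a shared_ptr
/// in the iterator (the old code) kept evicted entries alive forever.
struct Iterator
{
    std::weak_ptr<Entry> entry;

    EntryPtr getEntry() const
    {
        auto ptr = entry.lock();     /// promote; fails if the entry was destroyed
        if (!ptr)
            throw std::logic_error("Entry pointer expired");
        return ptr;
    }
};

int main()
{
    auto owned = std::make_shared<Entry>();
    Iterator it{owned};
    assert(it.getEntry() == owned);  /// valid while the owner still holds it

    owned.reset();                   /// eviction: the entry is truly freed now
    try { it.getEntry(); return 1; } /// must throw instead of resurrecting it
    catch (const std::logic_error &) { return 0; }
}
```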

From 3bb1157a0e24aef28382253d616bae05812b8e55 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 5 Jun 2024 07:32:57 +0000
Subject: [PATCH 32/36] Fix clang-tidy

---
 src/Storages/S3Queue/S3QueueTableMetadata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h
index d2ebe8382e4..d53b60570ae 100644
--- a/src/Storages/S3Queue/S3QueueTableMetadata.h
+++ b/src/Storages/S3Queue/S3QueueTableMetadata.h
@@ -38,7 +38,7 @@ struct S3QueueTableMetadata
 
     String toString() const;
     void checkEquals(const S3QueueTableMetadata & from_zk) const;
-    static void checkEquals(const S3QueueSettings & lhs, const S3QueueSettings & rhs);
+    static void checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected);
 
 private:
     void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const;

From 06c94383419c338e08a5b3b38a591911432e27fa Mon Sep 17 00:00:00 2001
From: Max K
Date: Wed, 5 Jun 2024 09:34:31 +0200
Subject: [PATCH 33/36] CI: Move coverage build to non-special build list

---
 .github/workflows/master.yml       |  2 +-
 .github/workflows/pull_request.yml |  2 +-
 tests/ci/ci_config.py              | 14 ++++++++++----
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index c2a893a8e99..f37a5fd43b8 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -106,7 +106,7 @@ jobs:
       data: ${{ needs.RunConfig.outputs.data }}
   # stage for jobs that do not prohibit merge
   Tests_3:
-    needs: [RunConfig, Builds_1]
+    needs: [RunConfig, Builds_1, Builds_2]
     if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
     uses: ./.github/workflows/reusable_test_stage.yml
     with:

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 7d22554473e..e4deaf9f35e 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -135,7 +135,7 @@ jobs:
      data: ${{ needs.RunConfig.outputs.data }}
   # stage for jobs that do not prohibit merge
   Tests_3:
-    needs: [RunConfig, Tests_1, Tests_2]
+    needs: [RunConfig, Builds_1, Tests_1, Builds_2, Tests_2]
     if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
     uses: ./.github/workflows/reusable_test_stage.yml
     with:

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index cb40f7899a4..ca46088eb45 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -593,8 +593,6 @@ class CIConfig:
             stage_type = CIStages.BUILDS_2
         elif self.is_docs_job(job_name):
             stage_type = CIStages.TESTS_1
-        elif job_name == JobNames.BUILD_CHECK_SPECIAL:
-            stage_type = CIStages.TESTS_2
         elif self.is_test_job(job_name):
             if job_name in CI_CONFIG.test_configs:
                 required_build = CI_CONFIG.test_configs[job_name].required_build
@@ -854,6 +852,14 @@ class CIConfig:
                     f"The requirement '{test_config}' for "
                     f"'{test_name}' is not found in builds"
                 )
+            if (
+                test_config.required_build
+                and test_config.required_build
+                not in self.builds_report_config[JobNames.BUILD_CHECK].builds
+            ):
+                errors.append(
+                    f"Test job' required build must be from [{JobNames.BUILD_CHECK}] list"
+                )
 
         if errors:
             raise KeyError("config contains errors", errors)
@@ -1068,6 +1074,8 @@ CI_CONFIG = CIConfig(
                 Build.PACKAGE_MSAN,
                 Build.PACKAGE_DEBUG,
                 Build.BINARY_RELEASE,
+                Build.PACKAGE_RELEASE_COVERAGE,
+                Build.FUZZERS,
             ]
         ),
         JobNames.BUILD_CHECK_SPECIAL: BuildReportConfig(
@@ -1084,8 +1092,6 @@ CI_CONFIG = CIConfig(
                 Build.BINARY_LOONGARCH64,
                 Build.BINARY_AMD64_COMPAT,
                 Build.BINARY_AMD64_MUSL,
-                Build.PACKAGE_RELEASE_COVERAGE,
-                Build.FUZZERS,
             ]
         ),
    },
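
The new ci_config.py validation above guards the build move: once PACKAGE_RELEASE_COVERAGE and FUZZERS live in the ordinary Builds report, a test job whose required_build still pointed only at a special-report build would sit in Tests_3 waiting for an artifact that Builds_1/Builds_2 never publish. A stand-alone sketch of that invariant follows; the build and job names are illustrative stand-ins, not the real CI_CONFIG contents.

# Illustrative stand-ins; the real check walks CI_CONFIG.test_configs and
# self.builds_report_config[JobNames.BUILD_CHECK].builds.
BUILD_CHECK_BUILDS = {
    "package_release",
    "package_debug",
    "binary_release",
    "package_release_coverage",  # moved into the normal list by this patch
    "fuzzers",                   # moved into the normal list by this patch
}
TEST_CONFIGS = {  # test job -> required build
    "Stateless tests (coverage)": "package_release_coverage",
    "AST fuzzer (asan)": "fuzzers",
    "Some new test": "binary_riscv64",  # special-report only: gets flagged
}

errors = [
    f"Test job '{name}': required build '{build}' is not in the Builds report"
    for name, build in TEST_CONFIGS.items()
    if build and build not in BUILD_CHECK_BUILDS
]
assert errors == [
    "Test job 'Some new test': required build 'binary_riscv64' "
    "is not in the Builds report"
]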
From c6f6b1da7db236d21fca81ffe4b09ad297112b8b Mon Sep 17 00:00:00 2001
From: Max K
Date: Wed, 5 Jun 2024 09:47:11 +0200
Subject: [PATCH 34/36] add comment in yml wf

---
 .github/workflows/master.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index f37a5fd43b8..91dcb6a4968 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -106,6 +106,7 @@ jobs:
       data: ${{ needs.RunConfig.outputs.data }}
   # stage for jobs that do not prohibit merge
   Tests_3:
+    # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there.
     needs: [RunConfig, Builds_1, Builds_2]
     if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
     uses: ./.github/workflows/reusable_test_stage.yml

From 418911c7d7fe868b69d95010765c78cb7e3b9136 Mon Sep 17 00:00:00 2001
From: Max K
Date: Wed, 5 Jun 2024 10:21:00 +0200
Subject: [PATCH 35/36] CI: fix CI await feature

---
 tests/ci/ci_cache.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py
index 9ce8923f58f..56a84272a63 100644
--- a/tests/ci/ci_cache.py
+++ b/tests/ci/ci_cache.py
@@ -701,7 +701,6 @@ class CiCache:
             len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT
             and round_cnt < MAX_ROUNDS_TO_WAIT
         ):
-            await_finished: Set[str] = set()
             round_cnt += 1
             GHActions.print_in_group(
                 f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:",
@@ -713,6 +712,7 @@ class CiCache:
             expired_sec = 0
             start_at = int(time.time())
             while expired_sec < TIMEOUT and self.jobs_to_wait:
+                await_finished: Set[str] = set()
                 time.sleep(poll_interval_sec)
                 self.update()
                 for job_name, job_config in self.jobs_to_wait.items():
@@ -759,11 +759,6 @@ class CiCache:
                 print(
                     f"...awaiting continues... seconds left [{TIMEOUT - expired_sec}]"
                 )
-            if await_finished:
-                GHActions.print_in_group(
-                    f"Finished jobs, round [{round_cnt}]: [{list(await_finished)}]",
-                    list(await_finished),
-                )
 
         GHActions.print_in_group(
             "Remaining jobs:",

From 3e8929f5f04e4d863637bfc82630a4173814b94f Mon Sep 17 00:00:00 2001
From: tomershafir
Date: Wed, 5 Jun 2024 12:40:18 +0300
Subject: [PATCH 36/36] xray: rename cmake file and build only on amd64 linux

---
 CMakeLists.txt                                         | 2 +-
 cmake/{instrument.cmake => xray_instrumentation.cmake} | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename cmake/{instrument.cmake => xray_instrumentation.cmake} (91%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 601cbe7201c..455adc24182 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -122,7 +122,7 @@ add_library(global-libs INTERFACE)
 
 include (cmake/sanitize.cmake)
 
-include (cmake/instrument.cmake)
+include (cmake/xray_instrumentation.cmake)
 
 option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)

diff --git a/cmake/instrument.cmake b/cmake/xray_instrumentation.cmake
similarity index 91%
rename from cmake/instrument.cmake
rename to cmake/xray_instrumentation.cmake
index bd2fb4d45fc..661c0575e54 100644
--- a/cmake/instrument.cmake
+++ b/cmake/xray_instrumentation.cmake
@@ -7,7 +7,7 @@ if (NOT ENABLE_XRAY)
     return()
 endif()
 
-if (NOT (ARCH_AMD64 AND (OS_LINUX OR OS_FREEBSD)))
+if (NOT (ARCH_AMD64 AND OS_LINUX))
     message (STATUS "Not using LLVM XRay, only amd64 Linux or FreeBSD are supported")
     return()
 endif()
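
To close the loop on PATCH 35: the await fix is subtle, so here is a minimal, self-contained model of the corrected polling loop. The cache and GitHub machinery are faked with an is_done callback, and the names and signatures are assumptions rather than the real CiCache API; the point is only the placement of await_finished, which the patch moves inside the poll loop so each iteration starts from a clean set and a job finished in an earlier iteration cannot be removed or reported twice.

import time

def await_pending_jobs(jobs_to_wait, is_done, poll_interval_sec=0.1,
                       timeout=5.0, max_rounds=3):
    """Simplified model of CiCache.await_pending_jobs after the fix."""
    round_cnt = 0
    while jobs_to_wait and round_cnt < max_rounds:
        round_cnt += 1
        start_at = time.time()
        while time.time() - start_at < timeout and jobs_to_wait:
            await_finished = set()  # reset per poll iteration (the fix)
            time.sleep(poll_interval_sec)
            for job in list(jobs_to_wait):  # stand-in for self.update() + cache lookup
                if is_done(job):
                    await_finished.add(job)
            for job in await_finished:
                jobs_to_wait.remove(job)
    return jobs_to_wait

# Usage: every job finishes on its first poll, so nothing is left pending.
pending = {"Stateless tests (release)", "Integration tests (asan)"}
assert not await_pending_jobs(pending, is_done=lambda job: True)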