From 2f083fd1a709dd236ffaf37a7a059cafef8c4a41 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 2 Jul 2024 15:08:14 +0000 Subject: [PATCH 01/30] Fix resolving dynamic subcolumns in analyzer --- src/Analyzer/Resolve/IdentifierResolver.cpp | 35 ++++++++++++++++--- .../03198_dynamic_read_subcolumns.reference | 8 +++++ .../03198_dynamic_read_subcolumns.sql | 18 ++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference create mode 100644 tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 692a31b66ba..4347f07e829 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -678,9 +679,33 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage( bool match_full_identifier = false; const auto & identifier_full_name = identifier_without_column_qualifier.getFullName(); - auto it = table_expression_data.column_name_to_column_node.find(identifier_full_name); - bool can_resolve_directly_from_storage = it != table_expression_data.column_name_to_column_node.end(); - if (can_resolve_directly_from_storage && table_expression_data.subcolumn_names.contains(identifier_full_name)) + + ColumnNodePtr result_column_node; + bool can_resolve_directly_from_storage = false; + bool is_subcolumn = false; + if (auto it = table_expression_data.column_name_to_column_node.find(identifier_full_name); it != table_expression_data.column_name_to_column_node.end()) + { + can_resolve_directly_from_storage = true; + is_subcolumn = table_expression_data.subcolumn_names.contains(identifier_full_name); + result_column_node = it->second; + } + /// Check if it's a dynamic subcolumn + else + { + auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name); + auto jt = table_expression_data.column_name_to_column_node.find(column_name); + if (jt != table_expression_data.column_name_to_column_node.end() && jt->second->getColumnType()->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = jt->second->getColumnType()->tryGetSubcolumnType(dynamic_subcolumn_name)) + { + result_column_node = std::make_shared(NameAndTypePair{identifier_full_name, dynamic_subcolumn_type}, jt->second->getColumnSource()); + can_resolve_directly_from_storage = true; + is_subcolumn = true; + } + } + } + + if (can_resolve_directly_from_storage && is_subcolumn) { /** In the case when we have an ARRAY JOIN, we should not resolve subcolumns directly from storage. * For example, consider the following SQL query: @@ -696,11 +721,11 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage( if (can_resolve_directly_from_storage) { match_full_identifier = true; - result_expression = it->second; + result_expression = result_column_node; } else { - it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.at(0)); + auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.at(0)); if (it != table_expression_data.column_name_to_column_node.end()) result_expression = it->second; } diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference new file mode 100644 index 00000000000..59d65220c83 --- /dev/null +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference @@ -0,0 +1,8 @@ +foo +\N +\N +foo +\N +\N +6 +6 diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql new file mode 100644 index 00000000000..7f88b4cd347 --- /dev/null +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql @@ -0,0 +1,18 @@ +SET allow_experimental_dynamic_type = 1; +DROP TABLE IF EXISTS test_dynamic; +CREATE TABLE test_dynamic (id UInt64, d Dynamic) ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; +INSERT INTO test_dynamic VALUES (1, 'foo'), (2, 1111), (3, [1, 2, 3]); +SYSTEM DROP MARK CACHE; +SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 1; +SYSTEM DROP MARK CACHE; +SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 0; +SYSTEM FLUSH LOGS; +SELECT + ProfileEvents['FileOpen'] +FROM system.query_log +WHERE (type = 2) AND (query LIKE 'SELECT d.String %test_dynamic%') AND (current_database = currentDatabase()) +ORDER BY event_time_microseconds DESC +LIMIT 2; + +DROP TABLE test_dynamic; + From ac315785d3ab89756b160ef1610bf5e4125c332b Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 2 Jul 2024 23:41:57 +0000 Subject: [PATCH 02/30] Update tests --- .../03040_dynamic_type_alters_1.reference | 108 +++++++++--------- .../03040_dynamic_type_alters_1.sh | 2 +- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference index ca98ec0963c..b41645f0c4a 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -30,16 +30,16 @@ alter modify column 1 0 0 \N \N \N \N \N 1 1 \N \N \N \N \N 2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N 6 6 str_6 str_6 \N \N \N 7 7 str_7 str_7 \N \N \N 8 8 str_8 str_8 \N \N \N 9 9 \N \N \N \N \N 10 10 \N \N \N \N \N 11 11 \N \N \N \N \N -12 12 12 12 \N \N \N +12 12 12 \N 12 \N \N 13 13 str_13 str_13 \N \N \N 14 14 \N \N \N \N \N insert after alter modify column 1 @@ -48,16 +48,16 @@ insert after alter modify column 1 0 0 \N \N \N \N \N 1 1 \N \N \N \N \N 2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N 6 6 str_6 str_6 \N \N \N 7 7 str_7 str_7 \N \N \N 8 8 str_8 str_8 \N \N \N 9 9 \N \N \N \N \N 10 10 \N \N \N \N \N 11 11 \N \N \N \N \N -12 12 12 12 \N \N \N +12 12 12 \N 12 \N \N 13 13 str_13 str_13 \N \N \N 14 14 \N \N \N \N \N 15 15 \N \N \N \N \N @@ -120,57 +120,57 @@ alter modify column 3 5 UInt64 8 String 9 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 \N \N \N \N \N \N \N +1 1 \N \N \N \N \N \N \N +2 2 \N \N \N \N \N \N \N +3 3 \N \N \N \N 3 \N \N +4 4 \N \N \N \N 4 \N \N +5 5 \N \N \N \N 5 \N \N +6 6 \N \N \N str_6 \N \N \N +7 7 \N \N \N str_7 \N \N \N +8 8 \N \N \N str_8 \N \N \N +9 9 \N \N \N \N \N \N \N +10 10 \N \N \N \N \N \N \N +11 11 \N \N \N \N \N \N \N +12 12 \N \N \N \N 12 \N \N +13 13 \N \N \N str_13 \N \N \N +14 14 \N \N \N \N \N \N \N +15 15 \N \N \N \N \N \N \N +16 16 \N \N \N 16 \N \N \N +17 17 \N \N \N str_17 \N \N \N +18 18 \N \N \N 1970-01-19 \N \N \N +19 19 \N \N \N \N \N \N \N +20 20 \N \N \N \N 20 \N \N +21 21 \N \N \N str_21 \N \N \N +22 22 \N \N \N \N \N 1970-01-23 \N insert after alter modify column 3 1 Date 5 UInt64 8 String 12 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 \N \N \N \N \N \N \N +1 1 \N \N \N \N \N \N \N +2 2 \N \N \N \N \N \N \N +3 3 \N \N \N \N 3 \N \N +4 4 \N \N \N \N 4 \N \N +5 5 \N \N \N \N 5 \N \N +6 6 \N \N \N str_6 \N \N \N +7 7 \N \N \N str_7 \N \N \N +8 8 \N \N \N str_8 \N \N \N +9 9 \N \N \N \N \N \N \N +10 10 \N \N \N \N \N \N \N +11 11 \N \N \N \N \N \N \N +12 12 \N \N \N \N 12 \N \N +13 13 \N \N \N str_13 \N \N \N +14 14 \N \N \N \N \N \N \N +15 15 \N \N \N \N \N \N \N +16 16 \N \N \N 16 \N \N \N +17 17 \N \N \N str_17 \N \N \N +18 18 \N \N \N 1970-01-19 \N \N \N +19 19 \N \N \N \N \N \N \N +20 20 \N \N \N \N 20 \N \N +21 21 \N \N \N str_21 \N \N \N +22 22 \N \N \N \N \N 1970-01-23 \N 23 \N \N \N \N \N \N \N \N 24 24 24 \N \N \N \N \N \N 25 str_25 \N str_25 \N \N \N \N \N diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh index 7a73be20a4d..080a7e583bf 100755 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=0" function run() { From 65a34b8f63cc0cd6fda3d454e7505a01f0a7a75a Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 07:27:44 +0000 Subject: [PATCH 03/30] Fix test flakiness --- tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql index 7f88b4cd347..a9e3d88b7c7 100644 --- a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + SET allow_experimental_dynamic_type = 1; DROP TABLE IF EXISTS test_dynamic; CREATE TABLE test_dynamic (id UInt64, d Dynamic) ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; From be3d707a5b519e9d810bfcf65b10379b00323762 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 16:37:01 +0000 Subject: [PATCH 04/30] Update test --- .../0_stateless/03198_dynamic_read_subcolumns.reference | 9 +++++++++ .../0_stateless/03198_dynamic_read_subcolumns.sql | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference index 59d65220c83..f9fe0c2ad77 100644 --- a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference @@ -1,3 +1,12 @@ +QUERY id: 0 + PROJECTION COLUMNS + d.String Nullable(String) + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: d.String, result_type: Nullable(String), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.test_dynamic + SETTINGS allow_experimental_analyzer=1 foo \N \N diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql index a9e3d88b7c7..27fca179580 100644 --- a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql @@ -1,9 +1,10 @@ --- Tags: no-random-settings +-- Tags: no-random-settings, no-s3-storage SET allow_experimental_dynamic_type = 1; DROP TABLE IF EXISTS test_dynamic; CREATE TABLE test_dynamic (id UInt64, d Dynamic) ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_dynamic VALUES (1, 'foo'), (2, 1111), (3, [1, 2, 3]); +EXPLAIN QUERY TREE SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 1; SYSTEM DROP MARK CACHE; SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 1; SYSTEM DROP MARK CACHE; From a2de85427473eb3ec982a3fc79cb85fe0ab3cbcb Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 16:37:54 +0000 Subject: [PATCH 05/30] Remove allow_experimental_analyzer from the test --- tests/queries/0_stateless/03040_dynamic_type_alters_1.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh index 080a7e583bf..1f2a6a31ad7 100755 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" function run() { From f0966f519b5bbc88c63f604ab97603b504023869 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 4 Jul 2024 21:00:51 +0000 Subject: [PATCH 06/30] Revert changes in test reference --- .../03040_dynamic_type_alters_1.reference | 108 +++++++++--------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference index b41645f0c4a..ca98ec0963c 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -30,16 +30,16 @@ alter modify column 1 0 0 \N \N \N \N \N 1 1 \N \N \N \N \N 2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N 6 6 str_6 str_6 \N \N \N 7 7 str_7 str_7 \N \N \N 8 8 str_8 str_8 \N \N \N 9 9 \N \N \N \N \N 10 10 \N \N \N \N \N 11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N +12 12 12 12 \N \N \N 13 13 str_13 str_13 \N \N \N 14 14 \N \N \N \N \N insert after alter modify column 1 @@ -48,16 +48,16 @@ insert after alter modify column 1 0 0 \N \N \N \N \N 1 1 \N \N \N \N \N 2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N 6 6 str_6 str_6 \N \N \N 7 7 str_7 str_7 \N \N \N 8 8 str_8 str_8 \N \N \N 9 9 \N \N \N \N \N 10 10 \N \N \N \N \N 11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N +12 12 12 12 \N \N \N 13 13 str_13 str_13 \N \N \N 14 14 \N \N \N \N \N 15 15 \N \N \N \N \N @@ -120,57 +120,57 @@ alter modify column 3 5 UInt64 8 String 9 None -0 0 \N \N \N \N \N \N \N -1 1 \N \N \N \N \N \N \N -2 2 \N \N \N \N \N \N \N -3 3 \N \N \N \N 3 \N \N -4 4 \N \N \N \N 4 \N \N -5 5 \N \N \N \N 5 \N \N -6 6 \N \N \N str_6 \N \N \N -7 7 \N \N \N str_7 \N \N \N -8 8 \N \N \N str_8 \N \N \N -9 9 \N \N \N \N \N \N \N -10 10 \N \N \N \N \N \N \N -11 11 \N \N \N \N \N \N \N -12 12 \N \N \N \N 12 \N \N -13 13 \N \N \N str_13 \N \N \N -14 14 \N \N \N \N \N \N \N -15 15 \N \N \N \N \N \N \N -16 16 \N \N \N 16 \N \N \N -17 17 \N \N \N str_17 \N \N \N -18 18 \N \N \N 1970-01-19 \N \N \N -19 19 \N \N \N \N \N \N \N -20 20 \N \N \N \N 20 \N \N -21 21 \N \N \N str_21 \N \N \N -22 22 \N \N \N \N \N 1970-01-23 \N +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N insert after alter modify column 3 1 Date 5 UInt64 8 String 12 None -0 0 \N \N \N \N \N \N \N -1 1 \N \N \N \N \N \N \N -2 2 \N \N \N \N \N \N \N -3 3 \N \N \N \N 3 \N \N -4 4 \N \N \N \N 4 \N \N -5 5 \N \N \N \N 5 \N \N -6 6 \N \N \N str_6 \N \N \N -7 7 \N \N \N str_7 \N \N \N -8 8 \N \N \N str_8 \N \N \N -9 9 \N \N \N \N \N \N \N -10 10 \N \N \N \N \N \N \N -11 11 \N \N \N \N \N \N \N -12 12 \N \N \N \N 12 \N \N -13 13 \N \N \N str_13 \N \N \N -14 14 \N \N \N \N \N \N \N -15 15 \N \N \N \N \N \N \N -16 16 \N \N \N 16 \N \N \N -17 17 \N \N \N str_17 \N \N \N -18 18 \N \N \N 1970-01-19 \N \N \N -19 19 \N \N \N \N \N \N \N -20 20 \N \N \N \N 20 \N \N -21 21 \N \N \N str_21 \N \N \N -22 22 \N \N \N \N \N 1970-01-23 \N +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N 23 \N \N \N \N \N \N \N \N 24 24 24 \N \N \N \N \N \N 25 str_25 \N str_25 \N \N \N \N \N From d821dfe9033357ae6038f3f7b16d87fd91e05da9 Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 6 Jul 2024 15:03:13 +0000 Subject: [PATCH 07/30] Fix tests --- src/Interpreters/getColumnFromBlock.cpp | 2 +- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/getColumnFromBlock.cpp b/src/Interpreters/getColumnFromBlock.cpp index 2e70a58b5a1..89166bb2b3e 100644 --- a/src/Interpreters/getColumnFromBlock.cpp +++ b/src/Interpreters/getColumnFromBlock.cpp @@ -40,7 +40,7 @@ ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requ auto subcolumn_name = requested_subcolumn.getSubcolumnName(); /// If requested subcolumn is dynamic, we should first perform cast and then /// extract the subcolumn, because the data of dynamic subcolumn can change after cast. - if (elem->type->hasDynamicSubcolumns() && !elem->type->equals(*requested_column_type)) + if ((elem->type->hasDynamicSubcolumns() || requested_column_type->hasDynamicSubcolumns()) && !elem->type->equals(*requested_column_type)) { auto casted_column = castColumn({elem->column, elem->type, ""}, requested_column_type); auto elem_column = requested_column_type->tryGetSubcolumn(subcolumn_name, casted_column); diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index 65517061b99..35d4f3a02cf 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 optimize_functions_to_subcolumns=0" function test() From fc3ae09a11c45346f18ec39094adc448dcb9ec45 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Sat, 6 Jul 2024 21:24:56 +0200 Subject: [PATCH 08/30] Fix typo in the test --- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index 35d4f3a02cf..a1cb35e4609 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 optimize_functions_to_subcolumns=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" function test() From 30bb03d6c327d15c7161ca18b35b9edf04310de4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 8 Jul 2024 11:13:44 +0200 Subject: [PATCH 09/30] Update test --- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index a1cb35e4609..65517061b99 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" function test() From 0f38756f4e02061ddb4c144c2d80e574ccb97cbd Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:33:23 +0200 Subject: [PATCH 10/30] Disable optimize_functions_to_subcolumns in 03036_dynamic_read_subcolumns --- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index 65517061b99..a1cb35e4609 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" function test() From 22154aa079132184183659ebee80abcc855be403 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 8 Jul 2024 14:36:31 +0000 Subject: [PATCH 11/30] Disable optimize_functions_to_subcolumns in 03202_dynamic_null_map_subcolumn --- tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh index aa06e48376c..d357419f03a 100755 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 optimize_functions_to_subcolumns=0" function test() From b60151c1d8e8e2d0fe5ea0946bdbdbfae9abecc1 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 10 Jul 2024 10:13:49 +0000 Subject: [PATCH 12/30] Remove optimize_functions_to_subcolumns setting from test --- tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh index d357419f03a..aa06e48376c 100755 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 optimize_functions_to_subcolumns=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" function test() From 25157fd17d27429cf27d4cd1c9e6e87be37d6798 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 12 Jul 2024 11:01:42 +0200 Subject: [PATCH 13/30] Disable optimize_functions_to_subcolumns in test 03202_dynamic_null_map_subcolumn --- tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh index aa06e48376c..968c9e5271f 100755 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" function test() From 8c2e4900e301b7b54c8c266f2c2c8ef4a4ef5e2c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Jul 2024 10:48:34 +0000 Subject: [PATCH 14/30] Introduce isServerConstant() flag to functions. --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 6 +- src/Functions/IFunction.h | 12 ++++ src/Functions/IFunctionAdaptors.h | 2 + src/Functions/getScalar.cpp | 4 ++ src/Functions/serverConstants.cpp | 56 +++++++++++-------- src/Planner/PlannerExpressionAnalysis.cpp | 6 +- .../02992_analyzer_group_by_const.sql | 43 ++++++++++++++ 7 files changed, 99 insertions(+), 30 deletions(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 576c4943ccb..818e75f9594 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -3410,14 +3410,14 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi function_base = function->build(argument_columns); /// Do not constant fold get scalar functions - bool disable_constant_folding = function_name == "__getScalar" || function_name == "shardNum" || - function_name == "shardCount" || function_name == "hostName" || function_name == "tcpPort"; + // bool disable_constant_folding = function_name == "__getScalar" || function_name == "shardNum" || + // function_name == "shardCount" || function_name == "hostName" || function_name == "tcpPort"; /** If function is suitable for constant folding try to convert it to constant. * Example: SELECT plus(1, 1); * Result: SELECT 2; */ - if (function_base->isSuitableForConstantFolding() && !disable_constant_folding) + if (function_base->isSuitableForConstantFolding()) // && !disable_constant_folding) { auto result_type = function_base->getResultType(); auto executable_function = function_base->prepare(argument_columns); diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index a66456cabee..23aab420338 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -230,6 +230,17 @@ public: virtual bool isDeterministicInScopeOfQuery() const { return true; } + /** This is a special flags for functions which return constant value for the server, + * but the result could be different for different servers in distributed query. + * + * This fuctions can't support constant folding on the initiator, but can on the follower. + * We can't apply some optimizations as well (e.g. can't remove constant result from GROUP BY key). + * So, it is convenient to have a special flag for them. + * + * Examples are: "__getScalar" and every function from serverConstants.cpp + */ + virtual bool isServerConstant() const { return false; } + /** Lets you know if the function is monotonic in a range of values. * This is used to work with the index in a sorted chunk of data. * And allows to use the index not only when it is written, for example `date >= const`, but also, for example, `toMonth(date) >= 11`. @@ -488,6 +499,7 @@ public: virtual bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const { return false; } virtual bool isDeterministic() const { return true; } virtual bool isDeterministicInScopeOfQuery() const { return true; } + virtual bool isServerConstant() const { return false; } virtual bool isStateful() const { return false; } using ShortCircuitSettings = IFunctionBase::ShortCircuitSettings; diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index 04bd03a776e..c9929a083c1 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -86,6 +86,8 @@ public: bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } + bool isServerConstant() const override { return function->isServerConstant(); } + bool isShortCircuit(ShortCircuitSettings & settings, size_t number_of_arguments) const override { return function->isShortCircuit(settings, number_of_arguments); } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & args) const override { return function->isSuitableForShortCircuitArgumentsExecution(args); } diff --git a/src/Functions/getScalar.cpp b/src/Functions/getScalar.cpp index 7196cbc0a36..5131dca962e 100644 --- a/src/Functions/getScalar.cpp +++ b/src/Functions/getScalar.cpp @@ -49,6 +49,8 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isServerConstant() const override { return true; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1 || !isString(arguments[0].type) || !arguments[0].column || !isColumnConst(*arguments[0].column)) @@ -105,6 +107,8 @@ public: bool isDeterministic() const override { return false; } + bool isServerConstant() const override { return true; } + bool isSuitableForConstantFolding() const override { return !is_distributed; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index e7e423058f1..1e2c8c48bab 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -20,117 +20,125 @@ namespace DB namespace { + template + class FunctionServerConstantBase : public FunctionConstantBase + { + public: + using FunctionConstantBase::FunctionConstantBase; + bool isServerConstant() const override { return true; } + }; + #if defined(__ELF__) && !defined(OS_FREEBSD) /// buildId() - returns the compiler build id of the running binary. - class FunctionBuildId : public FunctionConstantBase + class FunctionBuildId : public FunctionServerConstantBase { public: static constexpr auto name = "buildId"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(SymbolIndex::instance().getBuildIDHex(), context->isDistributed()) {} + explicit FunctionBuildId(ContextPtr context) : FunctionServerConstantBase(SymbolIndex::instance().getBuildIDHex(), context->isDistributed()) {} }; #endif /// Get the host name. It is constant on single server, but is not constant in distributed queries. - class FunctionHostName : public FunctionConstantBase + class FunctionHostName : public FunctionServerConstantBase { public: static constexpr auto name = "hostName"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionHostName(ContextPtr context) : FunctionConstantBase(DNSResolver::instance().getHostName(), context->isDistributed()) {} + explicit FunctionHostName(ContextPtr context) : FunctionServerConstantBase(DNSResolver::instance().getHostName(), context->isDistributed()) {} }; - class FunctionServerUUID : public FunctionConstantBase + class FunctionServerUUID : public FunctionServerConstantBase { public: static constexpr auto name = "serverUUID"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionServerUUID(ContextPtr context) : FunctionConstantBase(ServerUUID::get(), context->isDistributed()) {} + explicit FunctionServerUUID(ContextPtr context) : FunctionServerConstantBase(ServerUUID::get(), context->isDistributed()) {} }; - class FunctionTCPPort : public FunctionConstantBase + class FunctionTCPPort : public FunctionServerConstantBase { public: static constexpr auto name = "tcpPort"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTCPPort(ContextPtr context) : FunctionConstantBase(context->getTCPPort(), context->isDistributed()) {} + explicit FunctionTCPPort(ContextPtr context) : FunctionServerConstantBase(context->getTCPPort(), context->isDistributed()) {} }; /// Returns timezone for current session. - class FunctionTimezone : public FunctionConstantBase + class FunctionTimezone : public FunctionServerConstantBase { public: static constexpr auto name = "timezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(DateLUT::instance().getTimeZone(), context->isDistributed()) {} + explicit FunctionTimezone(ContextPtr context) : FunctionServerConstantBase(DateLUT::instance().getTimeZone(), context->isDistributed()) {} }; /// Returns the server time zone (timezone in which server runs). - class FunctionServerTimezone : public FunctionConstantBase + class FunctionServerTimezone : public FunctionServerConstantBase { public: static constexpr auto name = "serverTimezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionServerTimezone(ContextPtr context) : FunctionConstantBase(DateLUT::serverTimezoneInstance().getTimeZone(), context->isDistributed()) {} + explicit FunctionServerTimezone(ContextPtr context) : FunctionServerConstantBase(DateLUT::serverTimezoneInstance().getTimeZone(), context->isDistributed()) {} }; /// Returns server uptime in seconds. - class FunctionUptime : public FunctionConstantBase + class FunctionUptime : public FunctionServerConstantBase { public: static constexpr auto name = "uptime"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionUptime(ContextPtr context) : FunctionConstantBase(context->getUptimeSeconds(), context->isDistributed()) {} + explicit FunctionUptime(ContextPtr context) : FunctionServerConstantBase(context->getUptimeSeconds(), context->isDistributed()) {} }; /// version() - returns the current version as a string. - class FunctionVersion : public FunctionConstantBase + class FunctionVersion : public FunctionServerConstantBase { public: static constexpr auto name = "version"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionVersion(ContextPtr context) : FunctionConstantBase(VERSION_STRING, context->isDistributed()) {} + explicit FunctionVersion(ContextPtr context) : FunctionServerConstantBase(VERSION_STRING, context->isDistributed()) {} }; /// revision() - returns the current revision. - class FunctionRevision : public FunctionConstantBase + class FunctionRevision : public FunctionServerConstantBase { public: static constexpr auto name = "revision"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionRevision(ContextPtr context) : FunctionConstantBase(ClickHouseRevision::getVersionRevision(), context->isDistributed()) {} + explicit FunctionRevision(ContextPtr context) : FunctionServerConstantBase(ClickHouseRevision::getVersionRevision(), context->isDistributed()) {} }; - class FunctionZooKeeperSessionUptime : public FunctionConstantBase + class FunctionZooKeeperSessionUptime : public FunctionServerConstantBase { public: static constexpr auto name = "zookeeperSessionUptime"; explicit FunctionZooKeeperSessionUptime(ContextPtr context) - : FunctionConstantBase(context->getZooKeeperSessionUptime(), context->isDistributed()) + : FunctionServerConstantBase(context->getZooKeeperSessionUptime(), context->isDistributed()) { } static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } }; - class FunctionGetOSKernelVersion : public FunctionConstantBase + class FunctionGetOSKernelVersion : public FunctionServerConstantBase { public: static constexpr auto name = "getOSKernelVersion"; - explicit FunctionGetOSKernelVersion(ContextPtr context) : FunctionConstantBase(Poco::Environment::osName() + " " + Poco::Environment::osVersion(), context->isDistributed()) {} + explicit FunctionGetOSKernelVersion(ContextPtr context) : FunctionServerConstantBase(Poco::Environment::osName() + " " + Poco::Environment::osVersion(), context->isDistributed()) {} static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } }; - class FunctionDisplayName : public FunctionConstantBase + class FunctionDisplayName : public FunctionServerConstantBase { public: static constexpr auto name = "displayName"; - explicit FunctionDisplayName(ContextPtr context) : FunctionConstantBase(context->getConfigRef().getString("display_name", getFQDNOrHostName()), context->isDistributed()) {} + explicit FunctionDisplayName(ContextPtr context) : FunctionServerConstantBase(context->getConfigRef().getString("display_name", getFQDNOrHostName()), context->isDistributed()) {} static FunctionPtr create(ContextPtr context) {return std::make_shared(context); } }; } diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index ceb506d1bbb..f0de3e69113 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -85,14 +85,14 @@ bool canRemoveConstantFromGroupByKey(const ConstantNode & root) else if (function_node) { /// Do not allow removing constants like `hostName()` - if (!function_node->getFunctionOrThrow()->isDeterministic()) + if (function_node->getFunctionOrThrow()->isServerConstant()) return false; for (const auto & child : function_node->getArguments()) nodes.push(child.get()); } - else - return false; + // else + // return false; } return true; diff --git a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql index ede6e0deed9..89049515a37 100644 --- a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql +++ b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql @@ -30,3 +30,46 @@ SELECT min(dummy) FROM remote('127.0.0.{2,3}', system.one) GROUP BY y; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{1,2}', default, ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', default, ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{1,2}', default, ttt) +GROUP BY c1 + 1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', default, ttt) +GROUP BY c1 + 1 FORMAT Null; + +CREATE TABLE ttt (hr DateTime, ts DateTime) ENGINE=Memory +as select '2000-01-01' d, d; + +SELECT + count(), + tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, + defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, + formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 +FROM remote('127.0.0.{1,2}', default, ttt) +GROUP BY c1, c2, c3 FORMAT Null; + +SELECT + count(), + tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, + defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, + formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 +FROM remote('127.0.0.{3,2}', default, ttt) +GROUP BY c1, c2, c3 FORMAT Null; From dbfc9fc71fb32a8bc14d50ee6f15db224157fd22 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 12 Jul 2024 13:04:41 +0200 Subject: [PATCH 15/30] Update 03036_dynamic_read_subcolumns.sh --- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index a1cb35e4609..65517061b99 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" function test() From 1e9f47707fc7b47703c0e2957da09192bd044e81 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 12 Jul 2024 13:05:40 +0200 Subject: [PATCH 16/30] Fix optimizing array size to subcolumns in dynamic subcolumn --- src/Storages/ColumnsDescription.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index c07583cd39d..da749812167 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -701,15 +701,15 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns auto jt = subcolumns.get<0>().find(column_name); if (jt != subcolumns.get<0>().end()) return *jt; - } - /// Check for dynamic subcolumns. - auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); - it = columns.get<1>().find(ordinary_column_name); - if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) - { - if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) - return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + /// Check for dynamic subcolumns. + auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + } } return {}; From 64d2b01d4a1a56f7cab978142cb15cf71ff0b4a1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Jul 2024 15:51:16 +0000 Subject: [PATCH 17/30] Fixing style --- src/Functions/IFunction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 23aab420338..12931b51df2 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -233,7 +233,7 @@ public: /** This is a special flags for functions which return constant value for the server, * but the result could be different for different servers in distributed query. * - * This fuctions can't support constant folding on the initiator, but can on the follower. + * This functions can't support constant folding on the initiator, but can on the follower. * We can't apply some optimizations as well (e.g. can't remove constant result from GROUP BY key). * So, it is convenient to have a special flag for them. * From 00fa457a5d14a96b9f3e7ed3b937506f07476789 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Jul 2024 17:06:07 +0000 Subject: [PATCH 18/30] Fixing test. --- src/Functions/getMacro.cpp | 2 + .../02992_analyzer_group_by_const.sql | 52 +++++++++---------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/Functions/getMacro.cpp b/src/Functions/getMacro.cpp index 8172fc8ba2e..b7f0b34d652 100644 --- a/src/Functions/getMacro.cpp +++ b/src/Functions/getMacro.cpp @@ -53,6 +53,8 @@ public: /// getMacro may return different values on different shards/replicas, so it's not constant for distributed query bool isSuitableForConstantFolding() const override { return !is_distributed; } + bool isServerConstant() const override { return true; } + size_t getNumberOfArguments() const override { return 1; diff --git a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql index 89049515a37..2a9e673d7bc 100644 --- a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql +++ b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql @@ -31,39 +31,39 @@ SELECT FROM remote('127.0.0.{2,3}', system.one) GROUP BY y; -SELECT - count(), - now() AS c1 -FROM remote('127.0.0.{1,2}', default, ttt) -GROUP BY c1 FORMAT Null; - -SELECT - count(), - now() AS c1 -FROM remote('127.0.0.{3,2}', default, ttt) -GROUP BY c1 FORMAT Null; - -SELECT - count(), - now() AS c1 -FROM remote('127.0.0.{1,2}', default, ttt) -GROUP BY c1 + 1 FORMAT Null; - -SELECT - count(), - now() AS c1 -FROM remote('127.0.0.{3,2}', default, ttt) -GROUP BY c1 + 1 FORMAT Null; - CREATE TABLE ttt (hr DateTime, ts DateTime) ENGINE=Memory as select '2000-01-01' d, d; +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{1,2}', currentDatabase(), ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', currentDatabase(), ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{1,2}', currentDatabase(), ttt) +GROUP BY c1 + 1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', currentDatabase(), ttt) +GROUP BY c1 + 1 FORMAT Null; + SELECT count(), tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 -FROM remote('127.0.0.{1,2}', default, ttt) +FROM remote('127.0.0.{1,2}', currentDatabase(), ttt) GROUP BY c1, c2, c3 FORMAT Null; SELECT @@ -71,5 +71,5 @@ SELECT tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 -FROM remote('127.0.0.{3,2}', default, ttt) +FROM remote('127.0.0.{3,2}', currentDatabase(), ttt) GROUP BY c1, c2, c3 FORMAT Null; From 5bce8086a86716f03ceb36f22412ed21852c610e Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 19:02:20 +0200 Subject: [PATCH 19/30] CI: Create release workflow updates --- tests/ci/create_release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 7f4cf8c787a..cd01b646ebe 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -120,7 +120,7 @@ class ReleaseInfo: expected_version = copy(version) expected_version.bump() expected_tag_prefix = ( - f"v{expected_version.major}.{expected_version.minor}-" + f"v{expected_version.major}.{expected_version.minor}." ) expected_tag_suffix = "-new" else: From 2ddd0078dcf5f33004cfb558328303c29306a32a Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 21:07:41 +0200 Subject: [PATCH 20/30] rename reusable docker workflow --- .github/workflows/backport_branches.yml | 2 +- .github/workflows/create_release.yml | 2 +- .../workflows/{reusable_docker.yml => docker_test_images.yml} | 0 .github/workflows/master.yml | 2 +- .github/workflows/merge_queue.yml | 2 +- .github/workflows/nightly.yml | 2 +- .github/workflows/pull_request.yml | 2 +- .github/workflows/release_branches.yml | 2 +- tests/ci/create_release.py | 2 ++ 9 files changed, 9 insertions(+), 7 deletions(-) rename .github/workflows/{reusable_docker.yml => docker_test_images.yml} (100%) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 64c3d2f8342..60bd79560eb 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -62,7 +62,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckX86: diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 972aff90195..d8d27531f28 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -94,7 +94,7 @@ jobs: echo "Generate Security" python3 ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD - - name: Generate ChangeLog + - name: Create ChangeLog PR if: ${{ inputs.type == 'patch' && ! inputs.dry-run }} uses: peter-evans/create-pull-request@v6 with: diff --git a/.github/workflows/reusable_docker.yml b/.github/workflows/docker_test_images.yml similarity index 100% rename from .github/workflows/reusable_docker.yml rename to .github/workflows/docker_test_images.yml diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 2a7e6f737ab..d27b1987532 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -58,7 +58,7 @@ jobs: # BuildDockers: # needs: [RunConfig] # if: ${{ !failure() && !cancelled() }} -# uses: ./.github/workflows/reusable_docker.yml +# uses: ./.github/workflows/docker_test_images.yml # with: # data: ${{ needs.RunConfig.outputs.data }} # StyleCheck: diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 01685ee1f5a..c08c3fb30ac 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -51,7 +51,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} StyleCheck: diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 3e1c5576e7d..bffe5b4c1bf 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -40,7 +40,7 @@ jobs: } >> "$GITHUB_OUTPUT" BuildDockers: needs: [RunConfig] - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: "${{ needs.RunConfig.outputs.data }}" set_latest: true diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c9f4f858825..04ce4d29ce9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -72,7 +72,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} StyleCheck: diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 6bf846d7535..e1a97b91016 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -57,7 +57,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckX86: diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index cd01b646ebe..f377e12d11c 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -618,6 +618,8 @@ sudo apt install --yes --no-install-recommends python3-dev python3-pip gh unzip sudo apt install --yes python3-boto3 sudo apt install --yes python3-github sudo apt install --yes python3-unidiff +sudo apt install --yes python3-tqdm # cloud changelog +sudo apt install --yes python3-thefuzz # cloud changelog sudo apt install --yes s3fs ### INSTALL AWS CLI From 4fd832fe482eebd909e642fe2ae2e7468cccebe2 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 15 Jul 2024 09:57:32 +0200 Subject: [PATCH 21/30] Fix 02918_parallel_replicas_custom_key_unavailable_replica --- ...02918_parallel_replicas_custom_key_unavailable_replica.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql index b9bc6974c47..c36b5bebd58 100644 --- a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql +++ b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql @@ -6,6 +6,9 @@ INSERT INTO 02918_parallel_replicas SELECT toString(number), number % 4 FROM num SET prefer_localhost_replica=0; +--- if we try to query unavaialble replica, connection will be retried +--- but a warning log message will be printed out +SET send_logs_level='error'; -- { echoOn } SELECT y, count() FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) @@ -26,5 +29,6 @@ GROUP BY y ORDER BY y SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; -- { echoOff } +SET send_logs_level='warning'; DROP TABLE 02918_parallel_replicas; From e06f1ed108f12b24fff716dc380bb5adce27a921 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Mon, 15 Jul 2024 17:59:00 +0200 Subject: [PATCH 22/30] Update the MySQL interface doc with compatibility notes --- docs/en/interfaces/mysql.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index 42820505406..9ad13ae4f05 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -6,7 +6,18 @@ sidebar_label: MySQL Interface # MySQL Interface -ClickHouse supports the MySQL wire protocol. This allow tools that are MySQL-compatible to interact with ClickHouse seamlessly (e.g. [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md)). +ClickHouse supports the MySQL wire protocol. This allows certain clients that do not have native ClickHouse connectors leverage the MySQL protocol instead, and it has been validated with the following BI tools: + +- [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md) +- [Tableau Online](../integrations/tableau-online) +- [QuickSight](../integrations/quicksight) + +If you are trying other untested clients or integrations, keep in mind that there could be the following limitations: + +- SSL implementation might not be fully compatible; there could be potential [TLS SNI](https://www.cloudflare.com/learning/ssl/what-is-sni/) issues. +- A particular tool might require dialect features (e.g., MySQL-specific functions or settings) that are not implemented yet. + +If there is a native driver available (e.g., [DBeaver](../integrations/dbeaver)), it is always preferred to use it instead of the MySQL interface. Additionally, while most of the MySQL language clients should work fine, MySQL interface is not guaranteed to be a drop-in replacement for a codebase with existing MySQL queries. ## Enabling the MySQL Interface On ClickHouse Cloud From 293b5c81d088223f9428e4b9e73f036bc8165d7d Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Mon, 15 Jul 2024 18:10:46 +0200 Subject: [PATCH 23/30] Add an entry about MySQL interface issues reporting --- docs/en/interfaces/mysql.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index 9ad13ae4f05..30a8d9569a9 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -19,6 +19,8 @@ If you are trying other untested clients or integrations, keep in mind that ther If there is a native driver available (e.g., [DBeaver](../integrations/dbeaver)), it is always preferred to use it instead of the MySQL interface. Additionally, while most of the MySQL language clients should work fine, MySQL interface is not guaranteed to be a drop-in replacement for a codebase with existing MySQL queries. +If your use case involves a particular tool that does not have a native ClickHouse driver, and you would like to use it via the MySQL interface and you found certain incompatibilities - please [create an issue](https://github.com/ClickHouse/ClickHouse/issues) in the ClickHouse repository. + ## Enabling the MySQL Interface On ClickHouse Cloud 1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab From 50f068efe930e264de938da46040a1e056200032 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 15 Jul 2024 17:00:53 +0000 Subject: [PATCH 24/30] add dedicated runner to libfuzzer, update docker --- docker/test/libfuzzer/run_libfuzzer.py | 70 +--------------------- tests/ci/build_download_helper.py | 2 +- tests/fuzz/build.sh | 3 + utils/libfuzzer/runner.py | 80 ++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 69 deletions(-) create mode 100644 utils/libfuzzer/runner.py diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py index fa67805dfa5..25d5ede028d 100755 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ b/docker/test/libfuzzer/run_libfuzzer.py @@ -1,77 +1,11 @@ #!/usr/bin/env python3 -import configparser -import logging -import os -from pathlib import Path -import subprocess - -DEBUGGER = os.getenv("DEBUGGER", "") -FUZZER_ARGS = os.getenv("FUZZER_ARGS", "") - - -def run_fuzzer(fuzzer: str): - logging.info(f"Running fuzzer {fuzzer}...") - - corpus_dir = f"{fuzzer}.in" - with Path(corpus_dir) as path: - if not path.exists() or not path.is_dir(): - corpus_dir = "" - - options_file = f"{fuzzer}.options" - custom_libfuzzer_options = "" - - with Path(options_file) as path: - if path.exists() and path.is_file(): - parser = configparser.ConfigParser() - parser.read(path) - - if parser.has_section("asan"): - os.environ["ASAN_OPTIONS"] = ( - f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" - ) - - if parser.has_section("msan"): - os.environ["MSAN_OPTIONS"] = ( - f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" - ) - - if parser.has_section("ubsan"): - os.environ["UBSAN_OPTIONS"] = ( - f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" - ) - - if parser.has_section("libfuzzer"): - custom_libfuzzer_options = " ".join( - "-%s=%s" % (key, value) - for key, value in parser["libfuzzer"].items() - ) - - cmd_line = f"{DEBUGGER} ./{fuzzer} {FUZZER_ARGS} {corpus_dir}" - if custom_libfuzzer_options: - cmd_line += f" {custom_libfuzzer_options}" - - if not "-dict=" in cmd_line and Path(f"{fuzzer}.dict").exists(): - cmd_line += f" -dict={fuzzer}.dict" - - cmd_line += " < /dev/null" - - logging.info(f"...will execute: {cmd_line}") - subprocess.check_call(cmd_line, shell=True) +import runner def main(): - logging.basicConfig(level=logging.INFO) - - subprocess.check_call("ls -al", shell=True) - - with Path() as current: - for fuzzer in current.iterdir(): - if (current / fuzzer).is_file() and os.access(current / fuzzer, os.X_OK): - run_fuzzer(fuzzer) - + runner.run() exit(0) - if __name__ == "__main__": main() diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 8482abb26e0..0b7dd6dd0c9 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -275,5 +275,5 @@ def download_fuzzers( check_name, reports_path, result_path, - lambda x: x.endswith(("_fuzzer", ".dict", ".options", "_seed_corpus.zip")), + lambda x: x.endswith(("_fuzzer", ".dict", ".options", "_seed_corpus.zip", "runner.py")), ) diff --git a/tests/fuzz/build.sh b/tests/fuzz/build.sh index 12f41f6e079..cf871d2de58 100755 --- a/tests/fuzz/build.sh +++ b/tests/fuzz/build.sh @@ -1,5 +1,8 @@ #!/bin/bash -eu +# copy runner +cp $SRC/utils/libfuzzer/runner.py $OUT/ + # copy fuzzer options and dictionaries cp $SRC/tests/fuzz/*.dict $OUT/ cp $SRC/tests/fuzz/*.options $OUT/ diff --git a/utils/libfuzzer/runner.py b/utils/libfuzzer/runner.py new file mode 100644 index 00000000000..435eb49383b --- /dev/null +++ b/utils/libfuzzer/runner.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 + +import configparser +import logging +import os +from pathlib import Path +import subprocess + +DEBUGGER = os.getenv("DEBUGGER", "") +FUZZER_ARGS = os.getenv("FUZZER_ARGS", "") + + +def run_fuzzer(fuzzer: str): + logging.info(f"Running fuzzer {fuzzer}...") + + corpus_dir = f"{fuzzer}.in" + with Path(corpus_dir) as path: + if not path.exists() or not path.is_dir(): + corpus_dir = "" + + options_file = f"{fuzzer}.options" + custom_libfuzzer_options = "" + fuzzer_arguments = "" + + with Path(options_file) as path: + if path.exists() and path.is_file(): + parser = configparser.ConfigParser() + parser.read(path) + + if parser.has_section("asan"): + os.environ["ASAN_OPTIONS"] = ( + f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + ) + + if parser.has_section("msan"): + os.environ["MSAN_OPTIONS"] = ( + f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + ) + + if parser.has_section("ubsan"): + os.environ["UBSAN_OPTIONS"] = ( + f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + ) + + if parser.has_section("libfuzzer"): + custom_libfuzzer_options = " ".join( + "-%s=%s" % (key, value) + for key, value in parser["libfuzzer"].items() + ) + + if parser.has_section("fuzzer_arguments"): + fuzzer_arguments = " ".join( + ("%s" % key) if value == "" else ("%s=%s" % (key, value)) + for key, value in parser["fuzzer_arguments"].items() + ) + + cmd_line = f"{DEBUGGER} ./{fuzzer} {FUZZER_ARGS} {corpus_dir}" + if custom_libfuzzer_options: + cmd_line += f" {custom_libfuzzer_options}" + if fuzzer_arguments: + cmd_line += f" {fuzzer_arguments}" + + if not "-dict=" in cmd_line and Path(f"{fuzzer}.dict").exists(): + cmd_line += f" -dict={fuzzer}.dict" + + cmd_line += " < /dev/null" + + logging.info(f"...will execute: {cmd_line}") + subprocess.check_call(cmd_line, shell=True) + + +def run(): + logging.basicConfig(level=logging.INFO) + + subprocess.check_call("ls -al", shell=True) + + with Path() as current: + for fuzzer in current.iterdir(): + if (current / fuzzer).is_file() and os.access(current / fuzzer, os.X_OK): + run_fuzzer(fuzzer) From 874b4d1af09496be6cc6601186379e8ad925b05a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 15 Jul 2024 19:15:45 +0200 Subject: [PATCH 25/30] fix tidy build --- src/Client/ConnectionPool.h | 2 +- src/Interpreters/Context.cpp | 1 - utils/keeper-bench/Runner.cpp | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index d83ecdd75f9..0fcb3c4e7e1 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -103,7 +103,7 @@ public: Entry get(const ConnectionTimeouts & timeouts, /// NOLINT const Settings & settings, - bool force_connected = true) override; + bool force_connected) override; std::string getDescription() const { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index fc1e87e7b7e..2602afd8b78 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 587e015b340..cd7f1437cf7 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include From 97c9739abcb6b6e5da113ac12a35078fe694aff6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 15 Jul 2024 17:21:50 +0000 Subject: [PATCH 26/30] Automatic style fix --- docker/test/libfuzzer/run_libfuzzer.py | 1 + tests/ci/build_download_helper.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py index 25d5ede028d..06c816c8943 100755 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ b/docker/test/libfuzzer/run_libfuzzer.py @@ -7,5 +7,6 @@ def main(): runner.run() exit(0) + if __name__ == "__main__": main() diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 0b7dd6dd0c9..c3e516f836d 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -275,5 +275,7 @@ def download_fuzzers( check_name, reports_path, result_path, - lambda x: x.endswith(("_fuzzer", ".dict", ".options", "_seed_corpus.zip", "runner.py")), + lambda x: x.endswith( + ("_fuzzer", ".dict", ".options", "_seed_corpus.zip", "runner.py") + ), ) From 5fc4fada610b9d7a62528eaecb600bd5d16d48ae Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 18:18:15 +0200 Subject: [PATCH 27/30] add info about previous release --- tests/ci/ci_utils.py | 5 +++-- tests/ci/create_release.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 44bd37fe260..25875e55df6 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -49,14 +49,15 @@ class GHActions: class Shell: @classmethod def run_strict(cls, command): - subprocess.run( - command + " 2>&1", + res = subprocess.run( + command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True, ) + return res.stdout.strip() @classmethod def run(cls, command): diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index f377e12d11c..e5723e00e2a 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -14,6 +14,7 @@ from ssh import SSHAgent from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET from s3_helper import S3Helper from autoscale_runners_lambda.lambda_shared.pr import Labels +from ci_utils import Shell from version_helper import ( FILE_WITH_VERSION_PATH, GENERATED_CONTRIBUTORS, @@ -65,6 +66,8 @@ class ReleaseInfo: commit_sha: str # lts or stable codename: str + previous_release_tag: str + previous_release_sha: str @staticmethod def from_file(file_path: str) -> "ReleaseInfo": @@ -79,6 +82,8 @@ class ReleaseInfo: version = None release_branch = None release_tag = None + previous_release_tag = None + previous_release_sha = None codename = None assert release_type in ("patch", "new") if release_type == "new": @@ -101,6 +106,11 @@ class ReleaseInfo: codename = ( VersionType.STABLE ) # dummy value (artifactory won't be updated for new release) + previous_release_tag = expected_prev_tag + previous_release_sha = Shell.run_strict( + f"git rev-parse {previous_release_tag}" + ) + assert previous_release_sha if release_type == "patch": with checkout(commit_ref): _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") @@ -118,6 +128,7 @@ class ReleaseInfo: ) if version.patch == 1: expected_version = copy(version) + previous_release_tag = f"v{version.major}.{version.minor}.1.1-new" expected_version.bump() expected_tag_prefix = ( f"v{expected_version.major}.{expected_version.minor}." @@ -128,6 +139,7 @@ class ReleaseInfo: f"v{version.major}.{version.minor}.{version.patch-1}." ) expected_tag_suffix = f"-{version.get_stable_release_type()}" + previous_release_tag = git.latest_tag if git.latest_tag.startswith( expected_tag_prefix ) and git.latest_tag.endswith(expected_tag_suffix): @@ -137,8 +149,15 @@ class ReleaseInfo: False ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]" + previous_release_sha = Shell.run_strict( + f"git rev-parse {previous_release_tag}" + ) + assert previous_release_sha + assert ( release_branch + and previous_release_tag + and previous_release_sha and commit_sha and release_tag and version @@ -150,6 +169,8 @@ class ReleaseInfo: release_tag=release_tag, version=version.string, codename=codename, + previous_release_tag=previous_release_tag, + previous_release_sha=previous_release_sha, ) with open(outfile, "w", encoding="utf-8") as f: print(json.dumps(dataclasses.asdict(res), indent=2), file=f) From 5efbb42a2a0efbb5a32267fec60ab73eddaea4cf Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Mon, 15 Jul 2024 19:31:41 +0200 Subject: [PATCH 28/30] Add QuickSight to spell check ignore --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index a2a2fcd967a..987c4e8cac1 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -760,6 +760,7 @@ QueryCacheMisses QueryPreempted QueryThread QuickAssist +QuickSight QuoteMeta RBAC RClickHouse From 5361ade8e709f3ebe8916aa80669f4bec8b5c726 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 15 Jul 2024 19:10:07 +0000 Subject: [PATCH 29/30] optimize --- docker/test/libfuzzer/Dockerfile | 2 +- docker/test/libfuzzer/run_libfuzzer.py | 12 ------------ tests/ci/build_download_helper.py | 4 +--- tests/fuzz/build.sh | 3 --- utils/libfuzzer/runner.py | 6 +++++- 5 files changed, 7 insertions(+), 20 deletions(-) delete mode 100755 docker/test/libfuzzer/run_libfuzzer.py diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index e6eb2ae336e..9ded2ca4e3a 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -39,7 +39,7 @@ ENV FUZZER_ARGS="-max_total_time=60" SHELL ["/bin/bash", "-c"] CMD set -o pipefail \ - && timeout -s 9 1h /run_libfuzzer.py 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log + && timeout -s 9 1h ./utils/libfuzzer/runner.py 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log # docker run --network=host --volume :/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/libfuzzer diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py deleted file mode 100755 index 06c816c8943..00000000000 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python3 - -import runner - - -def main(): - runner.run() - exit(0) - - -if __name__ == "__main__": - main() diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index c3e516f836d..8482abb26e0 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -275,7 +275,5 @@ def download_fuzzers( check_name, reports_path, result_path, - lambda x: x.endswith( - ("_fuzzer", ".dict", ".options", "_seed_corpus.zip", "runner.py") - ), + lambda x: x.endswith(("_fuzzer", ".dict", ".options", "_seed_corpus.zip")), ) diff --git a/tests/fuzz/build.sh b/tests/fuzz/build.sh index cf871d2de58..12f41f6e079 100755 --- a/tests/fuzz/build.sh +++ b/tests/fuzz/build.sh @@ -1,8 +1,5 @@ #!/bin/bash -eu -# copy runner -cp $SRC/utils/libfuzzer/runner.py $OUT/ - # copy fuzzer options and dictionaries cp $SRC/tests/fuzz/*.dict $OUT/ cp $SRC/tests/fuzz/*.options $OUT/ diff --git a/utils/libfuzzer/runner.py b/utils/libfuzzer/runner.py index 435eb49383b..bbe648dbbc2 100644 --- a/utils/libfuzzer/runner.py +++ b/utils/libfuzzer/runner.py @@ -69,7 +69,7 @@ def run_fuzzer(fuzzer: str): subprocess.check_call(cmd_line, shell=True) -def run(): +def main(): logging.basicConfig(level=logging.INFO) subprocess.check_call("ls -al", shell=True) @@ -78,3 +78,7 @@ def run(): for fuzzer in current.iterdir(): if (current / fuzzer).is_file() and os.access(current / fuzzer, os.X_OK): run_fuzzer(fuzzer) + + +if __name__ == "__main__": + main() From 0176e0b2ae534fee35a6e2bcab8a74a0783e8137 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 15 Jul 2024 19:27:54 +0000 Subject: [PATCH 30/30] fix --- docker/test/libfuzzer/Dockerfile | 4 ---- tests/ci/libfuzzer_test_check.py | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index 9ded2ca4e3a..3ffae0cd921 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -33,13 +33,9 @@ RUN apt-get update \ COPY requirements.txt / RUN pip3 install --no-cache-dir -r /requirements.txt -COPY * / - ENV FUZZER_ARGS="-max_total_time=60" SHELL ["/bin/bash", "-c"] -CMD set -o pipefail \ - && timeout -s 9 1h ./utils/libfuzzer/runner.py 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log # docker run --network=host --volume :/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/libfuzzer diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 1f5936c3fec..d9e33229932 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -74,7 +74,8 @@ def get_run_command( f"--volume={repo_path}/tests:/usr/share/clickhouse-test " f"--volume={result_path}:/test_output " "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls - f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" + f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image} " + "python3 ./utils/runner.py" )