From 2f083fd1a709dd236ffaf37a7a059cafef8c4a41 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 2 Jul 2024 15:08:14 +0000 Subject: [PATCH 01/35] Fix resolving dynamic subcolumns in analyzer --- src/Analyzer/Resolve/IdentifierResolver.cpp | 35 ++++++++++++++++--- .../03198_dynamic_read_subcolumns.reference | 8 +++++ .../03198_dynamic_read_subcolumns.sql | 18 ++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference create mode 100644 tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 692a31b66ba..4347f07e829 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -678,9 +679,33 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage( bool match_full_identifier = false; const auto & identifier_full_name = identifier_without_column_qualifier.getFullName(); - auto it = table_expression_data.column_name_to_column_node.find(identifier_full_name); - bool can_resolve_directly_from_storage = it != table_expression_data.column_name_to_column_node.end(); - if (can_resolve_directly_from_storage && table_expression_data.subcolumn_names.contains(identifier_full_name)) + + ColumnNodePtr result_column_node; + bool can_resolve_directly_from_storage = false; + bool is_subcolumn = false; + if (auto it = table_expression_data.column_name_to_column_node.find(identifier_full_name); it != table_expression_data.column_name_to_column_node.end()) + { + can_resolve_directly_from_storage = true; + is_subcolumn = table_expression_data.subcolumn_names.contains(identifier_full_name); + result_column_node = it->second; + } + /// Check if it's a dynamic subcolumn + else + { + auto [column_name, dynamic_subcolumn_name] = 
Nested::splitName(identifier_full_name); + auto jt = table_expression_data.column_name_to_column_node.find(column_name); + if (jt != table_expression_data.column_name_to_column_node.end() && jt->second->getColumnType()->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = jt->second->getColumnType()->tryGetSubcolumnType(dynamic_subcolumn_name)) + { + result_column_node = std::make_shared(NameAndTypePair{identifier_full_name, dynamic_subcolumn_type}, jt->second->getColumnSource()); + can_resolve_directly_from_storage = true; + is_subcolumn = true; + } + } + } + + if (can_resolve_directly_from_storage && is_subcolumn) { /** In the case when we have an ARRAY JOIN, we should not resolve subcolumns directly from storage. * For example, consider the following SQL query: @@ -696,11 +721,11 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage( if (can_resolve_directly_from_storage) { match_full_identifier = true; - result_expression = it->second; + result_expression = result_column_node; } else { - it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.at(0)); + auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.at(0)); if (it != table_expression_data.column_name_to_column_node.end()) result_expression = it->second; } diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference new file mode 100644 index 00000000000..59d65220c83 --- /dev/null +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference @@ -0,0 +1,8 @@ +foo +\N +\N +foo +\N +\N +6 +6 diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql new file mode 100644 index 00000000000..7f88b4cd347 --- /dev/null +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql @@ -0,0 +1,18 @@ +SET 
allow_experimental_dynamic_type = 1; +DROP TABLE IF EXISTS test_dynamic; +CREATE TABLE test_dynamic (id UInt64, d Dynamic) ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; +INSERT INTO test_dynamic VALUES (1, 'foo'), (2, 1111), (3, [1, 2, 3]); +SYSTEM DROP MARK CACHE; +SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 1; +SYSTEM DROP MARK CACHE; +SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 0; +SYSTEM FLUSH LOGS; +SELECT + ProfileEvents['FileOpen'] +FROM system.query_log +WHERE (type = 2) AND (query LIKE 'SELECT d.String %test_dynamic%') AND (current_database = currentDatabase()) +ORDER BY event_time_microseconds DESC +LIMIT 2; + +DROP TABLE test_dynamic; + From ac315785d3ab89756b160ef1610bf5e4125c332b Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 2 Jul 2024 23:41:57 +0000 Subject: [PATCH 02/35] Update tests --- .../03040_dynamic_type_alters_1.reference | 108 +++++++++--------- .../03040_dynamic_type_alters_1.sh | 2 +- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference index ca98ec0963c..b41645f0c4a 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -30,16 +30,16 @@ alter modify column 1 0 0 \N \N \N \N \N 1 1 \N \N \N \N \N 2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N 6 6 str_6 str_6 \N \N \N 7 7 str_7 str_7 \N \N \N 8 8 str_8 str_8 \N \N \N 9 9 \N \N \N \N \N 10 10 \N \N \N \N \N 11 11 \N \N \N \N \N -12 12 12 12 \N \N \N +12 12 12 \N 12 \N \N 13 13 str_13 str_13 \N \N \N 14 14 \N \N \N \N \N insert after alter modify column 1 @@ -48,16 +48,16 @@ insert after alter modify column 1 0 0 \N \N \N \N \N 1 1 \N \N \N \N \N 2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N 
-5 5 5 5 \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N 6 6 str_6 str_6 \N \N \N 7 7 str_7 str_7 \N \N \N 8 8 str_8 str_8 \N \N \N 9 9 \N \N \N \N \N 10 10 \N \N \N \N \N 11 11 \N \N \N \N \N -12 12 12 12 \N \N \N +12 12 12 \N 12 \N \N 13 13 str_13 str_13 \N \N \N 14 14 \N \N \N \N \N 15 15 \N \N \N \N \N @@ -120,57 +120,57 @@ alter modify column 3 5 UInt64 8 String 9 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 \N \N \N \N \N \N \N +1 1 \N \N \N \N \N \N \N +2 2 \N \N \N \N \N \N \N +3 3 \N \N \N \N 3 \N \N +4 4 \N \N \N \N 4 \N \N +5 5 \N \N \N \N 5 \N \N +6 6 \N \N \N str_6 \N \N \N +7 7 \N \N \N str_7 \N \N \N +8 8 \N \N \N str_8 \N \N \N +9 9 \N \N \N \N \N \N \N +10 10 \N \N \N \N \N \N \N +11 11 \N \N \N \N \N \N \N +12 12 \N \N \N \N 12 \N \N +13 13 \N \N \N str_13 \N \N \N +14 14 \N \N \N \N \N \N \N +15 15 \N \N \N \N \N \N \N +16 16 \N \N \N 16 \N \N \N +17 17 \N \N \N str_17 \N \N \N +18 18 \N \N \N 1970-01-19 \N \N \N +19 19 \N \N \N \N \N \N \N +20 20 \N \N \N \N 20 \N \N +21 21 \N \N \N str_21 \N \N \N +22 22 \N \N \N \N \N 1970-01-23 \N insert after alter modify column 3 1 Date 5 UInt64 8 String 12 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 
9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 \N \N \N \N \N \N \N +1 1 \N \N \N \N \N \N \N +2 2 \N \N \N \N \N \N \N +3 3 \N \N \N \N 3 \N \N +4 4 \N \N \N \N 4 \N \N +5 5 \N \N \N \N 5 \N \N +6 6 \N \N \N str_6 \N \N \N +7 7 \N \N \N str_7 \N \N \N +8 8 \N \N \N str_8 \N \N \N +9 9 \N \N \N \N \N \N \N +10 10 \N \N \N \N \N \N \N +11 11 \N \N \N \N \N \N \N +12 12 \N \N \N \N 12 \N \N +13 13 \N \N \N str_13 \N \N \N +14 14 \N \N \N \N \N \N \N +15 15 \N \N \N \N \N \N \N +16 16 \N \N \N 16 \N \N \N +17 17 \N \N \N str_17 \N \N \N +18 18 \N \N \N 1970-01-19 \N \N \N +19 19 \N \N \N \N \N \N \N +20 20 \N \N \N \N 20 \N \N +21 21 \N \N \N str_21 \N \N \N +22 22 \N \N \N \N \N 1970-01-23 \N 23 \N \N \N \N \N \N \N \N 24 24 24 \N \N \N \N \N \N 25 str_25 \N str_25 \N \N \N \N \N diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh index 7a73be20a4d..080a7e583bf 100755 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=0" function run() { From 65a34b8f63cc0cd6fda3d454e7505a01f0a7a75a Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 07:27:44 +0000 Subject: [PATCH 03/35] Fix test flakiness --- tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql index 7f88b4cd347..a9e3d88b7c7 100644 --- a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + SET allow_experimental_dynamic_type = 1; DROP TABLE IF EXISTS test_dynamic; CREATE TABLE test_dynamic (id UInt64, d Dynamic) ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; From be3d707a5b519e9d810bfcf65b10379b00323762 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 16:37:01 +0000 Subject: [PATCH 04/35] Update test --- .../0_stateless/03198_dynamic_read_subcolumns.reference | 9 +++++++++ .../0_stateless/03198_dynamic_read_subcolumns.sql | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference index 59d65220c83..f9fe0c2ad77 100644 --- a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference @@ -1,3 +1,12 @@ +QUERY id: 0 + PROJECTION COLUMNS + d.String Nullable(String) + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, 
column_name: d.String, result_type: Nullable(String), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.test_dynamic + SETTINGS allow_experimental_analyzer=1 foo \N \N diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql index a9e3d88b7c7..27fca179580 100644 --- a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql @@ -1,9 +1,10 @@ --- Tags: no-random-settings +-- Tags: no-random-settings, no-s3-storage SET allow_experimental_dynamic_type = 1; DROP TABLE IF EXISTS test_dynamic; CREATE TABLE test_dynamic (id UInt64, d Dynamic) ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_dynamic VALUES (1, 'foo'), (2, 1111), (3, [1, 2, 3]); +EXPLAIN QUERY TREE SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 1; SYSTEM DROP MARK CACHE; SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 1; SYSTEM DROP MARK CACHE; From a2de85427473eb3ec982a3fc79cb85fe0ab3cbcb Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 16:37:54 +0000 Subject: [PATCH 05/35] Remove allow_experimental_analyzer from the test --- tests/queries/0_stateless/03040_dynamic_type_alters_1.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh index 080a7e583bf..1f2a6a31ad7 100755 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" function run() { From f0966f519b5bbc88c63f604ab97603b504023869 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 4 Jul 2024 21:00:51 +0000 Subject: [PATCH 06/35] Revert changes in test reference --- .../03040_dynamic_type_alters_1.reference | 108 +++++++++--------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference index b41645f0c4a..ca98ec0963c 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -30,16 +30,16 @@ alter modify column 1 0 0 \N \N \N \N \N 1 1 \N \N \N \N \N 2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N 6 6 str_6 str_6 \N \N \N 7 7 str_7 str_7 \N \N \N 8 8 str_8 str_8 \N \N \N 9 9 \N \N \N \N \N 10 10 \N \N \N \N \N 11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N +12 12 12 12 \N \N \N 13 13 str_13 str_13 \N \N \N 14 14 \N \N \N \N \N insert after alter modify column 1 @@ -48,16 +48,16 @@ insert after alter modify column 1 0 0 \N \N \N \N \N 1 1 \N \N \N \N \N 2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N 6 6 str_6 str_6 \N \N \N 7 7 str_7 str_7 \N \N \N 8 8 str_8 str_8 \N \N \N 9 9 \N \N \N \N \N 10 10 \N \N \N \N \N 11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N +12 12 12 12 \N \N \N 13 13 str_13 str_13 \N \N \N 14 14 \N \N \N \N \N 15 15 \N \N \N \N \N @@ -120,57 +120,57 @@ alter modify column 3 5 UInt64 8 String 9 None -0 0 \N \N \N \N \N 
\N \N -1 1 \N \N \N \N \N \N \N -2 2 \N \N \N \N \N \N \N -3 3 \N \N \N \N 3 \N \N -4 4 \N \N \N \N 4 \N \N -5 5 \N \N \N \N 5 \N \N -6 6 \N \N \N str_6 \N \N \N -7 7 \N \N \N str_7 \N \N \N -8 8 \N \N \N str_8 \N \N \N -9 9 \N \N \N \N \N \N \N -10 10 \N \N \N \N \N \N \N -11 11 \N \N \N \N \N \N \N -12 12 \N \N \N \N 12 \N \N -13 13 \N \N \N str_13 \N \N \N -14 14 \N \N \N \N \N \N \N -15 15 \N \N \N \N \N \N \N -16 16 \N \N \N 16 \N \N \N -17 17 \N \N \N str_17 \N \N \N -18 18 \N \N \N 1970-01-19 \N \N \N -19 19 \N \N \N \N \N \N \N -20 20 \N \N \N \N 20 \N \N -21 21 \N \N \N str_21 \N \N \N -22 22 \N \N \N \N \N 1970-01-23 \N +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N insert after alter modify column 3 1 Date 5 UInt64 8 String 12 None -0 0 \N \N \N \N \N \N \N -1 1 \N \N \N \N \N \N \N -2 2 \N \N \N \N \N \N \N -3 3 \N \N \N \N 3 \N \N -4 4 \N \N \N \N 4 \N \N -5 5 \N \N \N \N 5 \N \N -6 6 \N \N \N str_6 \N \N \N -7 7 \N \N \N str_7 \N \N \N -8 8 \N \N \N str_8 \N \N \N -9 9 \N \N \N \N \N \N \N -10 10 \N \N \N \N \N \N \N -11 11 \N \N \N \N \N \N \N -12 12 \N \N \N \N 12 \N \N -13 13 \N \N \N str_13 \N \N \N -14 14 \N \N \N \N \N \N \N -15 15 \N \N \N \N \N \N \N -16 16 \N \N \N 16 \N \N \N -17 17 \N \N \N str_17 \N \N \N -18 18 \N \N \N 1970-01-19 \N \N \N -19 19 \N \N \N \N \N \N \N -20 20 \N \N \N \N 20 \N \N -21 21 \N \N \N str_21 \N \N \N -22 22 \N \N \N \N \N 
1970-01-23 \N +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N 23 \N \N \N \N \N \N \N \N 24 24 24 \N \N \N \N \N \N 25 str_25 \N str_25 \N \N \N \N \N From d821dfe9033357ae6038f3f7b16d87fd91e05da9 Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 6 Jul 2024 15:03:13 +0000 Subject: [PATCH 07/35] Fix tests --- src/Interpreters/getColumnFromBlock.cpp | 2 +- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/getColumnFromBlock.cpp b/src/Interpreters/getColumnFromBlock.cpp index 2e70a58b5a1..89166bb2b3e 100644 --- a/src/Interpreters/getColumnFromBlock.cpp +++ b/src/Interpreters/getColumnFromBlock.cpp @@ -40,7 +40,7 @@ ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requ auto subcolumn_name = requested_subcolumn.getSubcolumnName(); /// If requested subcolumn is dynamic, we should first perform cast and then /// extract the subcolumn, because the data of dynamic subcolumn can change after cast. 
- if (elem->type->hasDynamicSubcolumns() && !elem->type->equals(*requested_column_type)) + if ((elem->type->hasDynamicSubcolumns() || requested_column_type->hasDynamicSubcolumns()) && !elem->type->equals(*requested_column_type)) { auto casted_column = castColumn({elem->column, elem->type, ""}, requested_column_type); auto elem_column = requested_column_type->tryGetSubcolumn(subcolumn_name, casted_column); diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index 65517061b99..35d4f3a02cf 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 optimize_functions_to_subcolumns=0" function test() From fc3ae09a11c45346f18ec39094adc448dcb9ec45 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Sat, 6 Jul 2024 21:24:56 +0200 Subject: [PATCH 08/35] Fix typo in the test --- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index 35d4f3a02cf..a1cb35e4609 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 optimize_functions_to_subcolumns=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" function test() From 30bb03d6c327d15c7161ca18b35b9edf04310de4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 8 Jul 2024 11:13:44 +0200 Subject: [PATCH 09/35] Update test --- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index a1cb35e4609..65517061b99 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" function test() From 0f38756f4e02061ddb4c144c2d80e574ccb97cbd Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:33:23 +0200 Subject: [PATCH 10/35] Disable optimize_functions_to_subcolumns in 03036_dynamic_read_subcolumns --- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index 65517061b99..a1cb35e4609 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" function test() From 22154aa079132184183659ebee80abcc855be403 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 8 Jul 2024 14:36:31 +0000 Subject: [PATCH 11/35] Disable optimize_functions_to_subcolumns in 03202_dynamic_null_map_subcolumn --- tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh index aa06e48376c..d357419f03a 100755 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 optimize_functions_to_subcolumns=0" function test() From b60151c1d8e8e2d0fe5ea0946bdbdbfae9abecc1 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 10 Jul 2024 10:13:49 +0000 Subject: [PATCH 12/35] Remove optimize_functions_to_subcolumns setting from test --- tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh index d357419f03a..aa06e48376c 100755 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 optimize_functions_to_subcolumns=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" function test() From 25157fd17d27429cf27d4cd1c9e6e87be37d6798 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 12 Jul 2024 11:01:42 +0200 Subject: [PATCH 13/35] Disable optimize_functions_to_subcolumns in test 03202_dynamic_null_map_subcolumn --- tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh index aa06e48376c..968c9e5271f 100755 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" function test() From 8c2e4900e301b7b54c8c266f2c2c8ef4a4ef5e2c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Jul 2024 10:48:34 +0000 Subject: [PATCH 14/35] Introduce isServerConstant() flag to functions. 
--- src/Analyzer/Resolve/QueryAnalyzer.cpp | 6 +- src/Functions/IFunction.h | 12 ++++ src/Functions/IFunctionAdaptors.h | 2 + src/Functions/getScalar.cpp | 4 ++ src/Functions/serverConstants.cpp | 56 +++++++++++-------- src/Planner/PlannerExpressionAnalysis.cpp | 6 +- .../02992_analyzer_group_by_const.sql | 43 ++++++++++++++ 7 files changed, 99 insertions(+), 30 deletions(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 576c4943ccb..818e75f9594 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -3410,14 +3410,14 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi function_base = function->build(argument_columns); /// Do not constant fold get scalar functions - bool disable_constant_folding = function_name == "__getScalar" || function_name == "shardNum" || - function_name == "shardCount" || function_name == "hostName" || function_name == "tcpPort"; + // bool disable_constant_folding = function_name == "__getScalar" || function_name == "shardNum" || + // function_name == "shardCount" || function_name == "hostName" || function_name == "tcpPort"; /** If function is suitable for constant folding try to convert it to constant. 
* Example: SELECT plus(1, 1); * Result: SELECT 2; */ - if (function_base->isSuitableForConstantFolding() && !disable_constant_folding) + if (function_base->isSuitableForConstantFolding()) // && !disable_constant_folding) { auto result_type = function_base->getResultType(); auto executable_function = function_base->prepare(argument_columns); diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index a66456cabee..23aab420338 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -230,6 +230,17 @@ public: virtual bool isDeterministicInScopeOfQuery() const { return true; } + /** This is a special flags for functions which return constant value for the server, + * but the result could be different for different servers in distributed query. + * + * This fuctions can't support constant folding on the initiator, but can on the follower. + * We can't apply some optimizations as well (e.g. can't remove constant result from GROUP BY key). + * So, it is convenient to have a special flag for them. + * + * Examples are: "__getScalar" and every function from serverConstants.cpp + */ + virtual bool isServerConstant() const { return false; } + /** Lets you know if the function is monotonic in a range of values. * This is used to work with the index in a sorted chunk of data. * And allows to use the index not only when it is written, for example `date >= const`, but also, for example, `toMonth(date) >= 11`. 
@@ -488,6 +499,7 @@ public: virtual bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const { return false; } virtual bool isDeterministic() const { return true; } virtual bool isDeterministicInScopeOfQuery() const { return true; } + virtual bool isServerConstant() const { return false; } virtual bool isStateful() const { return false; } using ShortCircuitSettings = IFunctionBase::ShortCircuitSettings; diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index 04bd03a776e..c9929a083c1 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -86,6 +86,8 @@ public: bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } + bool isServerConstant() const override { return function->isServerConstant(); } + bool isShortCircuit(ShortCircuitSettings & settings, size_t number_of_arguments) const override { return function->isShortCircuit(settings, number_of_arguments); } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & args) const override { return function->isSuitableForShortCircuitArgumentsExecution(args); } diff --git a/src/Functions/getScalar.cpp b/src/Functions/getScalar.cpp index 7196cbc0a36..5131dca962e 100644 --- a/src/Functions/getScalar.cpp +++ b/src/Functions/getScalar.cpp @@ -49,6 +49,8 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isServerConstant() const override { return true; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1 || !isString(arguments[0].type) || !arguments[0].column || !isColumnConst(*arguments[0].column)) @@ -105,6 +107,8 @@ public: bool isDeterministic() const override { return false; } + bool isServerConstant() const override { return true; } + bool isSuitableForConstantFolding() const override { return 
!is_distributed; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index e7e423058f1..1e2c8c48bab 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -20,117 +20,125 @@ namespace DB namespace { + template + class FunctionServerConstantBase : public FunctionConstantBase + { + public: + using FunctionConstantBase::FunctionConstantBase; + bool isServerConstant() const override { return true; } + }; + #if defined(__ELF__) && !defined(OS_FREEBSD) /// buildId() - returns the compiler build id of the running binary. - class FunctionBuildId : public FunctionConstantBase + class FunctionBuildId : public FunctionServerConstantBase { public: static constexpr auto name = "buildId"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(SymbolIndex::instance().getBuildIDHex(), context->isDistributed()) {} + explicit FunctionBuildId(ContextPtr context) : FunctionServerConstantBase(SymbolIndex::instance().getBuildIDHex(), context->isDistributed()) {} }; #endif /// Get the host name. It is constant on single server, but is not constant in distributed queries. 
- class FunctionHostName : public FunctionConstantBase + class FunctionHostName : public FunctionServerConstantBase { public: static constexpr auto name = "hostName"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionHostName(ContextPtr context) : FunctionConstantBase(DNSResolver::instance().getHostName(), context->isDistributed()) {} + explicit FunctionHostName(ContextPtr context) : FunctionServerConstantBase(DNSResolver::instance().getHostName(), context->isDistributed()) {} }; - class FunctionServerUUID : public FunctionConstantBase + class FunctionServerUUID : public FunctionServerConstantBase { public: static constexpr auto name = "serverUUID"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionServerUUID(ContextPtr context) : FunctionConstantBase(ServerUUID::get(), context->isDistributed()) {} + explicit FunctionServerUUID(ContextPtr context) : FunctionServerConstantBase(ServerUUID::get(), context->isDistributed()) {} }; - class FunctionTCPPort : public FunctionConstantBase + class FunctionTCPPort : public FunctionServerConstantBase { public: static constexpr auto name = "tcpPort"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTCPPort(ContextPtr context) : FunctionConstantBase(context->getTCPPort(), context->isDistributed()) {} + explicit FunctionTCPPort(ContextPtr context) : FunctionServerConstantBase(context->getTCPPort(), context->isDistributed()) {} }; /// Returns timezone for current session. 
- class FunctionTimezone : public FunctionConstantBase + class FunctionTimezone : public FunctionServerConstantBase { public: static constexpr auto name = "timezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(DateLUT::instance().getTimeZone(), context->isDistributed()) {} + explicit FunctionTimezone(ContextPtr context) : FunctionServerConstantBase(DateLUT::instance().getTimeZone(), context->isDistributed()) {} }; /// Returns the server time zone (timezone in which server runs). - class FunctionServerTimezone : public FunctionConstantBase + class FunctionServerTimezone : public FunctionServerConstantBase { public: static constexpr auto name = "serverTimezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionServerTimezone(ContextPtr context) : FunctionConstantBase(DateLUT::serverTimezoneInstance().getTimeZone(), context->isDistributed()) {} + explicit FunctionServerTimezone(ContextPtr context) : FunctionServerConstantBase(DateLUT::serverTimezoneInstance().getTimeZone(), context->isDistributed()) {} }; /// Returns server uptime in seconds. - class FunctionUptime : public FunctionConstantBase + class FunctionUptime : public FunctionServerConstantBase { public: static constexpr auto name = "uptime"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionUptime(ContextPtr context) : FunctionConstantBase(context->getUptimeSeconds(), context->isDistributed()) {} + explicit FunctionUptime(ContextPtr context) : FunctionServerConstantBase(context->getUptimeSeconds(), context->isDistributed()) {} }; /// version() - returns the current version as a string. 
- class FunctionVersion : public FunctionConstantBase + class FunctionVersion : public FunctionServerConstantBase { public: static constexpr auto name = "version"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionVersion(ContextPtr context) : FunctionConstantBase(VERSION_STRING, context->isDistributed()) {} + explicit FunctionVersion(ContextPtr context) : FunctionServerConstantBase(VERSION_STRING, context->isDistributed()) {} }; /// revision() - returns the current revision. - class FunctionRevision : public FunctionConstantBase + class FunctionRevision : public FunctionServerConstantBase { public: static constexpr auto name = "revision"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionRevision(ContextPtr context) : FunctionConstantBase(ClickHouseRevision::getVersionRevision(), context->isDistributed()) {} + explicit FunctionRevision(ContextPtr context) : FunctionServerConstantBase(ClickHouseRevision::getVersionRevision(), context->isDistributed()) {} }; - class FunctionZooKeeperSessionUptime : public FunctionConstantBase + class FunctionZooKeeperSessionUptime : public FunctionServerConstantBase { public: static constexpr auto name = "zookeeperSessionUptime"; explicit FunctionZooKeeperSessionUptime(ContextPtr context) - : FunctionConstantBase(context->getZooKeeperSessionUptime(), context->isDistributed()) + : FunctionServerConstantBase(context->getZooKeeperSessionUptime(), context->isDistributed()) { } static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } }; - class FunctionGetOSKernelVersion : public FunctionConstantBase + class FunctionGetOSKernelVersion : public FunctionServerConstantBase { public: static constexpr auto name = "getOSKernelVersion"; - explicit FunctionGetOSKernelVersion(ContextPtr context) : FunctionConstantBase(Poco::Environment::osName() + " " + Poco::Environment::osVersion(), context->isDistributed()) {} + 
explicit FunctionGetOSKernelVersion(ContextPtr context) : FunctionServerConstantBase(Poco::Environment::osName() + " " + Poco::Environment::osVersion(), context->isDistributed()) {} static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } }; - class FunctionDisplayName : public FunctionConstantBase + class FunctionDisplayName : public FunctionServerConstantBase { public: static constexpr auto name = "displayName"; - explicit FunctionDisplayName(ContextPtr context) : FunctionConstantBase(context->getConfigRef().getString("display_name", getFQDNOrHostName()), context->isDistributed()) {} + explicit FunctionDisplayName(ContextPtr context) : FunctionServerConstantBase(context->getConfigRef().getString("display_name", getFQDNOrHostName()), context->isDistributed()) {} static FunctionPtr create(ContextPtr context) {return std::make_shared(context); } }; } diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index ceb506d1bbb..f0de3e69113 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -85,14 +85,14 @@ bool canRemoveConstantFromGroupByKey(const ConstantNode & root) else if (function_node) { /// Do not allow removing constants like `hostName()` - if (!function_node->getFunctionOrThrow()->isDeterministic()) + if (function_node->getFunctionOrThrow()->isServerConstant()) return false; for (const auto & child : function_node->getArguments()) nodes.push(child.get()); } - else - return false; + // else + // return false; } return true; diff --git a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql index ede6e0deed9..89049515a37 100644 --- a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql +++ b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql @@ -30,3 +30,46 @@ SELECT min(dummy) FROM remote('127.0.0.{2,3}', system.one) GROUP BY y; + +SELECT + count(), + 
now() AS c1 +FROM remote('127.0.0.{1,2}', default, ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', default, ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{1,2}', default, ttt) +GROUP BY c1 + 1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', default, ttt) +GROUP BY c1 + 1 FORMAT Null; + +CREATE TABLE ttt (hr DateTime, ts DateTime) ENGINE=Memory +as select '2000-01-01' d, d; + +SELECT + count(), + tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, + defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, + formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 +FROM remote('127.0.0.{1,2}', default, ttt) +GROUP BY c1, c2, c3 FORMAT Null; + +SELECT + count(), + tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, + defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, + formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 +FROM remote('127.0.0.{3,2}', default, ttt) +GROUP BY c1, c2, c3 FORMAT Null; From dbfc9fc71fb32a8bc14d50ee6f15db224157fd22 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 12 Jul 2024 13:04:41 +0200 Subject: [PATCH 15/35] Update 03036_dynamic_read_subcolumns.sh --- tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh index a1cb35e4609..65517061b99 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" function test() From 1e9f47707fc7b47703c0e2957da09192bd044e81 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 12 Jul 2024 13:05:40 +0200 Subject: [PATCH 16/35] Fix optimizing array size to subcolumns in dynamic subcolumn --- src/Storages/ColumnsDescription.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index c07583cd39d..da749812167 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -701,15 +701,15 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns auto jt = subcolumns.get<0>().find(column_name); if (jt != subcolumns.get<0>().end()) return *jt; - } - /// Check for dynamic subcolumns. - auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); - it = columns.get<1>().find(ordinary_column_name); - if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) - { - if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) - return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + /// Check for dynamic subcolumns. 
+ auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + } } return {}; From 64d2b01d4a1a56f7cab978142cb15cf71ff0b4a1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Jul 2024 15:51:16 +0000 Subject: [PATCH 17/35] Fixing style --- src/Functions/IFunction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 23aab420338..12931b51df2 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -233,7 +233,7 @@ public: /** This is a special flags for functions which return constant value for the server, * but the result could be different for different servers in distributed query. * - * This fuctions can't support constant folding on the initiator, but can on the follower. + * This functions can't support constant folding on the initiator, but can on the follower. * We can't apply some optimizations as well (e.g. can't remove constant result from GROUP BY key). * So, it is convenient to have a special flag for them. * From 00fa457a5d14a96b9f3e7ed3b937506f07476789 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Jul 2024 17:06:07 +0000 Subject: [PATCH 18/35] Fixing test. 
--- src/Functions/getMacro.cpp | 2 + .../02992_analyzer_group_by_const.sql | 52 +++++++++---------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/Functions/getMacro.cpp b/src/Functions/getMacro.cpp index 8172fc8ba2e..b7f0b34d652 100644 --- a/src/Functions/getMacro.cpp +++ b/src/Functions/getMacro.cpp @@ -53,6 +53,8 @@ public: /// getMacro may return different values on different shards/replicas, so it's not constant for distributed query bool isSuitableForConstantFolding() const override { return !is_distributed; } + bool isServerConstant() const override { return true; } + size_t getNumberOfArguments() const override { return 1; diff --git a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql index 89049515a37..2a9e673d7bc 100644 --- a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql +++ b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql @@ -31,39 +31,39 @@ SELECT FROM remote('127.0.0.{2,3}', system.one) GROUP BY y; -SELECT - count(), - now() AS c1 -FROM remote('127.0.0.{1,2}', default, ttt) -GROUP BY c1 FORMAT Null; - -SELECT - count(), - now() AS c1 -FROM remote('127.0.0.{3,2}', default, ttt) -GROUP BY c1 FORMAT Null; - -SELECT - count(), - now() AS c1 -FROM remote('127.0.0.{1,2}', default, ttt) -GROUP BY c1 + 1 FORMAT Null; - -SELECT - count(), - now() AS c1 -FROM remote('127.0.0.{3,2}', default, ttt) -GROUP BY c1 + 1 FORMAT Null; - CREATE TABLE ttt (hr DateTime, ts DateTime) ENGINE=Memory as select '2000-01-01' d, d; +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{1,2}', currentDatabase(), ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', currentDatabase(), ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{1,2}', currentDatabase(), ttt) +GROUP BY c1 + 1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', currentDatabase(), ttt) 
+GROUP BY c1 + 1 FORMAT Null; + SELECT count(), tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 -FROM remote('127.0.0.{1,2}', default, ttt) +FROM remote('127.0.0.{1,2}', currentDatabase(), ttt) GROUP BY c1, c2, c3 FORMAT Null; SELECT @@ -71,5 +71,5 @@ SELECT tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 -FROM remote('127.0.0.{3,2}', default, ttt) +FROM remote('127.0.0.{3,2}', currentDatabase(), ttt) GROUP BY c1, c2, c3 FORMAT Null; From 5bce8086a86716f03ceb36f22412ed21852c610e Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 19:02:20 +0200 Subject: [PATCH 19/35] CI: Create release workflow updates --- tests/ci/create_release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 7f4cf8c787a..cd01b646ebe 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -120,7 +120,7 @@ class ReleaseInfo: expected_version = copy(version) expected_version.bump() expected_tag_prefix = ( - f"v{expected_version.major}.{expected_version.minor}-" + f"v{expected_version.major}.{expected_version.minor}." 
) expected_tag_suffix = "-new" else: From 2ddd0078dcf5f33004cfb558328303c29306a32a Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 21:07:41 +0200 Subject: [PATCH 20/35] rename reusable docker workflow --- .github/workflows/backport_branches.yml | 2 +- .github/workflows/create_release.yml | 2 +- .../workflows/{reusable_docker.yml => docker_test_images.yml} | 0 .github/workflows/master.yml | 2 +- .github/workflows/merge_queue.yml | 2 +- .github/workflows/nightly.yml | 2 +- .github/workflows/pull_request.yml | 2 +- .github/workflows/release_branches.yml | 2 +- tests/ci/create_release.py | 2 ++ 9 files changed, 9 insertions(+), 7 deletions(-) rename .github/workflows/{reusable_docker.yml => docker_test_images.yml} (100%) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 64c3d2f8342..60bd79560eb 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -62,7 +62,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckX86: diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 972aff90195..d8d27531f28 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -94,7 +94,7 @@ jobs: echo "Generate Security" python3 ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD - - name: Generate ChangeLog + - name: Create ChangeLog PR if: ${{ inputs.type == 'patch' && ! 
inputs.dry-run }} uses: peter-evans/create-pull-request@v6 with: diff --git a/.github/workflows/reusable_docker.yml b/.github/workflows/docker_test_images.yml similarity index 100% rename from .github/workflows/reusable_docker.yml rename to .github/workflows/docker_test_images.yml diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 2a7e6f737ab..d27b1987532 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -58,7 +58,7 @@ jobs: # BuildDockers: # needs: [RunConfig] # if: ${{ !failure() && !cancelled() }} -# uses: ./.github/workflows/reusable_docker.yml +# uses: ./.github/workflows/docker_test_images.yml # with: # data: ${{ needs.RunConfig.outputs.data }} # StyleCheck: diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 01685ee1f5a..c08c3fb30ac 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -51,7 +51,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} StyleCheck: diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 3e1c5576e7d..bffe5b4c1bf 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -40,7 +40,7 @@ jobs: } >> "$GITHUB_OUTPUT" BuildDockers: needs: [RunConfig] - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: "${{ needs.RunConfig.outputs.data }}" set_latest: true diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c9f4f858825..04ce4d29ce9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -72,7 +72,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() && 
toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} StyleCheck: diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 6bf846d7535..e1a97b91016 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -57,7 +57,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckX86: diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index cd01b646ebe..f377e12d11c 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -618,6 +618,8 @@ sudo apt install --yes --no-install-recommends python3-dev python3-pip gh unzip sudo apt install --yes python3-boto3 sudo apt install --yes python3-github sudo apt install --yes python3-unidiff +sudo apt install --yes python3-tqdm # cloud changelog +sudo apt install --yes python3-thefuzz # cloud changelog sudo apt install --yes s3fs ### INSTALL AWS CLI From 4fd832fe482eebd909e642fe2ae2e7468cccebe2 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 15 Jul 2024 09:57:32 +0200 Subject: [PATCH 21/35] Fix 02918_parallel_replicas_custom_key_unavailable_replica --- ...02918_parallel_replicas_custom_key_unavailable_replica.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql index b9bc6974c47..c36b5bebd58 100644 --- a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql +++ 
b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql @@ -6,6 +6,9 @@ INSERT INTO 02918_parallel_replicas SELECT toString(number), number % 4 FROM num SET prefer_localhost_replica=0; +--- if we try to query unavaialble replica, connection will be retried +--- but a warning log message will be printed out +SET send_logs_level='error'; -- { echoOn } SELECT y, count() FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) @@ -26,5 +29,6 @@ GROUP BY y ORDER BY y SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; -- { echoOff } +SET send_logs_level='warning'; DROP TABLE 02918_parallel_replicas; From 2a0253d1e27ea3e55a54eb42da7b8aca9987edf9 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Mon, 15 Jul 2024 13:14:29 +0200 Subject: [PATCH 22/35] Tests: rename bad log names --- docker/test/stateful/run.sh | 2 +- docker/test/stateless/run.sh | 2 +- docker/test/stateless/setup_hdfs_minicluster.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 1f28d1ac6ea..35ffeee5438 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -25,7 +25,7 @@ source /utils.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence & ./setup_minio.sh stateful -./mc admin trace clickminio > /test_output/rubbish.log & +./mc admin trace clickminio > /test_output/minio.log & MC_ADMIN_PID=$! 
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ae7ed1bcd20..c0bfc12bc75 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -54,7 +54,7 @@ source /utils.lib /usr/share/clickhouse-test/config/install.sh ./setup_minio.sh stateless -./mc admin trace clickminio > /test_output/rubbish.log & +./mc admin trace clickminio > /test_output/minio.log & MC_ADMIN_PID=$! ./setup_hdfs_minicluster.sh diff --git a/docker/test/stateless/setup_hdfs_minicluster.sh b/docker/test/stateless/setup_hdfs_minicluster.sh index 6671e73562a..15a54f59096 100755 --- a/docker/test/stateless/setup_hdfs_minicluster.sh +++ b/docker/test/stateless/setup_hdfs_minicluster.sh @@ -10,7 +10,7 @@ cd hadoop-3.3.1 export JAVA_HOME=/usr mkdir -p target/test/data chown clickhouse ./target/test/data -sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 >> /test_output/garbage.log 2>&1 & +sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 >> /test_output/hdfs_minicluster.log 2>&1 & while ! 
nc -z localhost 12222; do sleep 1 From de029e278ad4461c7ce981146fdc1dcf18ff0b25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 15 Jul 2024 13:30:48 +0200 Subject: [PATCH 23/35] Add additional log masking --- tests/ci/functional_test_check.py | 14 ++++++++++++++ tests/ci/stress_check.py | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index ef9f4dc016e..3531a55f8ae 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -24,6 +24,18 @@ from tee_popen import TeePopen NO_CHANGES_MSG = "Nothing to run" +class SensitiveFormatter(logging.Formatter): + @staticmethod + def _filter(s): + return re.sub( + r"(.*)(AZURE_CONNECTION_STRING.*\')(.*)", r"\1AZURE_CONNECTION_STRING\3", s + ) + + def format(self, record): + original = logging.Formatter.format(self, record) + return self._filter(original) + + def get_additional_envs( check_name: str, run_by_hash_num: int, run_by_hash_total: int ) -> List[str]: @@ -213,6 +225,8 @@ def parse_args(): def main(): logging.basicConfig(level=logging.INFO) + for handler in logging.root.handlers: + handler.setFormatter(SensitiveFormatter(handler.formatter._fmt)) stopwatch = Stopwatch() diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 486bfc25e22..83a410551a1 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -3,6 +3,7 @@ import csv import logging import os +import re import subprocess import sys from pathlib import Path @@ -19,6 +20,18 @@ from stopwatch import Stopwatch from tee_popen import TeePopen +class SensitiveFormatter(logging.Formatter): + @staticmethod + def _filter(s): + return re.sub( + r"(.*)(AZURE_CONNECTION_STRING.*\')(.*)", r"\1AZURE_CONNECTION_STRING\3", s + ) + + def format(self, record): + original = logging.Formatter.format(self, record) + return self._filter(original) + + def get_additional_envs(check_name: str) -> List[str]: 
result = [] azure_connection_string = get_parameter_from_ssm("azure_connection_string") @@ -117,6 +130,8 @@ def process_results( def run_stress_test(docker_image_name: str) -> None: logging.basicConfig(level=logging.INFO) + for handler in logging.root.handlers: + handler.setFormatter(SensitiveFormatter(handler.formatter._fmt)) stopwatch = Stopwatch() temp_path = Path(TEMP_PATH) From 6f036fad598bd0fb6903802c3df709f15b97c4cd Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 13:35:20 +0200 Subject: [PATCH 24/35] CI: Fix ci await function --- tests/ci/ci_cache.py | 17 ++--- tests/ci/test_ci_config.py | 128 +++++++++++++++++++++++++------------ 2 files changed, 93 insertions(+), 52 deletions(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 417d5dbc262..9486a286a8d 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -763,22 +763,13 @@ class CiCache: # TIMEOUT * MAX_ROUNDS_TO_WAIT must be less than 6h (GH job timeout) with a room for rest RunConfig work TIMEOUT = 3000 # 50 min MAX_ROUNDS_TO_WAIT = 6 - MAX_JOB_NUM_TO_WAIT = 3 round_cnt = 0 - def _has_build_job(): - for job in self.jobs_to_wait: - if CI.is_build_job(job): - return True - return False - if not is_release: # in PRs we can wait only for builds, TIMEOUT*MAX_ROUNDS_TO_WAIT=100min is enough MAX_ROUNDS_TO_WAIT = 2 - while ( - len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT or _has_build_job() - ) and round_cnt < MAX_ROUNDS_TO_WAIT: + while round_cnt < MAX_ROUNDS_TO_WAIT: round_cnt += 1 GHActions.print_in_group( f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:", @@ -820,6 +811,10 @@ class CiCache: f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore" ) job_config.batches.remove(batch) + if not job_config.batches: + print(f"Remove job [{job_name}] from jobs_to_do") + self.jobs_to_skip.append(job_name) + del self.jobs_to_do[job_name] else: print( f"NOTE: Job [{job_name}:{batch}] finished failed - do not add to ready" @@ -830,9 +825,7 @@ class CiCache: 
await_finished.add(job_name) for job in await_finished: - self.jobs_to_skip.append(job) del self.jobs_to_wait[job] - del self.jobs_to_do[job] if not dry_run: expired_sec = int(time.time()) - start_at diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index c901994affa..4336783e0d5 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 - +import copy import unittest import random @@ -416,6 +416,30 @@ class TestCIConfig(unittest.TestCase): """ checks ci.py job configuration """ + + def _reset_ci_cache_to_wait_all_jobs(ci_cache): + # pretend there are pending jobs that we need to wait + ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do) + for job, config in ci_cache.jobs_to_wait.items(): + assert config.batches + config.pending_batches = list(config.batches) + + for batch in range(config.num_batches): + record = CiCache.Record( + record_type=CiCache.RecordType.PENDING, + job_name=job, + job_digest=ci_cache.job_digests[job], + batch=batch, + num_batches=config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.PENDING.value: + records_[record.to_str_key()] = record + assert not ci_cache.jobs_to_skip + assert ci_cache.jobs_to_wait + ci_cache.jobs_to_skip = [] + settings = CiSettings() settings.no_ci_cache = True pr_info = PRInfo(github_event=_TEST_EVENT_JSON) @@ -432,26 +456,6 @@ class TestCIConfig(unittest.TestCase): assert not ci_cache.jobs_to_skip assert not ci_cache.jobs_to_wait - # pretend there are pending jobs that we need to wait - ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do) - for job, config in ci_cache.jobs_to_wait.items(): - assert not config.pending_batches - assert config.batches - config.pending_batches = list(config.batches) - for job, config in ci_cache.jobs_to_wait.items(): - for batch in range(config.num_batches): - record = CiCache.Record( - record_type=CiCache.RecordType.PENDING, - 
job_name=job, - job_digest=ci_cache.job_digests[job], - batch=batch, - num_batches=config.num_batches, - release_branch=True, - ) - for record_t_, records_ in ci_cache.records.items(): - if record_t_.value == CiCache.RecordType.PENDING.value: - records_[record.to_str_key()] = record - def _test_await_for_batch( ci_cache: CiCache, record_type: CiCache.RecordType, batch: int ) -> None: @@ -477,32 +481,76 @@ class TestCIConfig(unittest.TestCase): and batch < config_.num_batches ): assert batch not in config_.pending_batches - else: - assert batch in config_.pending_batches for _, config_ in ci_cache.jobs_to_do.items(): # jobs to do must have batches to run before/after await # if it's an empty list after await - apparently job has not been removed after await assert config_.batches - _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 0) - # check all one-batch jobs are in jobs_to_skip - for job in all_jobs_in_wf: - config = CI.JOB_CONFIGS[job] - if config.num_batches == 1: - self.assertTrue(job in ci_cache.jobs_to_skip) - self.assertTrue(job not in ci_cache.jobs_to_do) - else: - self.assertTrue(job not in ci_cache.jobs_to_skip) - self.assertTrue(job in ci_cache.jobs_to_do) - - _test_await_for_batch(ci_cache, CiCache.RecordType.FAILED, 1) - _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 2) - - self.assertTrue(len(ci_cache.jobs_to_skip) > 0) - self.assertTrue(len(ci_cache.jobs_to_do) > 0) + _reset_ci_cache_to_wait_all_jobs(ci_cache) + _test_await_for_batch(ci_cache, CiCache.RecordType.FAILED, 0) + tested = False + for job, config in ci_cache.jobs_to_do.items(): + if config.batches == [0]: + tested = True + self.assertTrue( + job not in ci_cache.jobs_to_wait, + "Job must be removed from @jobs_to_wait, because its only batch has FAILED cache record", + ) self.assertCountEqual( - list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, all_jobs_in_wf + ci_cache.jobs_to_skip, + [], + "No jobs must be skipped, since all cache records are of type 
FAILED", + ) + assert tested + + # reset jobs_to_wait after previous test + _reset_ci_cache_to_wait_all_jobs(ci_cache) + assert not ci_cache.jobs_to_skip + + # set batch 0 as SUCCESSFUL in ci cache + jobs_to_do_prev = list(ci_cache.jobs_to_do) + jobs_to_skip_prev = [] + jobs_to_wait_prev = list(ci_cache.jobs_to_wait) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 0) + self.assertTrue(len(jobs_to_skip_prev) != len(ci_cache.jobs_to_skip)) + self.assertTrue(len(jobs_to_wait_prev) > len(ci_cache.jobs_to_wait)) + self.assertCountEqual( + list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, + jobs_to_do_prev + jobs_to_skip_prev, + ) + + # set batch 1 as SUCCESSFUL in ci cache + jobs_to_do_prev = list(ci_cache.jobs_to_do) + jobs_to_skip_prev = list(ci_cache.jobs_to_skip) + jobs_to_wait_prev = list(ci_cache.jobs_to_wait) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 1) + self.assertTrue(len(jobs_to_skip_prev) != len(ci_cache.jobs_to_skip)) + self.assertTrue(len(jobs_to_wait_prev) > len(ci_cache.jobs_to_wait)) + self.assertCountEqual( + list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, + jobs_to_do_prev + jobs_to_skip_prev, + ) + + # set batch 3, 4, 5, 6 as SUCCESSFUL in ci cache + jobs_to_do_prev = list(ci_cache.jobs_to_do) + jobs_to_skip_prev = list(ci_cache.jobs_to_skip) + jobs_to_wait_prev = list(ci_cache.jobs_to_wait) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 2) + self.assertTrue(ci_cache.jobs_to_do) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 3) + self.assertTrue(ci_cache.jobs_to_do) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 4) + self.assertTrue(ci_cache.jobs_to_do) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 5) + self.assertTrue( + not ci_cache.jobs_to_do + ) # by this moment there must be no jobs left as batch 5 is currently the maximum + self.assertTrue(len(jobs_to_skip_prev) != len(ci_cache.jobs_to_skip)) + 
self.assertTrue(len(jobs_to_wait_prev) > len(ci_cache.jobs_to_wait)) + self.assertCountEqual( + list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, + jobs_to_do_prev + jobs_to_skip_prev, ) def test_ci_py_filters_not_affected_jobs_in_prs(self): From b6b4b046a39994242abbd7d8637ddfb17b0c60a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 15 Jul 2024 13:42:00 +0200 Subject: [PATCH 25/35] Style --- tests/ci/functional_test_check.py | 3 ++- tests/ci/stress_check.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 3531a55f8ae..41c7ed963c9 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -226,7 +226,8 @@ def parse_args(): def main(): logging.basicConfig(level=logging.INFO) for handler in logging.root.handlers: - handler.setFormatter(SensitiveFormatter(handler.formatter._fmt)) + # pylint: disable=protected-access + handler.setFormatter(SensitiveFormatter(handler.formatter._fmt)) # type: ignore stopwatch = Stopwatch() diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 83a410551a1..85da601e379 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -131,7 +131,8 @@ def process_results( def run_stress_test(docker_image_name: str) -> None: logging.basicConfig(level=logging.INFO) for handler in logging.root.handlers: - handler.setFormatter(SensitiveFormatter(handler.formatter._fmt)) + # pylint: disable=protected-access + handler.setFormatter(SensitiveFormatter(handler.formatter._fmt)) # type: ignore stopwatch = Stopwatch() temp_path = Path(TEMP_PATH) From d768251f99840ca468af835d07347c9870ca7a34 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 13:43:07 +0200 Subject: [PATCH 26/35] fix handling of skipped build --- tests/ci/ci.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 8dcf3fc4c69..b17d4a0325e 100644 --- 
a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1068,6 +1068,15 @@ def main() -> int: if build_result: if build_result.status == SUCCESS: previous_status = build_result.status + JobReport( + status=SUCCESS, + description="", + test_results=[], + start_time="", + duration=0.0, + additional_files=[], + job_skipped=True, + ).dump() else: # FIXME: Consider reusing failures for build jobs. # Just remove this if/else - that makes build job starting and failing immediately From 31fdfbc737093797905ac0fe3b7fbfba9e09854d Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 13:59:57 +0200 Subject: [PATCH 27/35] add fetching exit code if job was killed --- .github/workflows/reusable_test.yml | 3 +++ tests/ci/ci.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reusable_test.yml b/.github/workflows/reusable_test.yml index c01dd8ca9d4..3e88ba9a508 100644 --- a/.github/workflows/reusable_test.yml +++ b/.github/workflows/reusable_test.yml @@ -102,6 +102,9 @@ jobs: --job-name '${{inputs.test_name}}' \ --run \ --run-command '''${{inputs.run_command}}''' + if [ "$?" != 0 ]; then + echo JOB_EXIT_CODE="$?" >> "$GITHUB_ENV" + fi - name: Post run if: ${{ !cancelled() }} run: | diff --git a/tests/ci/ci.py b/tests/ci/ci.py index b17d4a0325e..5fc3e2978ca 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1274,10 +1274,15 @@ def main() -> int: elif job_report.pre_report: print(f"ERROR: Job was killed - generate evidence") job_report.update_duration() - # Job was killed! 
+ ret_code = os.getenv("JOB_EXIT_CODE", "") + if ret_code: + try: + job_report.exit_code = int(ret_code) + except ValueError: + pass if Utils.is_killed_with_oom(): print("WARNING: OOM while job execution") - error = f"Out Of Memory, exit_code {job_report.exit_code}, after {job_report.duration}s" + error = f"Out Of Memory, exit_code {job_report.exit_code}, after {int(job_report.duration)}s" else: error = f"Unknown, exit_code {job_report.exit_code}, after {job_report.duration}s" CIBuddy().post_error(error, job_name=_get_ext_check_name(args.job_name)) From 1e6edbd4aeaa2602c66f3dbff66cfac78aa05a9d Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 14:14:44 +0200 Subject: [PATCH 28/35] ci buddy message format updates --- tests/ci/ci_buddy.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index ea690bb602c..363da121840 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -26,6 +26,7 @@ class CIBuddy: self.pr_number = pr_info.number self.head_ref = pr_info.head_ref self.commit_url = pr_info.commit_html_url + self.sha = pr_info.sha[:10] @staticmethod def _get_webhooks(): @@ -69,8 +70,8 @@ class CIBuddy: line_err = f":red_circle: *Error: {error_description}*\n\n" line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" line_job = f" *Job:* `{job_name}`\n" - line_pr_ = f" *PR:* \n" - line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" + line_pr_ = f" *PR:* , <{self.commit_url}|{self.sha}>\n" + line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|{self.sha}>\n" message = line_err message += line_job if with_instance_info: From f7dfa21bfff8f775c7af722b037be58c57caa76a Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 14:39:29 +0200 Subject: [PATCH 29/35] style fixes --- .github/workflows/reusable_test.yml | 3 ++- tests/ci/ci.py | 2 +- tests/ci/ci_buddy.py | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git 
a/.github/workflows/reusable_test.yml b/.github/workflows/reusable_test.yml index 3e88ba9a508..05b0aa88ba4 100644 --- a/.github/workflows/reusable_test.yml +++ b/.github/workflows/reusable_test.yml @@ -102,7 +102,8 @@ jobs: --job-name '${{inputs.test_name}}' \ --run \ --run-command '''${{inputs.run_command}}''' - if [ "$?" != 0 ]; then + exit_code = "$?" + if [ "$exit_code" != 0 ]; then echo JOB_EXIT_CODE="$?" >> "$GITHUB_ENV" fi - name: Post run diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 5fc3e2978ca..cf285f4b97d 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1284,7 +1284,7 @@ def main() -> int: print("WARNING: OOM while job execution") error = f"Out Of Memory, exit_code {job_report.exit_code}, after {int(job_report.duration)}s" else: - error = f"Unknown, exit_code {job_report.exit_code}, after {job_report.duration}s" + error = f"Unknown, exit_code {job_report.exit_code}, after {int(job_report.duration)}s" CIBuddy().post_error(error, job_name=_get_ext_check_name(args.job_name)) if CI.is_test_job(args.job_name): gh = GitHub(get_best_robot_token(), per_page=100) diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index 363da121840..c650b876610 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -71,7 +71,9 @@ class CIBuddy: line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" line_job = f" *Job:* `{job_name}`\n" line_pr_ = f" *PR:* , <{self.commit_url}|{self.sha}>\n" - line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|{self.sha}>\n" + line_br_ = ( + f" *Branch:* `{self.head_ref}`, <{self.commit_url}|{self.sha}>\n" + ) message = line_err message += line_job if with_instance_info: @@ -86,4 +88,4 @@ class CIBuddy: if __name__ == "__main__": # test buddy = CIBuddy(dry_run=True) - buddy.post_error("Out of memory") + buddy.post_error("TEst") From 43b3a133e9c908e1850ef643b22a2331181a6cdb Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 14:57:40 +0200 Subject: [PATCH 30/35] add prints to check_mergeable_state --- 
.github/workflows/reusable_test.yml | 6 ++---- tests/ci/merge_pr.py | 13 ++++++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/reusable_test.yml b/.github/workflows/reusable_test.yml index 05b0aa88ba4..ca6df5df14e 100644 --- a/.github/workflows/reusable_test.yml +++ b/.github/workflows/reusable_test.yml @@ -102,10 +102,8 @@ jobs: --job-name '${{inputs.test_name}}' \ --run \ --run-command '''${{inputs.run_command}}''' - exit_code = "$?" - if [ "$exit_code" != 0 ]; then - echo JOB_EXIT_CODE="$?" >> "$GITHUB_ENV" - fi + # shellcheck disable=SC2319 + echo "JOB_EXIT_CODE=$?" >> "$GITHUB_ENV" - name: Post run if: ${{ !cancelled() }} run: | diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 59749abb4fa..94456506879 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -260,18 +260,29 @@ def main(): failed_to_get_info = False has_failed_statuses = False for status in statuses: - if not CI.is_required(status.context): + if not CI.is_required(status.context) or status.context in ( + CI.StatusNames.SYNC, + CI.StatusNames.PR_CHECK, + ): + # CI.StatusNames.SYNC or CI.StatusNames.PR_CHECK should not be checked continue + print(f"Check status [{status.context}], [{status.state}]") if status.state == FAILURE: has_failed_statuses = True failed_cnt = Utils.get_failed_tests_number(status.description) if failed_cnt is None: failed_to_get_info = True + print( + f"WARNING: failed to get number of failed tests from [{status.description}]" + ) else: if failed_cnt > max_failed_tests_per_job: job_name_with_max_failures = status.context max_failed_tests_per_job = failed_cnt total_failed_tests += failed_cnt + print( + f"Failed test cases in [{status.context}] is [{failed_cnt}], total failures [{total_failed_tests}]" + ) elif status.state != SUCCESS and status.context not in ( CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK, From 50f068efe930e264de938da46040a1e056200032 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 15 Jul 
2024 17:00:53 +0000 Subject: [PATCH 31/35] add dedicated runner to libfuzzer, update docker --- docker/test/libfuzzer/run_libfuzzer.py | 70 +--------------------- tests/ci/build_download_helper.py | 2 +- tests/fuzz/build.sh | 3 + utils/libfuzzer/runner.py | 80 ++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 69 deletions(-) create mode 100644 utils/libfuzzer/runner.py diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py index fa67805dfa5..25d5ede028d 100755 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ b/docker/test/libfuzzer/run_libfuzzer.py @@ -1,77 +1,11 @@ #!/usr/bin/env python3 -import configparser -import logging -import os -from pathlib import Path -import subprocess - -DEBUGGER = os.getenv("DEBUGGER", "") -FUZZER_ARGS = os.getenv("FUZZER_ARGS", "") - - -def run_fuzzer(fuzzer: str): - logging.info(f"Running fuzzer {fuzzer}...") - - corpus_dir = f"{fuzzer}.in" - with Path(corpus_dir) as path: - if not path.exists() or not path.is_dir(): - corpus_dir = "" - - options_file = f"{fuzzer}.options" - custom_libfuzzer_options = "" - - with Path(options_file) as path: - if path.exists() and path.is_file(): - parser = configparser.ConfigParser() - parser.read(path) - - if parser.has_section("asan"): - os.environ["ASAN_OPTIONS"] = ( - f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" - ) - - if parser.has_section("msan"): - os.environ["MSAN_OPTIONS"] = ( - f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" - ) - - if parser.has_section("ubsan"): - os.environ["UBSAN_OPTIONS"] = ( - f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" - ) - - if parser.has_section("libfuzzer"): - custom_libfuzzer_options = " ".join( - "-%s=%s" % (key, value) - for key, value in parser["libfuzzer"].items() - ) - - cmd_line = f"{DEBUGGER} ./{fuzzer} {FUZZER_ARGS} 
{corpus_dir}" - if custom_libfuzzer_options: - cmd_line += f" {custom_libfuzzer_options}" - - if not "-dict=" in cmd_line and Path(f"{fuzzer}.dict").exists(): - cmd_line += f" -dict={fuzzer}.dict" - - cmd_line += " < /dev/null" - - logging.info(f"...will execute: {cmd_line}") - subprocess.check_call(cmd_line, shell=True) +import runner def main(): - logging.basicConfig(level=logging.INFO) - - subprocess.check_call("ls -al", shell=True) - - with Path() as current: - for fuzzer in current.iterdir(): - if (current / fuzzer).is_file() and os.access(current / fuzzer, os.X_OK): - run_fuzzer(fuzzer) - + runner.run() exit(0) - if __name__ == "__main__": main() diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 8482abb26e0..0b7dd6dd0c9 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -275,5 +275,5 @@ def download_fuzzers( check_name, reports_path, result_path, - lambda x: x.endswith(("_fuzzer", ".dict", ".options", "_seed_corpus.zip")), + lambda x: x.endswith(("_fuzzer", ".dict", ".options", "_seed_corpus.zip", "runner.py")), ) diff --git a/tests/fuzz/build.sh b/tests/fuzz/build.sh index 12f41f6e079..cf871d2de58 100755 --- a/tests/fuzz/build.sh +++ b/tests/fuzz/build.sh @@ -1,5 +1,8 @@ #!/bin/bash -eu +# copy runner +cp $SRC/utils/libfuzzer/runner.py $OUT/ + # copy fuzzer options and dictionaries cp $SRC/tests/fuzz/*.dict $OUT/ cp $SRC/tests/fuzz/*.options $OUT/ diff --git a/utils/libfuzzer/runner.py b/utils/libfuzzer/runner.py new file mode 100644 index 00000000000..435eb49383b --- /dev/null +++ b/utils/libfuzzer/runner.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 + +import configparser +import logging +import os +from pathlib import Path +import subprocess + +DEBUGGER = os.getenv("DEBUGGER", "") +FUZZER_ARGS = os.getenv("FUZZER_ARGS", "") + + +def run_fuzzer(fuzzer: str): + logging.info(f"Running fuzzer {fuzzer}...") + + corpus_dir = f"{fuzzer}.in" + with Path(corpus_dir) as path: + if not 
path.exists() or not path.is_dir(): + corpus_dir = "" + + options_file = f"{fuzzer}.options" + custom_libfuzzer_options = "" + fuzzer_arguments = "" + + with Path(options_file) as path: + if path.exists() and path.is_file(): + parser = configparser.ConfigParser() + parser.read(path) + + if parser.has_section("asan"): + os.environ["ASAN_OPTIONS"] = ( + f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + ) + + if parser.has_section("msan"): + os.environ["MSAN_OPTIONS"] = ( + f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + ) + + if parser.has_section("ubsan"): + os.environ["UBSAN_OPTIONS"] = ( + f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + ) + + if parser.has_section("libfuzzer"): + custom_libfuzzer_options = " ".join( + "-%s=%s" % (key, value) + for key, value in parser["libfuzzer"].items() + ) + + if parser.has_section("fuzzer_arguments"): + fuzzer_arguments = " ".join( + ("%s" % key) if value == "" else ("%s=%s" % (key, value)) + for key, value in parser["fuzzer_arguments"].items() + ) + + cmd_line = f"{DEBUGGER} ./{fuzzer} {FUZZER_ARGS} {corpus_dir}" + if custom_libfuzzer_options: + cmd_line += f" {custom_libfuzzer_options}" + if fuzzer_arguments: + cmd_line += f" {fuzzer_arguments}" + + if not "-dict=" in cmd_line and Path(f"{fuzzer}.dict").exists(): + cmd_line += f" -dict={fuzzer}.dict" + + cmd_line += " < /dev/null" + + logging.info(f"...will execute: {cmd_line}") + subprocess.check_call(cmd_line, shell=True) + + +def run(): + logging.basicConfig(level=logging.INFO) + + subprocess.check_call("ls -al", shell=True) + + with Path() as current: + for fuzzer in current.iterdir(): + if (current / fuzzer).is_file() and os.access(current / fuzzer, os.X_OK): + run_fuzzer(fuzzer) From 97c9739abcb6b6e5da113ac12a35078fe694aff6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 15 
Jul 2024 17:21:50 +0000 Subject: [PATCH 32/35] Automatic style fix --- docker/test/libfuzzer/run_libfuzzer.py | 1 + tests/ci/build_download_helper.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py index 25d5ede028d..06c816c8943 100755 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ b/docker/test/libfuzzer/run_libfuzzer.py @@ -7,5 +7,6 @@ def main(): runner.run() exit(0) + if __name__ == "__main__": main() diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 0b7dd6dd0c9..c3e516f836d 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -275,5 +275,7 @@ def download_fuzzers( check_name, reports_path, result_path, - lambda x: x.endswith(("_fuzzer", ".dict", ".options", "_seed_corpus.zip", "runner.py")), + lambda x: x.endswith( + ("_fuzzer", ".dict", ".options", "_seed_corpus.zip", "runner.py") + ), ) From 5fc4fada610b9d7a62528eaecb600bd5d16d48ae Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 18:18:15 +0200 Subject: [PATCH 33/35] add info about previous release --- tests/ci/ci_utils.py | 5 +++-- tests/ci/create_release.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 44bd37fe260..25875e55df6 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -49,14 +49,15 @@ class GHActions: class Shell: @classmethod def run_strict(cls, command): - subprocess.run( - command + " 2>&1", + res = subprocess.run( + command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True, ) + return res.stdout.strip() @classmethod def run(cls, command): diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index f377e12d11c..e5723e00e2a 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -14,6 +14,7 @@ from ssh import SSHAgent from env_helper 
import GITHUB_REPOSITORY, S3_BUILDS_BUCKET from s3_helper import S3Helper from autoscale_runners_lambda.lambda_shared.pr import Labels +from ci_utils import Shell from version_helper import ( FILE_WITH_VERSION_PATH, GENERATED_CONTRIBUTORS, @@ -65,6 +66,8 @@ class ReleaseInfo: commit_sha: str # lts or stable codename: str + previous_release_tag: str + previous_release_sha: str @staticmethod def from_file(file_path: str) -> "ReleaseInfo": @@ -79,6 +82,8 @@ class ReleaseInfo: version = None release_branch = None release_tag = None + previous_release_tag = None + previous_release_sha = None codename = None assert release_type in ("patch", "new") if release_type == "new": @@ -101,6 +106,11 @@ class ReleaseInfo: codename = ( VersionType.STABLE ) # dummy value (artifactory won't be updated for new release) + previous_release_tag = expected_prev_tag + previous_release_sha = Shell.run_strict( + f"git rev-parse {previous_release_tag}" + ) + assert previous_release_sha if release_type == "patch": with checkout(commit_ref): _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") @@ -118,6 +128,7 @@ class ReleaseInfo: ) if version.patch == 1: expected_version = copy(version) + previous_release_tag = f"v{version.major}.{version.minor}.1.1-new" expected_version.bump() expected_tag_prefix = ( f"v{expected_version.major}.{expected_version.minor}." @@ -128,6 +139,7 @@ class ReleaseInfo: f"v{version.major}.{version.minor}.{version.patch-1}." 
) expected_tag_suffix = f"-{version.get_stable_release_type()}" + previous_release_tag = git.latest_tag if git.latest_tag.startswith( expected_tag_prefix ) and git.latest_tag.endswith(expected_tag_suffix): @@ -137,8 +149,15 @@ class ReleaseInfo: False ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]" + previous_release_sha = Shell.run_strict( + f"git rev-parse {previous_release_tag}" + ) + assert previous_release_sha + assert ( release_branch + and previous_release_tag + and previous_release_sha and commit_sha and release_tag and version @@ -150,6 +169,8 @@ class ReleaseInfo: release_tag=release_tag, version=version.string, codename=codename, + previous_release_tag=previous_release_tag, + previous_release_sha=previous_release_sha, ) with open(outfile, "w", encoding="utf-8") as f: print(json.dumps(dataclasses.asdict(res), indent=2), file=f) From 5361ade8e709f3ebe8916aa80669f4bec8b5c726 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 15 Jul 2024 19:10:07 +0000 Subject: [PATCH 34/35] optimize --- docker/test/libfuzzer/Dockerfile | 2 +- docker/test/libfuzzer/run_libfuzzer.py | 12 ------------ tests/ci/build_download_helper.py | 4 +--- tests/fuzz/build.sh | 3 --- utils/libfuzzer/runner.py | 6 +++++- 5 files changed, 7 insertions(+), 20 deletions(-) delete mode 100755 docker/test/libfuzzer/run_libfuzzer.py diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index e6eb2ae336e..9ded2ca4e3a 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -39,7 +39,7 @@ ENV FUZZER_ARGS="-max_total_time=60" SHELL ["/bin/bash", "-c"] CMD set -o pipefail \ - && timeout -s 9 1h /run_libfuzzer.py 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log + && timeout -s 9 1h ./utils/libfuzzer/runner.py 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log # docker run --network=host --volume :/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> 
clickhouse/libfuzzer diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py deleted file mode 100755 index 06c816c8943..00000000000 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python3 - -import runner - - -def main(): - runner.run() - exit(0) - - -if __name__ == "__main__": - main() diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index c3e516f836d..8482abb26e0 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -275,7 +275,5 @@ def download_fuzzers( check_name, reports_path, result_path, - lambda x: x.endswith( - ("_fuzzer", ".dict", ".options", "_seed_corpus.zip", "runner.py") - ), + lambda x: x.endswith(("_fuzzer", ".dict", ".options", "_seed_corpus.zip")), ) diff --git a/tests/fuzz/build.sh b/tests/fuzz/build.sh index cf871d2de58..12f41f6e079 100755 --- a/tests/fuzz/build.sh +++ b/tests/fuzz/build.sh @@ -1,8 +1,5 @@ #!/bin/bash -eu -# copy runner -cp $SRC/utils/libfuzzer/runner.py $OUT/ - # copy fuzzer options and dictionaries cp $SRC/tests/fuzz/*.dict $OUT/ cp $SRC/tests/fuzz/*.options $OUT/ diff --git a/utils/libfuzzer/runner.py b/utils/libfuzzer/runner.py index 435eb49383b..bbe648dbbc2 100644 --- a/utils/libfuzzer/runner.py +++ b/utils/libfuzzer/runner.py @@ -69,7 +69,7 @@ def run_fuzzer(fuzzer: str): subprocess.check_call(cmd_line, shell=True) -def run(): +def main(): logging.basicConfig(level=logging.INFO) subprocess.check_call("ls -al", shell=True) @@ -78,3 +78,7 @@ def run(): for fuzzer in current.iterdir(): if (current / fuzzer).is_file() and os.access(current / fuzzer, os.X_OK): run_fuzzer(fuzzer) + + +if __name__ == "__main__": + main() From 0176e0b2ae534fee35a6e2bcab8a74a0783e8137 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 15 Jul 2024 19:27:54 +0000 Subject: [PATCH 35/35] fix --- docker/test/libfuzzer/Dockerfile | 4 ---- tests/ci/libfuzzer_test_check.py | 3 ++- 2 files changed, 2 
insertions(+), 5 deletions(-) diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index 9ded2ca4e3a..3ffae0cd921 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -33,13 +33,9 @@ RUN apt-get update \ COPY requirements.txt / RUN pip3 install --no-cache-dir -r /requirements.txt -COPY * / - ENV FUZZER_ARGS="-max_total_time=60" SHELL ["/bin/bash", "-c"] -CMD set -o pipefail \ - && timeout -s 9 1h ./utils/libfuzzer/runner.py 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log # docker run --network=host --volume :/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/libfuzzer diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 1f5936c3fec..d9e33229932 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -74,7 +74,8 @@ def get_run_command( f"--volume={repo_path}/tests:/usr/share/clickhouse-test " f"--volume={result_path}:/test_output " "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls - f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" + f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image} " + "python3 ./utils/runner.py" )