From 7f34af467e955a363debb3575b45f30cd60b64f3 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 13 Feb 2024 16:00:26 +0100 Subject: [PATCH 001/100] Add azure_cache as storage policy to tests --- .../queries/0_stateless/02240_system_filesystem_cache_table.sh | 2 +- .../0_stateless/02241_filesystem_cache_on_write_operations.sh | 2 +- .../0_stateless/02242_system_filesystem_cache_log_table.sh | 2 +- tests/queries/0_stateless/02286_drop_filesystem_cache.sh | 2 +- tests/queries/0_stateless/02313_filesystem_cache_seeks.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 6a94cffea5a..2102b7efdb2 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" ${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES" ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 96f61cf61e8..dafd9f7a640 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02241" diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index 4c92d1d2954..7a665d81eab 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index 1e1841862e9..a2c9352b7aa 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02286" diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index f5de4346fd6..fbaec1ffaa7 100755 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache' 's3_cache_multi'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 's3_cache_multi' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" From 0ca07617393636bff00fe107287bb841138d050e Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 14 Feb 2024 11:57:49 +0100 Subject: [PATCH 002/100] Updated output of tests --- ...40_system_filesystem_cache_table.reference | 18 +++++++++++ ...system_cache_on_write_operations.reference | 31 +++++++++++++++++++ ...ystem_filesystem_cache_log_table.reference | 4 +++ .../02286_drop_filesystem_cache.reference | 9 ++++++ .../02313_filesystem_cache_seeks.reference | 1 + 5 files changed, 63 insertions(+) diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index 93b6d4de94f..6b5dd182112 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -34,3 +34,21 @@ DOWNLOADED 0 79 80 DOWNLOADED 0 745 746 2 Expect no cache +Using storage policy: azure_cache +0 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 157837983f7..d23a4435bdf 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -60,3 +60,34 @@ state: DOWNLOADED INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) 0 5010500 18816 +Using storage policy: azure_cache +0 +0 0 +Row 1: +────── +file_segment_range_begin: 0 +file_segment_range_end: 745 +size: 746 +state: DOWNLOADED +8 +8 1100 +0 +2 +2 +8 1100 +Row 1: +────── +file_segment_range_begin: 0 +file_segment_range_end: 1659 +size: 1660 +state: DOWNLOADED +8 +8 2014 +8 2014 +8 2014 +24 84045 +32 167243 +41 250541 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) 0 +5010500 +18816 diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference index 99f31df7def..447e1a275fc 100644 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference @@ -6,3 +6,7 @@ Using storage policy: local_cache (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) 
READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_CACHE +Using storage policy: azure_cache +(0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +(0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +(0,808110) READ_FROM_CACHE diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference index b4e5b6715de..e3875dbabe1 100644 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference @@ -16,3 +16,12 @@ Using storage policy: local_cache 1 1 0 +Using storage policy: azure_cache +0 +2 +0 +1 +1 +1 +1 +0 diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference index 062aac259a4..0a9e1c20b59 100644 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference @@ -1,3 +1,4 @@ Using storage policy: s3_cache Using storage policy: local_cache Using storage policy: s3_cache_multi +Using storage policy: azure_cache From 7c1ac996e1900313b2eb56b7b61bf1dfd7e3cfb5 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 16 Feb 2024 11:30:39 +0100 Subject: [PATCH 003/100] Updated to add azure config to ARM --- tests/config/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/install.sh b/tests/config/install.sh index cfe810cda84..8b40d353a92 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -168,7 +168,7 @@ ARM="aarch64" OS="$(uname -m)" if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then echo "$OS" - if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$OS" == "$ARM" ]]; then + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then echo "Azure configuration will not be added" else echo "Adding azure configuration" From 7c9dbdbd9c01167a49767ab31cc700f3a91421ee Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Fri, 22 Mar 2024 15:32:43 +0800 Subject: [PATCH 004/100] Improve json read by ignore key case --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../Impl/JSONEachRowRowInputFormat.cpp | 12 ++++++++++++ .../Formats/Impl/JSONEachRowRowInputFormat.h | 8 +++++++- .../03013_json_key_ignore_case.reference | 3 +++ .../0_stateless/03013_json_key_ignore_case.sh | 18 ++++++++++++++++++ .../data_json/key_ignore_case.json | Bin 0 -> 123 bytes 8 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03013_json_key_ignore_case.reference create mode 100644 tests/queries/0_stateless/03013_json_key_ignore_case.sh create mode 100644 tests/queries/0_stateless/data_json/key_ignore_case.json diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 375bdb1c516..777bed7b7ed 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1057,6 +1057,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ + M(Bool, input_format_json_ignore_key_case, false, "Ignore json key case while read json field from string", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 7d7a2b17a8a..ed324515697 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -145,6 +145,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; + format_settings.json.ignore_key_case = settings.input_format_json_ignore_key_case; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_rows = settings.output_format_parquet_row_group_size; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index da225a39ec9..136381deff9 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -224,6 +224,7 @@ struct FormatSettings bool infer_incomplete_types_as_strings = true; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; + bool ignore_key_case = false; } json{}; struct diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index a78d8d016cd..8570fb2e75b 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -45,6 +45,14 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat( { const auto & header = getPort().getHeader(); name_map = header.getNamesToIndexesMap(); + if (format_settings_.json.ignore_key_case) + { + for (auto it = name_map.begin(); it != name_map.end(); ++it) + { + StringRef key = it->first; + transformFieldNameToLowerCase(key); + } + } if (format_settings_.import_nested_json) { for (size_t i = 0; i != header.columns(); ++i) @@ -169,6 +177,10 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns) JSONUtils::skipColon(*in); skipUnknownField(name_ref); continue; + } + if (format_settings.json.ignore_key_case) + { + transformFieldNameToLowerCase(name_ref); } const size_t column_index = columnIndex(name_ref, key_index); diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index d97aa2dad8d..24d585d07bc 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -55,7 +55,13 @@ private: virtual void readRowStart(MutableColumns &) {} virtual void skipRowStart() {} - + void transformFieldNameToLowerCase(StringRef & field_name) + { + if (!field_name.data) return; + char * name_data = 
const_cast<char *>(field_name.data); + for (size_t i = 0; i < field_name.size; ++i) + name_data[i] = std::tolower(name_data[i]); + } /// Buffer for the read from the stream field name. Used when you have to copy it. /// Also, if processing of Nested data is in progress, it holds the common prefix /// of the nested column names (so that appending the field name to it produces diff --git a/tests/queries/0_stateless/03013_json_key_ignore_case.reference b/tests/queries/0_stateless/03013_json_key_ignore_case.reference new file mode 100644 index 00000000000..54683d8fbc5 --- /dev/null +++ b/tests/queries/0_stateless/03013_json_key_ignore_case.reference @@ -0,0 +1,3 @@ +1 77328912 Ben +2 77328913 Jim +3 77328914 Bill diff --git a/tests/queries/0_stateless/03013_json_key_ignore_case.sh b/tests/queries/0_stateless/03013_json_key_ignore_case.sh new file mode 100644 index 00000000000..807e743b22a --- /dev/null +++ b/tests/queries/0_stateless/03013_json_key_ignore_case.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') + +cp "$CURDIR"/data_json/key_ignore_case.json $USER_FILES_PATH/ + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (id UInt16, reqid UInt32, name String) engine=MergeTree order by id" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_tbl SELECT * FROM file('key_ignore_case.json', 'JSONEachRow') SETTINGS input_format_json_ignore_key_case=true" +$CLICKHOUSE_CLIENT -q "select * from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/data_json/key_ignore_case.json b/tests/queries/0_stateless/data_json/key_ignore_case.json new file mode 100644 index 0000000000000000000000000000000000000000..ad8f7cb450780891d64ac8cbbc19de17b92e7db5 GIT binary patch literal 123 zcmbzd5O8HN>)lvsd-AZT-9KOAYoL6#t4O8 VnYnO Date: Mon, 25 Mar 2024 16:26:33 +0800 Subject: [PATCH 005/100] ci fix --- .../Formats/Impl/JSONEachRowRowInputFormat.cpp | 11 ++++++++--- .../Formats/Impl/JSONEachRowRowInputFormat.h | 13 +++++++------ .../queries/0_stateless/02995_baseline_23_12_1.tsv | 1 + 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 8570fb2e75b..b5800ab6e3c 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -50,7 +50,8 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat( for (auto it = name_map.begin(); it != name_map.end(); ++it) { StringRef key = it->first; - transformFieldNameToLowerCase(key); + String lower_case_key = transformFieldNameToLowerCase(key); + lower_case_name_map[lower_case_key] = key; } } if (format_settings_.import_nested_json) @@ -178,11 +179,15 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns) skipUnknownField(name_ref); continue; } + size_t column_index = 0; if (format_settings.json.ignore_key_case) { - transformFieldNameToLowerCase(name_ref); + String lower_case_name = transformFieldNameToLowerCase(name_ref); + StringRef field_name_ref = 
lower_case_name_map[lower_case_name]; + column_index = columnIndex(field_name_ref, key_index); } - const size_t column_index = columnIndex(name_ref, key_index); + else + column_index = columnIndex(name_ref, key_index); if (unlikely(ssize_t(column_index) < 0)) { diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index 24d585d07bc..8a1cef8fa9f 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -55,12 +55,12 @@ private: virtual void readRowStart(MutableColumns &) {} virtual void skipRowStart() {} - String transformFieldNameToLowerCase(const StringRef & field_name) { - if (!field_name.data) return; - char * name_data = const_cast<char *>(field_name.data); - for (size_t i = 0; i < field_name.size; ++i) - name_data[i] = std::tolower(name_data[i]); + String field_name_str = field_name.toString(); + std::transform(field_name_str.begin(), field_name_str.end(), field_name_str.begin(), + [](unsigned char c) { return std::tolower(c); }); + return field_name_str; } /// Buffer for the read from the stream field name. Used when you have to copy it. /// Also, if processing of Nested data is in progress, it holds the common prefix @@ -80,7 +80,8 @@ private: /// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map. Block::NameMap name_map; - + /// Hash table match `lower_case field name -> field name in the block`. + std::unordered_map<String, StringRef> lower_case_name_map; /// Cached search results for previous row (keyed as index in JSON object) - used as a hint. std::vector prev_positions; diff --git a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv index 4c0c9125b46..8c9add82db3 100644 --- a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv +++ b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv @@ -378,6 +378,7 @@ input_format_json_read_objects_as_strings 1 input_format_json_try_infer_named_tuples_from_objects 1 input_format_json_try_infer_numbers_from_strings 0 input_format_json_validate_types_from_metadata 1 +input_format_json_ignore_key_case 0 input_format_max_bytes_to_read_for_schema_inference 33554432 input_format_max_rows_to_read_for_schema_inference 25000 input_format_msgpack_number_of_columns 0 From bb6520e1ef072a479510b75b804ac68feaa45af5 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Mon, 25 Mar 2024 17:27:21 +0800 Subject: [PATCH 006/100] ci fix --- tests/queries/0_stateless/03013_json_key_ignore_case.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tests/queries/0_stateless/03013_json_key_ignore_case.sh diff --git a/tests/queries/0_stateless/03013_json_key_ignore_case.sh b/tests/queries/0_stateless/03013_json_key_ignore_case.sh old mode 100644 new mode 100755 From fde80081c3bac73af73b3361a8472aa5fb72c0af Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 26 Mar 2024 19:50:43 +0800 Subject: [PATCH 007/100] review fix --- src/Core/SettingsChangesHistory.h | 1 + tests/queries/0_stateless/02995_baseline_23_12_1.tsv | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index d3b5de06e70..2f7b25b9eee 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -133,6 +133,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett {"azure_max_upload_part_size", 5ull*1024*1024*1024, 
5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, + {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv index 8c9add82db3..4c0c9125b46 100644 --- a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv +++ b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv @@ -378,7 +378,6 @@ input_format_json_read_objects_as_strings 1 input_format_json_try_infer_named_tuples_from_objects 1 input_format_json_try_infer_numbers_from_strings 0 input_format_json_validate_types_from_metadata 1 -input_format_json_ignore_key_case 0 input_format_max_bytes_to_read_for_schema_inference 33554432 input_format_max_rows_to_read_for_schema_inference 25000 input_format_msgpack_number_of_columns 0 From 70d6a62662ff2842a84ef4138e311cb882f46836 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Wed, 3 Apr 2024 15:25:07 +0800 Subject: [PATCH 008/100] Fix build problem --- src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index b5800ab6e3c..17c808a7f70 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -47,9 +47,9 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat( name_map = header.getNamesToIndexesMap(); if (format_settings_.json.ignore_key_case) { - for (auto it = name_map.begin(); it != name_map.end(); ++it) + for (auto & it : name_map) { - StringRef key = it->first; + StringRef key = it.first; String lower_case_key = transformFieldNameToLowerCase(key); lower_case_name_map[lower_case_key] = key; } From dffa4d691fbbe28c687840cc8e40c908b52d731f Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Thu, 25 Apr 2024 14:40:01 +0800 Subject: [PATCH 009/100] check style --- src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 17c808a7f70..e646b043976 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -178,7 +178,7 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns) JSONUtils::skipColon(*in); skipUnknownField(name_ref); continue; - } + } size_t column_index = 0; if (format_settings.json.ignore_key_case) { From 3a7e338839013e46d30e27f0fe7f959d4deb66cf Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 
22 May 2024 16:24:16 +0200 Subject: [PATCH 010/100] Remove extra print --- tests/config/install.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/config/install.sh b/tests/config/install.sh index 3eaa39fa8d3..f15990fb9c9 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -181,8 +181,7 @@ elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then ln -sf $SRC_PATH/config.d/azure_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ fi -if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then - echo "$OS" +if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then echo "Azure configuration will not be added" else From bb2f51c51fab0942291b50194333e0a587541ac3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 3 Jun 2024 16:56:17 +0200 Subject: [PATCH 011/100] Fix filling parts columns from metadata (when columns.txt does not exist) The getFileNameForColumn() inside IMergeTreeDataPart::loadColumns() requires serializations that will be filled only at the end of loadColumns() (setColumns()): (lldb) bt * thread 718, name = 'ActiveParts', stop reason = breakpoint 1.1 * frame 0: 0x00000000111e50dc clickhouse`DB::IMergeTreeDataPart::getSerialization(this=0x00007ffd18c39918, column_name="key") const + 124 at IMergeTreeDataPart.cpp:508 frame 1: 0x0000000011340f57 clickhouse`DB::MergeTreeDataPartWide::getFileNameForColumn(this=0x00007ffd18c39918, column=) const + 55 at MergeTreeDataPartWide.cpp:301 frame 2: 0x00000000111e9a2b clickhouse`DB::IMergeTreeDataPart::loadColumns(this=0x00007ffd18c39918, require=false) + 1067 at IMergeTreeDataPart.cpp:1577 frame 3: 0x00000000111e8d52 clickhouse`DB::IMergeTreeDataPart::loadColumnsChecksumsIndexes(this=0x00007ffd18c39918, require_columns_checksums=, check_consistency=true) + 82 at IMergeTreeDataPart.cpp:713 frame 4: 0x0000000011284c10 clickhouse`DB::MergeTreeData::loadDataPart(this=0x00007ffcf9242e40, part_info=0x00007ffcf937c298, part_name="2024", part_disk_ptr=std::__1::shared_ptr::element_type @ 0x00007ffcf925af18 strong=11 weak=2, to_state=Active, part_loading_mutex=0x00007ffef97efeb0) + 3152 at MergeTreeData.cpp:1435 So let's add a fallback into getFileNameForColumn() for the case when serializations are not filled yet.
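For illustration only (this sketch is not part of the patch; the table name, part name and paths are made up), the broken state can be produced by hand roughly like this:

    # create a table with one part, then remove the part's columns.txt, so that
    # on the next load the columns list has to be recovered from the data files
    # on disk via getFileNameForColumn()
    clickhouse-client -q "CREATE TABLE t_repro (key UInt64) ENGINE = MergeTree ORDER BY key"
    clickhouse-client -q "INSERT INTO t_repro VALUES (1)"
    sudo rm /var/lib/clickhouse/data/default/t_repro/all_1_1_0/columns.txt
    sudo systemctl restart clickhouse-server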
Note that it is OK to add this check as a generic one, since after loadColumns() serializations cannot be empty, since loadColumns() does not allow this (there is only one more caller of getFileNameForColumn() - loadIndexGranularity()) Signed-off-by: Azat Khuzhin --- src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 149f86cef00..d91fc5dfbd8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -298,6 +298,11 @@ std::optional<time_t> MergeTreeDataPartWide::getColumnModificationTime(const Str std::optional<String> MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const { std::optional<String> filename; + + /// Fallback for the case when serializations were not loaded yet (called from loadColumns()) + if (getSerializations().empty()) + return getStreamNameForColumn(column, {}, DATA_FILE_EXTENSION, getDataPartStorage()); + getSerialization(column.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { if (!filename.has_value()) @@ -309,6 +314,7 @@ std::optional<String> MergeTreeDataPartWide::getFileNameForColumn(const NameAndT filename = getStreamNameForColumn(column, substream_path, DATA_FILE_EXTENSION, getDataPartStorage()); } }); + return filename; } From a1cb72a283da0b2f5034864e9118719f3e61727c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 3 Jun 2024 17:19:27 +0200 Subject: [PATCH 012/100] Fix "No columns in part" check Signed-off-by: Azat Khuzhin --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c276361559c..85a1e6b27be 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1577,7 +1577,7 @@ void IMergeTreeDataPart::loadColumns(bool require) if (getFileNameForColumn(column)) loaded_columns.push_back(column); - if (columns.empty()) + if (loaded_columns.empty()) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); if (!is_readonly_storage) From 8d46078911194d81524bb9aaadca858e23756947 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 5 Jun 2024 10:59:22 +0200 Subject: [PATCH 013/100] Use primary.cidx instead of columns.txt to break parts After the fixes for columns.txt, the test_max_suspicious_broken_parts test started to fail Signed-off-by: Azat Khuzhin --- tests/integration/test_max_suspicious_broken_parts/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_max_suspicious_broken_parts/test.py b/tests/integration/test_max_suspicious_broken_parts/test.py index c1f34adbb62..831531ed690 100644 --- a/tests/integration/test_max_suspicious_broken_parts/test.py +++ b/tests/integration/test_max_suspicious_broken_parts/test.py @@ -25,7 +25,7 @@ def break_part(table, part_name): [ "bash", "-c", - f"rm /var/lib/clickhouse/data/default/{table}/{part_name}/columns.txt", + f"rm /var/lib/clickhouse/data/default/{table}/{part_name}/primary.cidx", ] ) From 0302741d50c3d6baf8ac64895dc74214bf1331c1 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 12 Jun 2024 19:23:47 +0200 Subject: [PATCH 014/100] Support aliases in parametrized view function (only new analyzer) --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 4 +- src/Interpreters/Context.cpp | 8 ++- 
src/Interpreters/Context.h | 4 +- .../FunctionParameterValuesVisitor.cpp | 68 +++++++++++++------ src/Parsers/FunctionParameterValuesVisitor.h | 3 +- ...03167_parametrized_view_with_cte.reference | 2 + .../03167_parametrized_view_with_cte.sql | 4 ++ 7 files changed, 65 insertions(+), 28 deletions(-) create mode 100644 tests/queries/0_stateless/03167_parametrized_view_with_cte.reference create mode 100644 tests/queries/0_stateless/03167_parametrized_view_with_cte.sql diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 5e5ecaaa93a..0bf61bb508b 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -4505,7 +4505,9 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, table_name = table_identifier[1]; } - auto parametrized_view_storage = scope_context->getQueryContext()->buildParametrizedViewStorage(function_ast, database_name, table_name); + auto parametrized_view_storage = scope_context->getQueryContext()->buildParametrizedViewStorage( + function_ast, database_name, table_name, scope.aliases); + if (parametrized_view_storage) { auto fake_table_node = std::make_shared<TableNode>(parametrized_view_storage, scope_context); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 67ea069d46d..7fb1b0d0374 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -1871,7 +1872,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const if (table.get()->isView() && table->as<StorageView>() && table->as<StorageView>()->isParameterizedView()) { auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression); + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, ScopeAliases{}); StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); ASTCreateQuery create; @@ -2072,7 +2073,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } -StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name) +StoragePtr Context::buildParametrizedViewStorage( + const ASTPtr & table_expression, const String & database_name, const String & table_name, const ScopeAliases & scope_aliases) { if (table_name.empty()) return nullptr; @@ -2085,7 +2087,7 @@ StoragePtr Context::buildParametrizedViewStorage( return nullptr; auto query = original_view->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression); + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, scope_aliases); StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); ASTCreateQuery create; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 68f37377926..5c0f96a04a9 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -147,6 +147,7 @@ class ServerType; template <class Queue> class MergeTreeBackgroundExecutor; class AsyncLoader; +struct ScopeAliases; struct TemporaryTableHolder; using TemporaryTablesMapping = std::map<String, std::shared_ptr<TemporaryTableHolder>>; @@ -743,7 +744,8 @@ public: /// Overload for the new analyzer. Structure inference is performed in QueryAnalysisPass. 
StoragePtr executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr); - StoragePtr buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name); + StoragePtr buildParametrizedViewStorage( + const ASTPtr & table_expression, const String & database_name, const String & table_name, const ScopeAliases & scope_aliases); void addViewSource(const StoragePtr & storage); StoragePtr getViewSource() const; diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 3692a4c73e5..44bf36c2526 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -4,9 +4,14 @@ #include #include #include -#include +#include #include +#include +#include +#include +#include #include +#include namespace DB @@ -20,8 +25,8 @@ namespace ErrorCodes class FunctionParameterValuesVisitor { public: - explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_) - : parameter_values(parameter_values_) + explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_, const ScopeAliases & aliases_) + : parameter_values(parameter_values_), aliases(aliases_) { } @@ -29,12 +34,45 @@ public: { if (const auto * function = ast->as<ASTFunction>()) visitFunction(*function); + for (const auto & child : ast->children) visit(child); } private: NameToNameMap & parameter_values; + const ScopeAliases & aliases; + + std::string tryGetParameterValueAsString(const ASTPtr & ast) + { + if (const auto * literal = ast->as<ASTLiteral>()) + { + return convertFieldToString(literal->value); + } + else if (const auto * value_identifier = ast->as<ASTIdentifier>()) + { + auto it = aliases.alias_name_to_expression_node_before_group_by.find(value_identifier->name()); + if (it != aliases.alias_name_to_expression_node_before_group_by.end()) + { + return tryGetParameterValueAsString(it->second->toAST()); + } + } + else if (const auto * function = ast->as<ASTFunction>()) + { + if (isFunctionCast(function)) + { + const auto * cast_expression = assert_cast<const ASTExpressionList *>(function->arguments.get()); + if (cast_expression->children.size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function CAST must have exactly two arguments"); + + if (const auto * cast_literal = cast_expression->children[0]->as<ASTLiteral>()) + { + return convertFieldToString(cast_literal->value); + } + } + } + return ""; + } void visitFunction(const ASTFunction & parameter_function) { @@ -48,31 +86,17 @@ private: if (const auto * identifier = expression_list->children[0]->as<ASTIdentifier>()) { - if (const auto * literal = expression_list->children[1]->as<ASTLiteral>()) - { - parameter_values[identifier->name()] = convertFieldToString(literal->value); - } - else if (const auto * function = expression_list->children[1]->as<ASTFunction>()) - { - if (isFunctionCast(function)) - { - const auto * cast_expression = assert_cast<const ASTExpressionList *>(function->arguments.get()); - if (cast_expression->children.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function CAST must have exactly two arguments"); - if (const auto * cast_literal = cast_expression->children[0]->as<ASTLiteral>()) - { - parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); - } - } - } + auto value_str = tryGetParameterValueAsString(expression_list->children[1]); + if (!value_str.empty()) + parameter_values[identifier->name()] = value_str; } } }; -NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast) +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, 
const ScopeAliases & scope_aliases) { NameToNameMap parameter_values; - FunctionParameterValuesVisitor(parameter_values).visit(ast); + FunctionParameterValuesVisitor(parameter_values, scope_aliases).visit(ast); return parameter_values; } diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index e6ce0e42d06..01ce79a2a76 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -6,8 +6,9 @@ namespace DB { +struct ScopeAliases; /// Find parameters in a query parameter values and collect them into map. -NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast); +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, const ScopeAliases & scope_aliases); } diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference new file mode 100644 index 00000000000..90afb158f23 --- /dev/null +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference @@ -0,0 +1,2 @@ +OK +123 diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql new file mode 100644 index 00000000000..433f4ed040b --- /dev/null +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer=1; +CREATE OR REPLACE VIEW param_test AS SELECT {test_str:String} as s_result; +WITH 'OK' AS s SELECT * FROM param_test(test_str=s); +WITH CAST(123, String) AS s SELECT * FROM param_test(test_str=s); From b48a20735fe8ada2b02684a283331627b264a2e6 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 14 Jun 2024 13:26:23 +0200 Subject: [PATCH 015/100] Mask account key for azureBlobStorage & azureBlobStorageCluster functions --- .../FunctionSecretArgumentsFinderAST.h | 44 +++++++++++++++++++ .../test_mask_sensitive_info/test.py | 17 +++++++ 2 files changed, 61 insertions(+) diff --git a/src/Parsers/FunctionSecretArgumentsFinderAST.h b/src/Parsers/FunctionSecretArgumentsFinderAST.h index 348b2ca9e3a..537c65f829f 100644 --- a/src/Parsers/FunctionSecretArgumentsFinderAST.h +++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h @@ -82,6 +82,16 @@ private: /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) findS3FunctionSecretArguments(/* is_cluster_function= */ true); } + else if (function.name == "azureBlobStorage") + { + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (function.name == "azureBlobStorageCluster") + { + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true); + } else if ((function.name == "remote") || (function.name == "remoteSecure")) { /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) @@ -169,6 +179,40 @@ private: markSecretArgument(url_arg_idx + 2); } + void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function) + { + /// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 
1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...) + findSecretNamedArgument("account_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + size_t count = excludeS3OrURLNestedMaps(); + if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature + if (url_arg_idx + 4 < count) + markSecretArgument(url_arg_idx + 4); + } + void findURLSecretArguments() { if (!isNamedCollectionName(0)) diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 38cbf8c1aed..28e15fe0602 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -11,6 +11,7 @@ node = cluster.add_instance( ], user_configs=["configs/users.xml"], with_zookeeper=True, + with_azurite=True, ) @@ -327,6 +328,10 @@ def test_create_database(): def test_table_functions(): password = new_password() + azure_conn_string = cluster.env_variables['AZURITE_CONNECTION_STRING'] + azure_storage_account_url = cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL'] + azure_account_name = "devstoreaccount1" + azure_account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" table_functions = [ f"mysql('mysql80:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')", @@ -365,6 +370,12 @@ def test_table_functions(): f"deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", "DNS_ERROR", ), + f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')", + f"azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_1.csv', '{azure_account_name}', '{azure_account_key}')", + f"azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_2.csv', '{azure_account_name}', '{azure_account_key}', 'CSV')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_3.csv', 'CSV')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '{azure_account_key}')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '{azure_account_key}', 'CSV')", ] def make_test_case(i): @@ -438,6 +449,12 @@ def test_table_functions(): "CREATE TABLE tablefunc30 (x int) AS s3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV')", "CREATE TABLE tablefunc31 (`x` int) AS s3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '[HIDDEN]')", "CREATE TABLE tablefunc32 (`x` int) AS 
deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", + f"CREATE TABLE tablefunc33 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')", + f"CREATE TABLE tablefunc34 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_1.csv', '{azure_account_name}', '[HIDDEN]')", + f"CREATE TABLE tablefunc35 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_2.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')", + f"CREATE TABLE tablefunc36 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_3.csv', 'CSV')", + f"CREATE TABLE tablefunc37 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '[HIDDEN]')", + f"CREATE TABLE tablefunc38 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')", ], must_not_contain=[password], ) From 234f7e0d135882b92c46da78ba0da864a613095c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 14 Jun 2024 11:38:18 +0000 Subject: [PATCH 016/100] Automatic style fix --- tests/integration/test_mask_sensitive_info/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 28e15fe0602..965d6518164 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -328,8 +328,8 @@ def test_create_database(): def test_table_functions(): password = new_password() - azure_conn_string = cluster.env_variables['AZURITE_CONNECTION_STRING'] - azure_storage_account_url = cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL'] + azure_conn_string = cluster.env_variables["AZURITE_CONNECTION_STRING"] + azure_storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"] azure_account_name = "devstoreaccount1" azure_account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" From 81e5bfa689f1d306d906d3dff92475307c661bad Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 14 Jun 2024 16:04:52 +0200 Subject: [PATCH 017/100] Move analyzeFunctionParamValues into QueryAnalyzer.cpp --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 6 +++++- src/Interpreters/Context.cpp | 9 +++------ src/Interpreters/Context.h | 3 +-- src/Parsers/FunctionParameterValuesVisitor.cpp | 15 +++++++++------ src/Parsers/FunctionParameterValuesVisitor.h | 2 +- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 0bf61bb508b..dde89250a53 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -4505,8 +4506,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, table_name = table_identifier[1]; } + NameToNameMap param_values = analyzeFunctionParamValues(function_ast, &scope.aliases); auto parametrized_view_storage = scope_context->getQueryContext()->buildParametrizedViewStorage( - function_ast, database_name, table_name, scope.aliases); + database_name, + table_name, + param_values); if (parametrized_view_storage) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 
7fb1b0d0374..9e4ae99fc0a 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -1872,7 +1871,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) { auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, ScopeAliases{}); + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, nullptr); StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); ASTCreateQuery create; @@ -2073,8 +2072,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } -StoragePtr Context::buildParametrizedViewStorage( - const ASTPtr & table_expression, const String & database_name, const String & table_name, const ScopeAliases & scope_aliases) +StoragePtr Context::buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values) { if (table_name.empty()) return nullptr; @@ -2087,8 +2085,7 @@ StoragePtr Context::buildParametrizedViewStorage( return nullptr; auto query = original_view->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, scope_aliases); - StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); + StorageView::replaceQueryParametersIfParametrizedView(query, param_values); ASTCreateQuery create; create.select = query->as(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 5c0f96a04a9..7e3bb8d8e72 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -744,8 +744,7 @@ public: /// Overload for the new analyzer. Structure inference is performed in QueryAnalysisPass. 
StoragePtr executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr); - StoragePtr buildParametrizedViewStorage( - const ASTPtr & table_expression, const String & database_name, const String & table_name, const ScopeAliases & scope_aliases); + StoragePtr buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values); void addViewSource(const StoragePtr & storage); StoragePtr getViewSource() const; diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 44bf36c2526..1b629351b26 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes class FunctionParameterValuesVisitor { public: - explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_, const ScopeAliases & aliases_) + explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_, const ScopeAliases * aliases_) : parameter_values(parameter_values_), aliases(aliases_) { } @@ -41,7 +41,7 @@ public: private: NameToNameMap & parameter_values; - const ScopeAliases & aliases; + const ScopeAliases * aliases; std::string tryGetParameterValueAsString(const ASTPtr & ast) { @@ -51,10 +51,13 @@ private: } else if (const auto * value_identifier = ast->as()) { - auto it = aliases.alias_name_to_expression_node_before_group_by.find(value_identifier->name()); - if (it != aliases.alias_name_to_expression_node_before_group_by.end()) + if (aliases) { - return tryGetParameterValueAsString(it->second->toAST()); + auto it = aliases->alias_name_to_expression_node_before_group_by.find(value_identifier->name()); + if (it != aliases->alias_name_to_expression_node_before_group_by.end()) + { + return tryGetParameterValueAsString(it->second->toAST()); + } } } else if (const auto * function = ast->as()) @@ -93,7 +96,7 @@ private: } }; -NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, const ScopeAliases & scope_aliases) +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, const ScopeAliases * scope_aliases) { NameToNameMap parameter_values; FunctionParameterValuesVisitor(parameter_values, scope_aliases).visit(ast); diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index 01ce79a2a76..9f19c68a852 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -9,6 +9,6 @@ namespace DB struct ScopeAliases; /// Find parameters in a query parameter values and collect them into map. 
-NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, const ScopeAliases & scope_aliases); +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, const ScopeAliases * scope_aliases = nullptr); } From e77c0ad029b6843c8b08d4d008772027b3d42f13 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 17 Jun 2024 22:28:28 +0200 Subject: [PATCH 018/100] Support inline expressions --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 24 ++++++++++--- src/Interpreters/Context.cpp | 4 +-- .../FunctionParameterValuesVisitor.cpp | 34 ++++++++++++------- src/Parsers/FunctionParameterValuesVisitor.h | 10 +++++- ...03167_parametrized_view_with_cte.reference | 3 ++ .../03167_parametrized_view_with_cte.sql | 3 ++ 6 files changed, 58 insertions(+), 20 deletions(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index dde89250a53..71c7860c13c 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -3496,7 +3497,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. */ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) +ProjectionNames QueryAnalyzer::resolveExpressionNode( + QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) { checkStackSize(); @@ -4506,11 +4508,25 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, table_name = table_identifier[1]; } - NameToNameMap param_values = analyzeFunctionParamValues(function_ast, &scope.aliases); - auto parametrized_view_storage = scope_context->getQueryContext()->buildParametrizedViewStorage( + auto context = scope_context->getQueryContext(); + auto param_values_result = analyzeFunctionParamValues(function_ast, context, &scope.aliases); + + for (const auto & [param_name, alias] : param_values_result.unresolved_values) + { + auto it = scope.aliases.alias_name_to_expression_node_before_group_by.find(alias); + if (it != scope.aliases.alias_name_to_expression_node_before_group_by.end()) + { + auto res = resolveExpressionNode(it->second, scope, false, false); + auto resolved_value = evaluateConstantExpressionOrIdentifierAsLiteral(it->second->toAST(), context); + auto resolved_value_str = convertFieldToString(resolved_value->as()->value); + param_values_result.resolved_values.emplace(param_name, std::move(resolved_value_str)); + } + } + + auto parametrized_view_storage = context->buildParametrizedViewStorage( database_name, table_name, - param_values); + param_values_result.resolved_values); if (parametrized_view_storage) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 6804ef9e5e4..b606d6a3628 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1871,7 +1871,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) { auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext(), nullptr); + NameToNameMap 
parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext(), nullptr).resolved_values; StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); ASTCreateQuery create; @@ -2085,7 +2085,7 @@ StoragePtr Context::buildParametrizedViewStorage(const String & database_name, c return nullptr; auto query = original_view->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - StorageView::replaceQueryParametersIfParametrizedView(query, getQueryContext(), param_values); + StorageView::replaceQueryParametersIfParametrizedView(query, param_values); ASTCreateQuery create; create.select = query->as(); diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index dd3e536aadf..2b4f92d39c8 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -25,8 +25,13 @@ namespace ErrorCodes class FunctionParameterValuesVisitor { public: - explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_, ContextPtr context_, const ScopeAliases * aliases_) - : parameter_values(parameter_values_), aliases(aliases_), context(context_) + explicit FunctionParameterValuesVisitor( + ParamValuesAnalyzeResult & result_, + ContextPtr context_, + const ScopeAliases * aliases_) + : result(result_) + , aliases(aliases_) + , context(context_) { } @@ -40,11 +45,11 @@ public: } private: - NameToNameMap & parameter_values; + ParamValuesAnalyzeResult & result; const ScopeAliases * aliases; ContextPtr context; - std::string tryGetParameterValueAsString(const ASTPtr & ast) + std::string tryGetParameterValueAsString(const std::string & param_name, const ASTPtr & ast) { if (const auto * literal = ast->as()) { @@ -57,7 +62,10 @@ private: auto it = aliases->alias_name_to_expression_node_before_group_by.find(value_identifier->name()); if (it != aliases->alias_name_to_expression_node_before_group_by.end()) { - return tryGetParameterValueAsString(it->second->toAST()); + auto value_str = tryGetParameterValueAsString(param_name, it->second->toAST()); + if (value_str.empty()) + result.unresolved_values.emplace(param_name, value_identifier->name()); + return value_str; } } } @@ -76,8 +84,8 @@ private: } else { - ASTPtr res = evaluateConstantExpressionOrIdentifierAsLiteral(expression_list->children[1], context); - parameter_values[identifier->name()] = convertFieldToString(res->as()->value); + ASTPtr res = evaluateConstantExpressionOrIdentifierAsLiteral(ast, context); + return convertFieldToString(res->as()->value); } } return ""; @@ -95,18 +103,18 @@ private: if (const auto * identifier = expression_list->children[0]->as()) { - auto value_str = tryGetParameterValueAsString(expression_list->children[1]); + auto value_str = tryGetParameterValueAsString(identifier->name(), expression_list->children[1]); if (!value_str.empty()) - parameter_values[identifier->name()] = value_str; + result.resolved_values[identifier->name()] = value_str; } } }; -NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, const ContextPtr & context, const ScopeAliases * scope_aliases) +ParamValuesAnalyzeResult analyzeFunctionParamValues(const ASTPtr & ast, const ContextPtr & context, const ScopeAliases * scope_aliases) { - NameToNameMap parameter_values; - FunctionParameterValuesVisitor(parameter_values, context, scope_aliases).visit(ast); - return parameter_values; + ParamValuesAnalyzeResult result; + FunctionParameterValuesVisitor(result, context, scope_aliases).visit(ast); + 
return result; } diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index 7193066ee76..89d4fe4e18b 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -10,6 +10,14 @@ namespace DB struct ScopeAliases; /// Find parameters in a query parameter values and collect them into map. -NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, ContextPtr context, const ScopeAliases * scope_aliases = nullptr); +struct ParamValuesAnalyzeResult +{ + /// Param name -> resolved param value + NameToNameMap resolved_values; + /// Pram name -> alias + NameToNameMap unresolved_values; +}; + +ParamValuesAnalyzeResult analyzeFunctionParamValues(const ASTPtr & ast, const ContextPtr & context, const ScopeAliases * scope_aliases = nullptr); } diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference index 90afb158f23..951910bbe74 100644 --- a/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference @@ -1,2 +1,5 @@ OK 123 +123 +123 +123 diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql index 433f4ed040b..c64200ee8ff 100644 --- a/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql @@ -1,4 +1,7 @@ SET allow_experimental_analyzer=1; CREATE OR REPLACE VIEW param_test AS SELECT {test_str:String} as s_result; WITH 'OK' AS s SELECT * FROM param_test(test_str=s); +WITH (SELECT 123) AS s SELECT * FROM param_test(test_str=s); +WITH (SELECT 100 + 20 + 3) AS s SELECT * FROM param_test(test_str=s); +WITH (SELECT number FROM numbers(123, 1)) AS s SELECT * FROM param_test(test_str=s); WITH CAST(123, String) AS s SELECT * FROM param_test(test_str=s); From 660e7f8dd2f62f56c2731f6149e6a85d13fb8bc4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 18 Jun 2024 11:05:00 +0200 Subject: [PATCH 019/100] Better --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 23 ++++++++++++++----- src/Interpreters/Context.cpp | 2 +- .../FunctionParameterValuesVisitor.cpp | 15 +++++------- src/Parsers/FunctionParameterValuesVisitor.h | 6 ++--- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 71c7860c13c..e9f5a55dba9 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -4511,22 +4511,33 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, auto context = scope_context->getQueryContext(); auto param_values_result = analyzeFunctionParamValues(function_ast, context, &scope.aliases); - for (const auto & [param_name, alias] : param_values_result.unresolved_values) + for (const auto & [param_name, alias] : param_values_result.unresolved_param_aliases) { auto it = scope.aliases.alias_name_to_expression_node_before_group_by.find(alias); if (it != scope.aliases.alias_name_to_expression_node_before_group_by.end()) { - auto res = resolveExpressionNode(it->second, scope, false, false); - auto resolved_value = evaluateConstantExpressionOrIdentifierAsLiteral(it->second->toAST(), context); - auto resolved_value_str = convertFieldToString(resolved_value->as<ASTLiteral>()->value); - param_values_result.resolved_values.emplace(param_name,
std::move(resolved_value_str)); + std::string resolved_value_str; + try + { + resolveExpressionNode(it->second, scope, /* allow_lambda_expression */false, /* allow_table_expression */false); + auto resolved_value = evaluateConstantExpressionOrIdentifierAsLiteral(it->second->toAST(), context); + resolved_value_str = convertFieldToString(resolved_value->as()->value); + } + catch (...) + { + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Failed to resolve alias ({}) value for parameter {} for parametrized view function: {}. Error: {}", + alias, param_name, it->second->formatASTForErrorMessage(), getCurrentExceptionMessage(true)); + } + param_values_result.resolved_param_values.emplace(param_name, std::move(resolved_value_str)); } } auto parametrized_view_storage = context->buildParametrizedViewStorage( database_name, table_name, - param_values_result.resolved_values); + param_values_result.resolved_param_values); if (parametrized_view_storage) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b606d6a3628..e5d1fc5248d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1871,7 +1871,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) { auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext(), nullptr).resolved_values; + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext(), nullptr).resolved_param_values; StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); ASTCreateQuery create; diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 2b4f92d39c8..452ca20ab97 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -4,10 +4,7 @@ #include #include #include -#include #include -#include -#include #include #include #include @@ -49,7 +46,7 @@ private: const ScopeAliases * aliases; ContextPtr context; - std::string tryGetParameterValueAsString(const std::string & param_name, const ASTPtr & ast) + std::optional tryGetParameterValueAsString(const std::string & param_name, const ASTPtr & ast) { if (const auto * literal = ast->as()) { @@ -63,8 +60,8 @@ private: if (it != aliases->alias_name_to_expression_node_before_group_by.end()) { auto value_str = tryGetParameterValueAsString(param_name, it->second->toAST()); - if (value_str.empty()) - result.unresolved_values.emplace(param_name, value_identifier->name()); + if (!value_str.has_value()) + result.unresolved_param_aliases.emplace(param_name, value_identifier->name()); return value_str; } } @@ -88,7 +85,7 @@ private: return convertFieldToString(res->as()->value); } } - return ""; + return std::nullopt; } void visitFunction(const ASTFunction & parameter_function) @@ -104,8 +101,8 @@ private: if (const auto * identifier = expression_list->children[0]->as()) { auto value_str = tryGetParameterValueAsString(identifier->name(), expression_list->children[1]); - if (!value_str.empty()) - result.resolved_values[identifier->name()] = value_str; + if (value_str.has_value()) + result.resolved_param_values[identifier->name()] = value_str.value(); } } }; diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h 
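What patches 018 and 019 add up to, in user-visible terms: a parametrized view argument may now be a WITH alias whose value is not a plain literal; the visitor records such parameters as unresolved, and QueryAnalyzer finishes them with full expression resolution. A minimal sketch of the behavior, assuming a build with both patches and the experimental analyzer enabled (condensed from the 03167 test above):

SET allow_experimental_analyzer = 1;
CREATE OR REPLACE VIEW param_test AS SELECT {test_str:String} AS s_result;
-- A literal alias is evaluated directly by FunctionParameterValuesVisitor:
WITH 'OK' AS s SELECT * FROM param_test(test_str = s);
-- A scalar-subquery alias cannot be evaluated by the visitor alone; it is reported
-- as unresolved and resolved later in QueryAnalyzer::resolveTableFunction:
WITH (SELECT 100 + 20 + 3) AS s SELECT * FROM param_test(test_str = s);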
index 89d4fe4e18b..50dca8d86de 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -13,9 +13,9 @@ struct ScopeAliases; struct ParamValuesAnalyzeResult { /// Param name -> resolved param value - NameToNameMap resolved_values; - /// Pram name -> alias - NameToNameMap unresolved_values; + NameToNameMap resolved_param_values; + /// Param name -> alias + NameToNameMap unresolved_param_aliases; }; ParamValuesAnalyzeResult analyzeFunctionParamValues(const ASTPtr & ast, const ContextPtr & context, const ScopeAliases * scope_aliases = nullptr); From 422288db683de584c546ccec9ca29f02bc02a25b Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 18 Jun 2024 17:26:39 +0000 Subject: [PATCH 020/100] Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries --- src/Interpreters/DatabaseCatalog.cpp | 108 ++++++++++++++++++ src/Interpreters/DatabaseCatalog.h | 3 + src/Interpreters/InterpreterCreateQuery.cpp | 36 +++++- src/Interpreters/InterpreterRenameQuery.cpp | 26 +++-- ...ependencies_on_create_and_rename.reference | 0 ...clic_dependencies_on_create_and_rename.sql | 75 ++++++++++++ 6 files changed, 233 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.reference create mode 100644 tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 0f4c8cc26a6..ee96352c347 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -63,6 +63,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int HAVE_DEPENDENT_OBJECTS; extern const int UNFINISHED; + extern const int INFINITE_LOOP; } class DatabaseNameHints : public IHints<> @@ -1473,6 +1474,113 @@ void DatabaseCatalog::checkTableCanBeRemovedOrRenamedUnlocked( removing_table, fmt::join(from_other_databases, ", ")); } +void DatabaseCatalog::checkTableCanBeAddedWithNoCyclicDependencies( + const QualifiedTableName & table_name, + const TableNamesSet & new_referential_dependencies, + const TableNamesSet & new_loading_dependencies) +{ + std::lock_guard lock{databases_mutex}; + + StorageID table_id = StorageID{table_name}; + + auto check = [&](TablesDependencyGraph & dependencies, const TableNamesSet & new_dependencies) + { + auto old_dependencies = dependencies.removeDependencies(table_id); + dependencies.addDependencies(table_name, new_dependencies); + + if (dependencies.hasCyclicDependencies()) + { + auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); + /// Restore previous dependencies before throwing an exception. + dependencies.removeDependencies(table_id); + dependencies.addDependencies(table_id, old_dependencies); + throw Exception( + ErrorCodes::INFINITE_LOOP, + "Cannot add dependencies for '{}', because it will lead to cyclic dependencies: {}", + table_name.getFullName(), + cyclic_dependencies_description); + } + + /// Restore previous dependencies. 
+ dependencies.removeDependencies(table_id); + dependencies.addDependencies(table_id, old_dependencies); + }; + + check(referential_dependencies, new_referential_dependencies); + check(loading_dependencies, new_loading_dependencies); +} + +void DatabaseCatalog::checkTableCanBeRenamedWithNoCyclicDependencies(const StorageID & from_table_id, const StorageID & to_table_id) +{ + std::lock_guard lock{databases_mutex}; + + auto check = [&](TablesDependencyGraph & dependencies) + { + auto old_dependencies = dependencies.removeDependencies(from_table_id); + dependencies.addDependencies(to_table_id, old_dependencies); + + if (dependencies.hasCyclicDependencies()) + { + auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); + /// Restore previous dependencies before throwing an exception. + dependencies.removeDependencies(to_table_id); + dependencies.addDependencies(from_table_id, old_dependencies); + throw Exception( + ErrorCodes::INFINITE_LOOP, + "Cannot rename '{}' to '{}', because it will lead to cyclic dependencies: {}", + from_table_id.getFullTableName(), + to_table_id.getFullTableName(), + cyclic_dependencies_description); + } + + /// Restore previous dependencies. + dependencies.removeDependencies(to_table_id); + dependencies.addDependencies(from_table_id, old_dependencies); + }; + + check(referential_dependencies); + check(loading_dependencies); +} + +void DatabaseCatalog::checkTablesCanBeExchangedWithNoCyclicDependencies(const StorageID & table_id_1, const StorageID & table_id_2) +{ + std::lock_guard lock{databases_mutex}; + + auto check = [&](TablesDependencyGraph & dependencies) + { + auto old_dependencies_1 = dependencies.removeDependencies(table_id_1); + auto old_dependencies_2 = dependencies.removeDependencies(table_id_2); + dependencies.addDependencies(table_id_1, old_dependencies_2); + dependencies.addDependencies(table_id_2, old_dependencies_1); + auto restore_dependencies = [&]() + { + dependencies.removeDependencies(table_id_1); + dependencies.removeDependencies(table_id_2); + dependencies.addDependencies(table_id_1, old_dependencies_1); + dependencies.addDependencies(table_id_2, old_dependencies_2); + }; + + if (dependencies.hasCyclicDependencies()) + { + auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); + /// Restore previous dependencies before throwing an exception. + restore_dependencies(); + throw Exception( + ErrorCodes::INFINITE_LOOP, + "Cannot exchange '{}' and '{}', because it will lead to cyclic dependencies: {}", + table_id_1.getFullTableName(), + table_id_2.getFullTableName(), + cyclic_dependencies_description); + } + + /// Restore previous dependencies. 
+ restore_dependencies(); + }; + + check(referential_dependencies); + check(loading_dependencies); +} + void DatabaseCatalog::cleanupStoreDirectoryTask() { for (const auto & [disk_name, disk] : getContext()->getDisksMap()) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 37125d9900c..a5f1f47d7a0 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -244,6 +244,9 @@ public: void checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool check_referential_dependencies, bool check_loading_dependencies, bool is_drop_database = false) const; + void checkTableCanBeAddedWithNoCyclicDependencies(const QualifiedTableName & table_name, const TableNamesSet & new_referential_dependencies, const TableNamesSet & new_loading_dependencies); + void checkTableCanBeRenamedWithNoCyclicDependencies(const StorageID & from_table_id, const StorageID & to_table_id); + void checkTablesCanBeExchangedWithNoCyclicDependencies(const StorageID & table_id_1, const StorageID & table_id_2); struct TableMarkedAsDropped { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a78f6cc39ef..e0d743b130e 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1077,6 +1077,27 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data } +namespace +{ + +void addTableDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) +{ + QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; + auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); + DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); +} + +void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) +{ + QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; + auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); + DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); +} + +} + BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { /// Temporary tables are created out of databases. 
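The shape of the cycle these new checks reject, as a condensed sketch of the scenario exercised by the 03173 test added below (dictionary and view DDL taken from that test):

CREATE TABLE src (id UInt64, value String) ENGINE = MergeTree ORDER BY id;
CREATE DICTIONARY d (id UInt64, value String) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE src)) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000);
CREATE VIEW v AS SELECT id, dictGet('d', 'value', id) AS value FROM src;
-- Re-pointing the dictionary at the view would close the cycle d -> v -> d,
-- so the CREATE OR REPLACE is rejected before anything is dropped:
CREATE OR REPLACE DICTIONARY d (id UInt64, value String) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE v)) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); -- {serverError INFINITE_LOOP}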
@@ -1322,11 +1343,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) return {}; /// If table has dependencies - add them to the graph - QualifiedTableName qualified_name{database_name, create.getTable()}; - auto ref_dependencies = getDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); - DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); - + addTableDependencies(create, query_ptr, getContext()); return fillTableIfNeeded(create); } @@ -1478,6 +1495,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find UUID mapping for {}, it's a bug", create.uuid); } + /// Before actually creating the table, check if it will lead to cyclic dependencies. + checkTableCanBeAddedWithNoCyclicDependencies(create, query_ptr, getContext()); + StoragePtr res; /// NOTE: CREATE query may be rewritten by Storage creator or table function if (create.as_table_function) @@ -1578,6 +1598,9 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, ContextMutablePtr create_context = Context::createCopy(current_context); create_context->setQueryContext(std::const_pointer_cast<Context>(current_context)); + /// Before actually creating/replacing the table, check if it will lead to cyclic dependencies. + checkTableCanBeAddedWithNoCyclicDependencies(create, query_ptr, create_context); + auto make_drop_context = [&]() -> ContextMutablePtr { ContextMutablePtr drop_context = Context::createCopy(current_context); @@ -1624,6 +1647,9 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, assert(done); created = true; + /// If table has dependencies - add them to the graph + addTableDependencies(create, query_ptr, getContext()); + /// Try fill temporary table BlockIO fill_io = fillTableIfNeeded(create); executeTrivialBlockIO(fill_io, getContext()); diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index eeb762b4d7e..b34368fe4ab 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -127,15 +127,17 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c { StorageID from_table_id{elem.from_database_name, elem.from_table_name}; StorageID to_table_id{elem.to_database_name, elem.to_table_name}; - std::vector<StorageID> ref_dependencies; - std::vector<StorageID> loading_dependencies; - if (!exchange_tables) - { - bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies; - bool check_loading_deps = !check_ref_deps && getContext()->getSettingsRef().check_table_dependencies; - std::tie(ref_dependencies, loading_dependencies) = database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps); - } + if (exchange_tables) + DatabaseCatalog::instance().checkTablesCanBeExchangedWithNoCyclicDependencies(from_table_id, to_table_id); + else + DatabaseCatalog::instance().checkTableCanBeRenamedWithNoCyclicDependencies(from_table_id, to_table_id); + + bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies; + bool check_loading_deps = !check_ref_deps && getContext()->getSettingsRef().check_table_dependencies; + auto [from_ref_dependencies, from_loading_dependencies] =
database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps); + /// In case of just rename to_ref_dependencies and to_loading_dependencies will be empty. + auto [to_ref_dependencies, to_loading_dependencies] = database_catalog.removeDependencies(to_table_id, check_ref_deps, check_loading_deps); try { @@ -147,12 +149,16 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c exchange_tables, rename.dictionary); - DatabaseCatalog::instance().addDependencies(to_table_id, ref_dependencies, loading_dependencies); + DatabaseCatalog::instance().addDependencies(to_table_id, from_ref_dependencies, from_loading_dependencies); + /// In case of just rename to_ref_dependencies and to_loading_dependencies will be empty and no dependencies will be added. + DatabaseCatalog::instance().addDependencies(from_table_id, to_ref_dependencies, to_loading_dependencies); + } catch (...) { /// Restore dependencies if RENAME fails - DatabaseCatalog::instance().addDependencies(from_table_id, ref_dependencies, loading_dependencies); + DatabaseCatalog::instance().addDependencies(from_table_id, from_ref_dependencies, from_loading_dependencies); + DatabaseCatalog::instance().addDependencies(to_table_id, to_ref_dependencies, to_loading_dependencies); throw; } } diff --git a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.reference b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql new file mode 100644 index 00000000000..39dda475d1e --- /dev/null +++ b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql @@ -0,0 +1,75 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (id UInt64, value String) ENGINE=MergeTree ORDER BY id; +INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(10); +DROP DICTIONARY IF EXISTS test_dict; +CREATE DICTIONARY test_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM test')) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); +DROP TABLE IF EXISTS view_source; +CREATE TABLE view_source (id UInt64) ENGINE=MergeTree ORDER BY id; +INSERT INTO view_source SELECT * FROM numbers(5); +DROP VIEW IF EXISTS view; +CREATE VIEW view AS SELECT id, dictGet('test_dict', 'value', id) FROM view_source; + +CREATE OR REPLACE DICTIONARY test_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM view')) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); -- {serverError INFINITE_LOOP} + +REPLACE DICTIONARY test_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM view')) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); -- {serverError INFINITE_LOOP} + + +DROP DICTIONARY IF EXISTS test_dict_2; +CREATE DICTIONARY test_dict_2 +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM view')) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); + +EXCHANGE DICTIONARIES test_dict AND test_dict_2; -- {serverError INFINITE_LOOP} + +DROP DICTIONARY test_dict_2; + +CREATE OR REPLACE DICTIONARY test_dict_2 +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM view')) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); + +EXCHANGE DICTIONARIES test_dict AND test_dict_2; -- 
{serverError INFINITE_LOOP} + +DROP DICTIONARY test_dict; +RENAME DICTIONARY test_dict_2 to test_dict; -- {serverError INFINITE_LOOP} + +DROP DICTIONARY test_dict_2; +DROP VIEW view; +DROP TABLE test; +DROP TABLE view_source; + From bfb68da3507f82a5d77cc7cbf1bbd5bd12cc3200 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 18 Jun 2024 17:33:31 +0000 Subject: [PATCH 021/100] Make the code better --- src/Interpreters/DatabaseCatalog.cpp | 29 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index ee96352c347..aaec94a4fb0 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1487,13 +1487,17 @@ void DatabaseCatalog::checkTableCanBeAddedWithNoCyclicDependencies( { auto old_dependencies = dependencies.removeDependencies(table_id); dependencies.addDependencies(table_name, new_dependencies); + auto restore_dependencies = [&]() + { + dependencies.removeDependencies(table_id); + if (!old_dependencies.empty()) + dependencies.addDependencies(table_id, old_dependencies); + }; if (dependencies.hasCyclicDependencies()) { auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); - /// Restore previous dependencies before throwing an exception. - dependencies.removeDependencies(table_id); - dependencies.addDependencies(table_id, old_dependencies); + restore_dependencies(); throw Exception( ErrorCodes::INFINITE_LOOP, "Cannot add dependencies for '{}', because it will lead to cyclic dependencies: {}", @@ -1501,9 +1505,7 @@ void DatabaseCatalog::checkTableCanBeAddedWithNoCyclicDependencies( cyclic_dependencies_description); } - /// Restore previous dependencies. - dependencies.removeDependencies(table_id); - dependencies.addDependencies(table_id, old_dependencies); + restore_dependencies(); }; check(referential_dependencies, new_referential_dependencies); @@ -1518,13 +1520,16 @@ void DatabaseCatalog::checkTableCanBeRenamedWithNoCyclicDependencies(const Stora { auto old_dependencies = dependencies.removeDependencies(from_table_id); dependencies.addDependencies(to_table_id, old_dependencies); + auto restore_dependencies = [&]() + { + dependencies.removeDependencies(to_table_id); + dependencies.addDependencies(from_table_id, old_dependencies); + }; if (dependencies.hasCyclicDependencies()) { auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); - /// Restore previous dependencies before throwing an exception. - dependencies.removeDependencies(to_table_id); - dependencies.addDependencies(from_table_id, old_dependencies); + restore_dependencies(); throw Exception( ErrorCodes::INFINITE_LOOP, "Cannot rename '{}' to '{}', because it will lead to cyclic dependencies: {}", @@ -1533,9 +1538,7 @@ void DatabaseCatalog::checkTableCanBeRenamedWithNoCyclicDependencies(const Stora cyclic_dependencies_description); } - /// Restore previous dependencies. - dependencies.removeDependencies(to_table_id); - dependencies.addDependencies(from_table_id, old_dependencies); + restore_dependencies(); }; check(referential_dependencies); @@ -1563,7 +1566,6 @@ void DatabaseCatalog::checkTablesCanBeExchangedWithNoCyclicDependencies(const St if (dependencies.hasCyclicDependencies()) { auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); - /// Restore previous dependencies before throwing an exception. 
restore_dependencies(); throw Exception( ErrorCodes::INFINITE_LOOP, @@ -1573,7 +1575,6 @@ void DatabaseCatalog::checkTablesCanBeExchangedWithNoCyclicDependencies(const St cyclic_dependencies_description); } - /// Restore previous dependencies. restore_dependencies(); }; From bc5db30669f90fc0b2b13811b5c6539a16cb2429 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 18 Jun 2024 19:08:24 +0000 Subject: [PATCH 022/100] Fix usage of current_database --- src/Databases/DDLLoadingDependencyVisitor.cpp | 4 +-- src/Databases/DDLLoadingDependencyVisitor.h | 2 +- src/Databases/DatabaseMemory.cpp | 4 +-- src/Databases/DatabaseOrdinary.cpp | 4 +-- src/Databases/TablesLoader.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 12 ++++++--- src/Interpreters/InterpreterRenameQuery.cpp | 26 ++++++++++++------- .../0_stateless/01191_rename_dictionary.sql | 2 +- ...clic_dependencies_on_create_and_rename.sql | 12 ++++----- 9 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index b8690125aaa..0253709fb6e 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -21,11 +21,11 @@ namespace DB using TableLoadingDependenciesVisitor = DDLLoadingDependencyVisitor::Visitor; -TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast) +TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast, const String & default_database) { assert(global_context == global_context->getGlobalContext()); TableLoadingDependenciesVisitor::Data data; - data.default_database = global_context->getCurrentDatabase(); + data.default_database = default_database; data.create_query = ast; data.global_context = global_context; data.table_name = table; diff --git a/src/Databases/DDLLoadingDependencyVisitor.h b/src/Databases/DDLLoadingDependencyVisitor.h index a9e9f4d7a53..099d9c1979b 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.h +++ b/src/Databases/DDLLoadingDependencyVisitor.h @@ -16,7 +16,7 @@ using TableNamesSet = std::unordered_set; /// Returns a list of all tables which should be loaded before a specified table. /// For example, a local ClickHouse table should be loaded before a dictionary which uses that table as its source. /// Does not validate AST, works a best-effort way. -TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast); +TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast, const String & default_database); class DDLMatcherBase diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index b82cf885b4a..dce20e3ac6f 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -154,8 +154,8 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl applyMetadataChangesToCreateQuery(it->second, metadata); /// The create query of the table has been just changed, we need to update dependencies too. 
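Why the explicit default_database parameter matters: a CREATE query may reference its source table without a database qualifier, and the dependency has to be resolved against the database the DDL runs in rather than against the global context's current database. A sketch of the situation this fixes, with hypothetical names:

CREATE DATABASE db1;
USE db1;
CREATE TABLE src (id UInt64, value String) ENGINE = MergeTree ORDER BY id;
-- 'TABLE src' is unqualified; the loading dependency must resolve to db1.src:
CREATE DICTIONARY dict (id UInt64, value String) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE src)) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000);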
- auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); + auto ref_dependencies = getDependenciesFromCreateQuery(local_context, table_id.getQualifiedName(), it->second); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second, local_context->getCurrentDatabase()); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 10a8e06e8f0..18c92e6bcbc 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -539,8 +539,8 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta } /// The create query of the table has been just changed, we need to update dependencies too. - auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); + auto ref_dependencies = getDependenciesFromCreateQuery(local_context, table_id.getQualifiedName(), ast); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast, local_context->getCurrentDatabase()); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, local_context); diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 6aa13b7b759..4bfe44ba72c 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -138,7 +138,7 @@ void TablesLoader::buildDependencyGraph() for (const auto & [table_name, table_metadata] : metadata.parsed_tables) { auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast); - auto new_loading_dependencies = getLoadingDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast); + auto new_loading_dependencies = getLoadingDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast, global_context->getCurrentDatabase()); if (!new_ref_dependencies.empty()) referential_dependencies.addDependencies(table_name, new_ref_dependencies); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index e0d743b130e..f6b6b34413a 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1083,16 +1083,20 @@ namespace void addTableDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) { QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; - auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); + auto ref_dependencies = getDependenciesFromCreateQuery(context, qualified_name, query_ptr); + String ref_dependencies_str; + for (const auto & ref_dep : ref_dependencies) + ref_dependencies_str += 
ref_dep.getFullName() + ","; + LOG_DEBUG(getLogger("InterpreterCreateQuery"), "Add ref dependencies for table {}: {}", qualified_name.getFullName(), ref_dependencies_str); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); } void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) { QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; - auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); + auto ref_dependencies = getDependenciesFromCreateQuery(context, qualified_name, query_ptr); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); } diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index b34368fe4ab..32c475d138f 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -127,17 +127,24 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c { StorageID from_table_id{elem.from_database_name, elem.from_table_name}; StorageID to_table_id{elem.to_database_name, elem.to_table_name}; + std::vector from_ref_dependencies; + std::vector from_loading_dependencies; + std::vector to_ref_dependencies; + std::vector to_loading_dependencies; if (exchange_tables) + { DatabaseCatalog::instance().checkTablesCanBeExchangedWithNoCyclicDependencies(from_table_id, to_table_id); + std::tie(from_ref_dependencies, from_loading_dependencies) = database_catalog.removeDependencies(from_table_id, false, false); + std::tie(to_ref_dependencies, to_loading_dependencies) = database_catalog.removeDependencies(to_table_id, false, false); + } else + { DatabaseCatalog::instance().checkTableCanBeRenamedWithNoCyclicDependencies(from_table_id, to_table_id); - - bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies; - bool check_loading_deps = !check_ref_deps && getContext()->getSettingsRef().check_table_dependencies; - auto [from_ref_dependencies, from_loading_dependencies] = database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps); - /// In case of just rename to_ref_dependencies and to_loading_dependencies will be empty. 
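A rename alone can also close a cycle, which is why the rename path gets its own check before any dependencies are detached; the 03173 test above ends with exactly this case:

-- test_dict_2 is sourced from a view that calls dictGet('test_dict', ...):
RENAME DICTIONARY test_dict_2 TO test_dict; -- {serverError INFINITE_LOOP}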
- auto [to_ref_dependencies, to_loading_dependencies] = database_catalog.removeDependencies(to_table_id, check_ref_deps, check_loading_deps); + bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies; + bool check_loading_deps = !check_ref_deps && getContext()->getSettingsRef().check_table_dependencies; + std::tie(from_ref_dependencies, from_loading_dependencies) = database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps); + } try { @@ -150,15 +157,16 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c rename.dictionary); DatabaseCatalog::instance().addDependencies(to_table_id, from_ref_dependencies, from_loading_dependencies); - /// In case of just rename to_ref_dependencies and to_loading_dependencies will be empty and no dependencies will be added. - DatabaseCatalog::instance().addDependencies(from_table_id, to_ref_dependencies, to_loading_dependencies); + if (!to_ref_dependencies.empty() || !to_loading_dependencies.empty()) + DatabaseCatalog::instance().addDependencies(from_table_id, to_ref_dependencies, to_loading_dependencies); } catch (...) { /// Restore dependencies if RENAME fails DatabaseCatalog::instance().addDependencies(from_table_id, from_ref_dependencies, from_loading_dependencies); - DatabaseCatalog::instance().addDependencies(to_table_id, to_ref_dependencies, to_loading_dependencies); + if (!to_ref_dependencies.empty() || !to_loading_dependencies.empty()) + DatabaseCatalog::instance().addDependencies(to_table_id, to_ref_dependencies, to_loading_dependencies); throw; } } diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index 6666c3308ca..c5012dabc81 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -17,7 +17,7 @@ SELECT name, status FROM system.dictionaries WHERE database='test_01191'; SELECT name, engine FROM system.tables WHERE database='test_01191' ORDER BY name; RENAME DICTIONARY test_01191.table TO test_01191.table1; -- {serverError UNKNOWN_TABLE} -EXCHANGE DICTIONARIES test_01191._ AND test_01191.dict; -- {serverError INCORRECT_QUERY} +EXCHANGE DICTIONARIES test_01191._ AND test_01191.dict; -- {serverError INFINITE_LOOP} EXCHANGE TABLES test_01191.t AND test_01191.dict; SELECT name, status FROM system.dictionaries WHERE database='test_01191'; SELECT name, engine FROM system.tables WHERE database='test_01191' ORDER BY name; diff --git a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql index 39dda475d1e..e6215c3da1e 100644 --- a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql +++ b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql @@ -8,14 +8,14 @@ CREATE DICTIONARY test_dict value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM test')) +SOURCE(CLICKHOUSE(TABLE test)) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); DROP TABLE IF EXISTS view_source; CREATE TABLE view_source (id UInt64) ENGINE=MergeTree ORDER BY id; INSERT INTO view_source SELECT * FROM numbers(5); DROP VIEW IF EXISTS view; -CREATE VIEW view AS SELECT id, dictGet('test_dict', 'value', id) FROM view_source; +CREATE VIEW view AS SELECT id, dictGet('test_dict', 'value', id) as value FROM view_source; CREATE OR REPLACE DICTIONARY test_dict ( @@ -23,7 +23,7 @@ CREATE OR 
REPLACE DICTIONARY test_dict value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM view')) +SOURCE(CLICKHOUSE(TABLE view)) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); -- {serverError INFINITE_LOOP} @@ -33,7 +33,7 @@ REPLACE DICTIONARY test_dict value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM view')) +SOURCE(CLICKHOUSE(TABLE view)) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); -- {serverError INFINITE_LOOP} @@ -45,7 +45,7 @@ CREATE DICTIONARY test_dict_2 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM view')) +SOURCE(CLICKHOUSE(TABLE view)) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); @@ -59,7 +59,7 @@ CREATE OR REPLACE DICTIONARY test_dict_2 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(QUERY 'SELECT * FROM view')) +SOURCE(CLICKHOUSE(TABLE view)) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); From 1c15ad54f4345a6c719d8c0929833b3fab73d5ad Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 18 Jun 2024 19:11:38 +0000 Subject: [PATCH 023/100] Remove debug logging --- src/Interpreters/InterpreterCreateQuery.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index f6b6b34413a..35207bc9d39 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1084,10 +1084,6 @@ void addTableDependencies(const ASTCreateQuery & create, const ASTPtr & query_pt { QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; auto ref_dependencies = getDependenciesFromCreateQuery(context, qualified_name, query_ptr); - String ref_dependencies_str; - for (const auto & ref_dep : ref_dependencies) - ref_dependencies_str += ref_dep.getFullName() + ","; - LOG_DEBUG(getLogger("InterpreterCreateQuery"), "Add ref dependencies for table {}: {}", qualified_name.getFullName(), ref_dependencies_str); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); } From a577c87cf1d1bdeae204371e82071de401c4f61c Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 18 Jun 2024 20:21:20 +0000 Subject: [PATCH 024/100] Fix tests --- src/Databases/DDLDependencyVisitor.cpp | 5 +++++ .../00987_distributed_stack_overflow.sql | 6 +----- .../0_stateless/01552_dict_fixedstring.sql | 2 +- ...0_ddl_dictionary_use_current_database_name.sql | 2 +- .../0_stateless/01763_max_distributed_depth.sql | 15 +-------------- .../01764_table_function_dictionary.sql | 2 +- .../01804_dictionary_decimal256_type.sql | 2 ++ .../01904_dictionary_default_nullable_type.sql | 2 ++ .../queries/0_stateless/01910_view_dictionary.sql | 2 +- .../02008_complex_key_range_hashed_dictionary.sql | 4 ++-- .../0_stateless/02155_dictionary_comment.sql | 2 +- .../0_stateless/02183_dictionary_date_types.sql | 2 +- .../02185_range_hashed_dictionary_open_ranges.sql | 1 + .../0_stateless/02391_recursive_buffer.sql | 6 +----- ...03169_cache_complex_dict_short_circuit_bug.sql | 2 +- 15 files changed, 22 insertions(+), 33 deletions(-) diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 75a01a6190f..fdc51f4f43d 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -95,6 +95,11 @@ namespace as_table.database = current_database; dependencies.emplace(as_table); } + + /// Visit nested select query only for views, 
for other cases it's not + /// an actual dependency as it will be executed only once to fill the table. + if (create.select && !create.isView()) + skip_asts.insert(create.select); } /// The definition of a dictionary: SOURCE(CLICKHOUSE(...)) LAYOUT(...) LIFETIME(...) diff --git a/tests/queries/0_stateless/00987_distributed_stack_overflow.sql b/tests/queries/0_stateless/00987_distributed_stack_overflow.sql index 5a22ac56413..ba58713fe0e 100644 --- a/tests/queries/0_stateless/00987_distributed_stack_overflow.sql +++ b/tests/queries/0_stateless/00987_distributed_stack_overflow.sql @@ -9,10 +9,6 @@ CREATE TABLE distr (x UInt8) ENGINE = Distributed(test_shard_localhost, currentD CREATE TABLE distr0 (x UInt8) ENGINE = Distributed(test_shard_localhost, '', distr0); -- { serverError INFINITE_LOOP } CREATE TABLE distr1 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr2); -CREATE TABLE distr2 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr1); - -SELECT * FROM distr1; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } -SELECT * FROM distr2; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } +CREATE TABLE distr2 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr1); -- { serverError INFINITE_LOOP } DROP TABLE distr1; -DROP TABLE distr2; diff --git a/tests/queries/0_stateless/01552_dict_fixedstring.sql b/tests/queries/0_stateless/01552_dict_fixedstring.sql index 01d55656e3c..0b19c9980a4 100644 --- a/tests/queries/0_stateless/01552_dict_fixedstring.sql +++ b/tests/queries/0_stateless/01552_dict_fixedstring.sql @@ -16,5 +16,5 @@ LIFETIME(MIN 10 MAX 10); SELECT dictGet(currentDatabase() || '.dict', 's', number) FROM numbers(2); -DROP TABLE src; DROP DICTIONARY dict; +DROP TABLE src; diff --git a/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql b/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql index 55c0d1e3678..a7f04921f1f 100644 --- a/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql +++ b/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql @@ -27,5 +27,5 @@ SELECT dictGet('ddl_dictionary_test', 'value', number) FROM system.numbers LIMIT SELECT 'dictHas'; SELECT dictHas('ddl_dictionary_test', number) FROM system.numbers LIMIT 3; -DROP TABLE ddl_dictonary_test_source; DROP DICTIONARY ddl_dictionary_test; +DROP TABLE ddl_dictonary_test_source; diff --git a/tests/queries/0_stateless/01763_max_distributed_depth.sql b/tests/queries/0_stateless/01763_max_distributed_depth.sql index 08dc533876d..f722a88226d 100644 --- a/tests/queries/0_stateless/01763_max_distributed_depth.sql +++ b/tests/queries/0_stateless/01763_max_distributed_depth.sql @@ -17,19 +17,6 @@ ENGINE = Distributed('test_shard_localhost', '', 'tt7', rand()); DROP TABLE IF EXISTS tt7; -CREATE TABLE tt7 as tt6 ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); - -INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } - -SELECT * FROM tt6; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } - -SET max_distributed_depth = 0; - --- stack overflow -INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError TOO_DEEP_RECURSION} - --- stack overflow -SELECT * FROM tt6; -- { serverError TOO_DEEP_RECURSION } +CREATE TABLE tt7 as tt6 ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); -- {serverError INFINITE_LOOP} DROP TABLE tt6; -DROP TABLE tt7; diff --git a/tests/queries/0_stateless/01764_table_function_dictionary.sql 
b/tests/queries/0_stateless/01764_table_function_dictionary.sql index b642fdd741e..76e7213b367 100644 --- a/tests/queries/0_stateless/01764_table_function_dictionary.sql +++ b/tests/queries/0_stateless/01764_table_function_dictionary.sql @@ -23,5 +23,5 @@ LAYOUT(DIRECT()); SELECT * FROM dictionary('table_function_dictionary_test_dictionary'); -DROP TABLE table_function_dictionary_source_table; DROP DICTIONARY table_function_dictionary_test_dictionary; +DROP TABLE table_function_dictionary_source_table; diff --git a/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql b/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql index 77e9abfb742..08a8d0feb27 100644 --- a/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql +++ b/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql @@ -25,6 +25,8 @@ LAYOUT(FLAT()); SELECT 'Flat dictionary'; SELECT dictGet('flat_dictionary', 'decimal_value', toUInt64(1)); +DROP DICTIONARY flat_dictionary; + DROP DICTIONARY IF EXISTS hashed_dictionary; CREATE DICTIONARY hashed_dictionary ( diff --git a/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql b/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql index 4c623941a19..d28f9e5c4e6 100644 --- a/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql +++ b/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql @@ -111,6 +111,8 @@ LAYOUT(IP_TRIE()); SELECT 'IPTrie dictionary'; SELECT dictGet('ip_trie_dictionary', 'value', tuple(IPv4StringToNum('127.0.0.0'))); --{serverError UNSUPPORTED_METHOD} +DROP DICTIONARY ip_trie_dictionary; + DROP TABLE dictionary_nullable_source_table; DROP TABLE dictionary_nullable_default_source_table; diff --git a/tests/queries/0_stateless/01910_view_dictionary.sql b/tests/queries/0_stateless/01910_view_dictionary.sql index 1f9928735b4..05a67889825 100644 --- a/tests/queries/0_stateless/01910_view_dictionary.sql +++ b/tests/queries/0_stateless/01910_view_dictionary.sql @@ -45,5 +45,5 @@ FROM numbers(3); DROP TABLE dictionary_source_en; DROP TABLE dictionary_source_ru; -DROP TABLE dictionary_source_view; DROP DICTIONARY flat_dictionary; +DROP TABLE dictionary_source_view; diff --git a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql index 72cac481376..ea2dad5c732 100644 --- a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql +++ b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql @@ -53,8 +53,8 @@ SELECT CountryID, StartDate, Tax FROM range_dictionary ORDER BY CountryID, Start SELECT 'onlySpecificColumn'; SELECT Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; -DROP TABLE date_table; DROP DICTIONARY range_dictionary; +DROP TABLE date_table; CREATE TABLE date_table ( @@ -107,5 +107,5 @@ SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable ORDER BY Country SELECT 'onlySpecificColumn'; SELECT Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; -DROP TABLE date_table; DROP DICTIONARY range_dictionary_nullable; +DROP TABLE date_table; diff --git a/tests/queries/0_stateless/02155_dictionary_comment.sql b/tests/queries/0_stateless/02155_dictionary_comment.sql index 30b85e16a7c..8ebc7b259fc 100644 --- a/tests/queries/0_stateless/02155_dictionary_comment.sql +++ b/tests/queries/0_stateless/02155_dictionary_comment.sql @@ -49,5 +49,5 @@ SELECT name, comment FROM system.tables WHERE name == 
'02155_test_dictionary_vie SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary_view' AND database == currentDatabase(); DROP TABLE 02155_test_dictionary_view; -DROP TABLE 02155_test_table; DROP DICTIONARY 02155_test_dictionary; +DROP TABLE 02155_test_table; diff --git a/tests/queries/0_stateless/02183_dictionary_date_types.sql b/tests/queries/0_stateless/02183_dictionary_date_types.sql index e06863d5e53..a6db502e7e8 100644 --- a/tests/queries/0_stateless/02183_dictionary_date_types.sql +++ b/tests/queries/0_stateless/02183_dictionary_date_types.sql @@ -170,8 +170,8 @@ LIFETIME(0); SELECT 'Polygon dictionary'; SELECT * FROM 02183_polygon_dictionary; +DROP DICTIONARY 02183_polygon_dictionr; DROP TABLE 02183_polygon_dictionary_source_table; -DROP DICTIONARY 02183_polygon_dictionary; DROP TABLE IF EXISTS 02183_range_dictionary_source_table; CREATE TABLE 02183_range_dictionary_source_table diff --git a/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql b/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql index e6edee2ea18..a36c72de0ac 100644 --- a/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql +++ b/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql @@ -60,4 +60,5 @@ SELECT dictHas('02185_range_dictionary', 0, 0); SELECT dictHas('02185_range_dictionary', 0, 5001); SELECT dictHas('02185_range_dictionary', 0, 10001); +DROP DICTIONARY 02185_range_dictionary; DROP TABLE 02185_range_dictionary_source_table; diff --git a/tests/queries/0_stateless/02391_recursive_buffer.sql b/tests/queries/0_stateless/02391_recursive_buffer.sql index 1a630722b5a..60a2f0d1af1 100644 --- a/tests/queries/0_stateless/02391_recursive_buffer.sql +++ b/tests/queries/0_stateless/02391_recursive_buffer.sql @@ -10,9 +10,5 @@ DROP TABLE test; DROP TABLE IF EXISTS test1; DROP TABLE IF EXISTS test2; CREATE TABLE test1 (key UInt32) Engine = Buffer(currentDatabase(), test2, 16, 10, 100, 10000, 1000000, 10000000, 100000000); -CREATE TABLE test2 (key UInt32) Engine = Buffer(currentDatabase(), test1, 16, 10, 100, 10000, 1000000, 10000000, 100000000); -SELECT * FROM test1; -- { serverError TOO_DEEP_RECURSION } -SELECT * FROM test2; -- { serverError TOO_DEEP_RECURSION } -SELECT * FROM system.tables WHERE table IN ('test1', 'test2') AND database = currentDatabase(); -- { serverError TOO_DEEP_RECURSION } +CREATE TABLE test2 (key UInt32) Engine = Buffer(currentDatabase(), test1, 16, 10, 100, 10000, 1000000, 10000000, 100000000); -- { serverError INFINITE_LOOP } DROP TABLE test1; -DROP TABLE test2; diff --git a/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql b/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql index 8463d13d251..f91aaf39081 100644 --- a/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql +++ b/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql @@ -27,5 +27,5 @@ LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10)); SELECT dictGetOrDefault('cache_dictionary_complex_key_simple_attributes_short_circuit', 'value_first', (number, concat(toString(number))), toString(materialize('default'))) AS value_first FROM system.numbers LIMIT 20 FORMAT Null; SELECT dictGetOrDefault('cache_dictionary_complex_key_simple_attributes_short_circuit', 'value_first', (number, concat(toString(number))), toString(materialize('default'))) AS value_first FROM system.numbers LIMIT 20 FORMAT Null; -DROP TABLE IF EXISTS 
complex_key_simple_attributes_source_short_circuit_table; DROP DICTIONARY IF EXISTS cache_dictionary_complex_key_simple_attributes_short_circuit; +DROP TABLE IF EXISTS complex_key_simple_attributes_source_short_circuit_table; From a9615f2e0bdf58800fbbfa2eb4a39fe48a421325 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 19 Jun 2024 10:31:19 +0000 Subject: [PATCH 025/100] Fix tests --- tests/queries/0_stateless/02183_dictionary_date_types.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02183_dictionary_date_types.sql b/tests/queries/0_stateless/02183_dictionary_date_types.sql index a6db502e7e8..7a2d2585e9d 100644 --- a/tests/queries/0_stateless/02183_dictionary_date_types.sql +++ b/tests/queries/0_stateless/02183_dictionary_date_types.sql @@ -170,7 +170,7 @@ LIFETIME(0); SELECT 'Polygon dictionary'; SELECT * FROM 02183_polygon_dictionary; -DROP DICTIONARY 02183_polygon_dictionr; +DROP DICTIONARY 02183_polygon_dictionry; DROP TABLE 02183_polygon_dictionary_source_table; DROP TABLE IF EXISTS 02183_range_dictionary_source_table; From 20abea8f64ff3d367d01b66e81857ca74e514992 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 19 Jun 2024 13:31:18 +0200 Subject: [PATCH 026/100] Fix typo in test --- tests/queries/0_stateless/02183_dictionary_date_types.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02183_dictionary_date_types.sql b/tests/queries/0_stateless/02183_dictionary_date_types.sql index 7a2d2585e9d..5671f47cdab 100644 --- a/tests/queries/0_stateless/02183_dictionary_date_types.sql +++ b/tests/queries/0_stateless/02183_dictionary_date_types.sql @@ -170,7 +170,7 @@ LIFETIME(0); SELECT 'Polygon dictionary'; SELECT * FROM 02183_polygon_dictionary; -DROP DICTIONARY 02183_polygon_dictionry; +DROP DICTIONARY 02183_polygon_dictionary; DROP TABLE 02183_polygon_dictionary_source_table; DROP TABLE IF EXISTS 02183_range_dictionary_source_table; From 78ccd03dd6ae344a8bb63898262298ba343683c6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 18 Jun 2024 19:06:06 +0200 Subject: [PATCH 027/100] A few code renames in preparation for AzureQueue --- src/CMakeLists.txt | 2 +- src/Common/SystemLogBase.h | 2 +- src/Core/SettingsEnums.cpp | 12 +- src/Core/SettingsEnums.h | 8 +- src/Interpreters/Context.cpp | 2 +- src/Interpreters/Context.h | 4 +- src/Interpreters/S3QueueLog.cpp | 8 +- src/Interpreters/S3QueueLog.h | 12 +- src/Interpreters/SystemLog.cpp | 2 +- src/Interpreters/SystemLog.h | 4 +- .../StorageObjectStorageSource.h | 2 +- .../ObjectStorageQueueIFileMetadata.cpp} | 40 ++-- .../ObjectStorageQueueIFileMetadata.h} | 8 +- .../ObjectStorageQueueMetadata.cpp} | 131 +++++------ .../ObjectStorageQueueMetadata.h} | 38 +-- .../ObjectStorageQueueMetadataFactory.cpp} | 18 +- .../ObjectStorageQueueMetadataFactory.h | 37 +++ ...ObjectStorageQueueOrderedFileMetadata.cpp} | 38 +-- .../ObjectStorageQueueOrderedFileMetadata.h} | 10 +- .../ObjectStorageQueueSettings.cpp} | 11 +- .../ObjectStorageQueueSettings.h | 47 ++++ .../ObjectStorageQueueSource.cpp} | 61 +++-- .../ObjectStorageQueueSource.h} | 42 ++-- .../ObjectStorageQueueTableMetadata.cpp} | 78 +++--- .../ObjectStorageQueueTableMetadata.h} | 20 +- ...jectStorageQueueUnorderedFileMetadata.cpp} | 12 +- ...ObjectStorageQueueUnorderedFileMetadata.h} | 6 +- .../StorageObjectStorageQueue.cpp} | 222 ++++++------------ .../StorageObjectStorageQueue.h} | 27 +-- .../ObjectStorageQueue/registerS3Queue.cpp | 87 +++++++ 
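These renames are internal preparation only: the engine stays registered as S3Queue (note registerS3Queue.cpp above) and its user-facing settings keep their names and values, while the implementation types gain ObjectStorageQueue names so that a future AzureQueue engine can share them. A sketch of the unchanged usage, with a hypothetical bucket URL:

CREATE TABLE queue_test (name String, value UInt32)
ENGINE = S3Queue('https://example-bucket.s3.amazonaws.com/data/*', 'CSV')
SETTINGS mode = 'unordered', after_processing = 'keep';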
src/Storages/S3Queue/S3QueueMetadataFactory.h | 37 --- src/Storages/S3Queue/S3QueueSettings.h | 47 ---- src/Storages/System/StorageSystemS3Queue.cpp | 8 +- .../integration/test_storage_s3_queue/test.py | 8 +- 34 files changed, 547 insertions(+), 544 deletions(-) rename src/Storages/{S3Queue/S3QueueIFileMetadata.cpp => ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp} (88%) rename src/Storages/{S3Queue/S3QueueIFileMetadata.h => ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h} (95%) rename src/Storages/{S3Queue/S3QueueMetadata.cpp => ObjectStorageQueue/ObjectStorageQueueMetadata.cpp} (74%) rename src/Storages/{S3Queue/S3QueueMetadata.h => ObjectStorageQueue/ObjectStorageQueueMetadata.h} (62%) rename src/Storages/{S3Queue/S3QueueMetadataFactory.cpp => ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp} (62%) create mode 100644 src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h rename src/Storages/{S3Queue/S3QueueOrderedFileMetadata.cpp => ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp} (89%) rename src/Storages/{S3Queue/S3QueueOrderedFileMetadata.h => ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h} (86%) rename src/Storages/{S3Queue/S3QueueSettings.cpp => ObjectStorageQueue/ObjectStorageQueueSettings.cpp} (59%) create mode 100644 src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h rename src/Storages/{S3Queue/S3QueueSource.cpp => ObjectStorageQueue/ObjectStorageQueueSource.cpp} (89%) rename src/Storages/{S3Queue/S3QueueSource.h => ObjectStorageQueue/ObjectStorageQueueSource.h} (69%) rename src/Storages/{S3Queue/S3QueueTableMetadata.cpp => ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp} (72%) rename src/Storages/{S3Queue/S3QueueTableMetadata.h => ObjectStorageQueue/ObjectStorageQueueTableMetadata.h} (50%) rename src/Storages/{S3Queue/S3QueueUnorderedFileMetadata.cpp => ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp} (92%) rename src/Storages/{S3Queue/S3QueueUnorderedFileMetadata.h => ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h} (75%) rename src/Storages/{S3Queue/StorageS3Queue.cpp => ObjectStorageQueue/StorageObjectStorageQueue.cpp} (59%) rename src/Storages/{S3Queue/StorageS3Queue.h => ObjectStorageQueue/StorageObjectStorageQueue.h} (75%) create mode 100644 src/Storages/ObjectStorageQueue/registerS3Queue.cpp delete mode 100644 src/Storages/S3Queue/S3QueueMetadataFactory.h delete mode 100644 src/Storages/S3Queue/S3QueueSettings.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84aaec17a5b..79352bc7258 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -222,7 +222,7 @@ add_object_library(clickhouse_storages_mergetree Storages/MergeTree) add_object_library(clickhouse_storages_statistics Storages/Statistics) add_object_library(clickhouse_storages_liveview Storages/LiveView) add_object_library(clickhouse_storages_windowview Storages/WindowView) -add_object_library(clickhouse_storages_s3queue Storages/S3Queue) +add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue) add_object_library(clickhouse_storages_materializedview Storages/MaterializedView) add_object_library(clickhouse_client Client) add_object_library(clickhouse_bridge BridgeHelper) diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 95906c63349..c6cc2f8f8c9 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -27,7 +27,7 @@ M(ZooKeeperLogElement) \ M(ProcessorProfileLogElement) \ M(TextLogElement) \ - M(S3QueueLogElement) \ + 
M(ObjectStorageQueueLogElement) \ M(FilesystemCacheLogElement) \ M(FilesystemReadPrefetchesLogElement) \ M(AsynchronousInsertLogElement) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 05985316566..18034d846df 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -201,13 +201,13 @@ IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS, {"zlib", FormatSettings::ORCCompression::ZLIB}, {"lz4", FormatSettings::ORCCompression::LZ4}}) -IMPLEMENT_SETTING_ENUM(S3QueueMode, ErrorCodes::BAD_ARGUMENTS, - {{"ordered", S3QueueMode::ORDERED}, - {"unordered", S3QueueMode::UNORDERED}}) +IMPLEMENT_SETTING_ENUM(ObjectStorageQueueMode, ErrorCodes::BAD_ARGUMENTS, + {{"ordered", ObjectStorageQueueMode::ORDERED}, + {"unordered", ObjectStorageQueueMode::UNORDERED}}) -IMPLEMENT_SETTING_ENUM(S3QueueAction, ErrorCodes::BAD_ARGUMENTS, - {{"keep", S3QueueAction::KEEP}, - {"delete", S3QueueAction::DELETE}}) +IMPLEMENT_SETTING_ENUM(ObjectStorageQueueAction, ErrorCodes::BAD_ARGUMENTS, + {{"keep", ObjectStorageQueueAction::KEEP}, + {"delete", ObjectStorageQueueAction::DELETE}}) IMPLEMENT_SETTING_ENUM(ExternalCommandStderrReaction, ErrorCodes::BAD_ARGUMENTS, {{"none", ExternalCommandStderrReaction::NONE}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 575cd8700c8..2d65bfc7463 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -341,21 +341,21 @@ DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) DECLARE_SETTING_ENUM(LocalFSReadMethod) -enum class S3QueueMode : uint8_t +enum class ObjectStorageQueueMode : uint8_t { ORDERED, UNORDERED, }; -DECLARE_SETTING_ENUM(S3QueueMode) +DECLARE_SETTING_ENUM(ObjectStorageQueueMode) -enum class S3QueueAction : uint8_t +enum class ObjectStorageQueueAction : uint8_t { KEEP, DELETE, }; -DECLARE_SETTING_ENUM(S3QueueAction) +DECLARE_SETTING_ENUM(ObjectStorageQueueAction) DECLARE_SETTING_ENUM(ExternalCommandStderrReaction) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 4b0ebc008ea..0d9f01813b3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4118,7 +4118,7 @@ std::shared_ptr Context::getFilesystemCacheLog() const return shared->system_logs->filesystem_cache_log; } -std::shared_ptr Context::getS3QueueLog() const +std::shared_ptr Context::getS3QueueLog() const { SharedLockGuard lock(shared->mutex); if (!shared->system_logs) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index f3073ccc09c..e20a76598ae 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -106,7 +106,7 @@ class TransactionsInfoLog; class ProcessorsProfileLog; class FilesystemCacheLog; class FilesystemReadPrefetchesLog; -class S3QueueLog; +class ObjectStorageQueueLog; class AsynchronousInsertLog; class BackupLog; class BlobStorageLog; @@ -1106,7 +1106,7 @@ public: std::shared_ptr getTransactionsInfoLog() const; std::shared_ptr getProcessorsProfileLog() const; std::shared_ptr getFilesystemCacheLog() const; - std::shared_ptr getS3QueueLog() const; + std::shared_ptr getS3QueueLog() const; std::shared_ptr getFilesystemReadPrefetchesLog() const; std::shared_ptr getAsynchronousInsertLog() const; std::shared_ptr getBackupLog() const; diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/S3QueueLog.cpp index ba990a8ac25..09aba5d909d 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ b/src/Interpreters/S3QueueLog.cpp @@ -14,13 +14,13 @@ namespace DB { -ColumnsDescription S3QueueLogElement::getColumnsDescription() 
+ColumnsDescription ObjectStorageQueueLogElement::getColumnsDescription() { auto status_datatype = std::make_shared( DataTypeEnum8::Values { - {"Processed", static_cast(S3QueueLogElement::S3QueueStatus::Processed)}, - {"Failed", static_cast(S3QueueLogElement::S3QueueStatus::Failed)}, + {"Processed", static_cast(ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed)}, + {"Failed", static_cast(ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed)}, }); return ColumnsDescription @@ -41,7 +41,7 @@ ColumnsDescription S3QueueLogElement::getColumnsDescription() }; } -void S3QueueLogElement::appendToBlock(MutableColumns & columns) const +void ObjectStorageQueueLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; columns[i++]->insert(getFQDNOrHostName()); diff --git a/src/Interpreters/S3QueueLog.h b/src/Interpreters/S3QueueLog.h index 19e69c39247..b0e843a0cc3 100644 --- a/src/Interpreters/S3QueueLog.h +++ b/src/Interpreters/S3QueueLog.h @@ -9,7 +9,7 @@ namespace DB { -struct S3QueueLogElement +struct ObjectStorageQueueLogElement { time_t event_time{}; @@ -20,18 +20,18 @@ struct S3QueueLogElement std::string file_name; size_t rows_processed = 0; - enum class S3QueueStatus : uint8_t + enum class ObjectStorageQueueStatus : uint8_t { Processed, Failed, }; - S3QueueStatus status; + ObjectStorageQueueStatus status; ProfileEvents::Counters::Snapshot counters_snapshot; time_t processing_start_time; time_t processing_end_time; std::string exception; - static std::string name() { return "S3QueueLog"; } + static std::string name() { return "ObjectStorageQueueLog"; } static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } @@ -39,9 +39,9 @@ struct S3QueueLogElement void appendToBlock(MutableColumns & columns) const; }; -class S3QueueLog : public SystemLog +class ObjectStorageQueueLog : public SystemLog { - using SystemLog::SystemLog; + using SystemLog::SystemLog; }; } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3b25deeb59d..d8139504c8e 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -303,7 +303,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log", "Contains profiling information on processors level (building blocks for a pipeline for query execution."); asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history for all asynchronous inserts executed on current server."); backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with the information about BACKUP and RESTORE operations."); - s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with the information files processes by S3Queue engine."); + s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with information about files processed by the S3Queue engine."); blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes."); if (query_log) diff --git a/src/Interpreters/SystemLog.h
b/src/Interpreters/SystemLog.h index af635ca1bdb..6e8875ef9a6 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -52,7 +52,7 @@ class FilesystemCacheLog; class FilesystemReadPrefetchesLog; class AsynchronousInsertLog; class BackupLog; -class S3QueueLog; +class ObjectStorageQueueLog; class BlobStorageLog; /// System logs should be destroyed in destructor of the last Context and before tables, @@ -74,7 +74,7 @@ struct SystemLogs std::shared_ptr metric_log; /// Used to log all metrics. std::shared_ptr filesystem_cache_log; std::shared_ptr filesystem_read_prefetches_log; - std::shared_ptr s3_queue_log; + std::shared_ptr s3_queue_log; /// Metrics from system.asynchronous_metrics. std::shared_ptr asynchronous_metric_log; /// OpenTelemetry trace spans. diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index fd7c7aa7102..944dd84865b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -15,7 +15,7 @@ class SchemaCache; class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { - friend class StorageS3QueueSource; + friend class ObjectStorageQueueSource; public: using Configuration = StorageObjectStorage::Configuration; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp similarity index 88% rename from src/Storages/S3Queue/S3QueueIFileMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp index 6c4089115d4..8719d9f4635 100644 --- a/src/Storages/S3Queue/S3QueueIFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -35,19 +35,19 @@ namespace } } -void S3QueueIFileMetadata::FileStatus::onProcessing() +void ObjectStorageQueueIFileMetadata::FileStatus::onProcessing() { state = FileStatus::State::Processing; processing_start_time = now(); } -void S3QueueIFileMetadata::FileStatus::onProcessed() +void ObjectStorageQueueIFileMetadata::FileStatus::onProcessed() { state = FileStatus::State::Processed; processing_end_time = now(); } -void S3QueueIFileMetadata::FileStatus::onFailed(const std::string & exception) +void ObjectStorageQueueIFileMetadata::FileStatus::onFailed(const std::string & exception) { state = FileStatus::State::Failed; processing_end_time = now(); @@ -55,13 +55,13 @@ void S3QueueIFileMetadata::FileStatus::onFailed(const std::string & exception) last_exception = exception; } -std::string S3QueueIFileMetadata::FileStatus::getException() const +std::string ObjectStorageQueueIFileMetadata::FileStatus::getException() const { std::lock_guard lock(last_exception_mutex); return last_exception; } -std::string S3QueueIFileMetadata::NodeMetadata::toString() const +std::string ObjectStorageQueueIFileMetadata::NodeMetadata::toString() const { Poco::JSON::Object json; json.set("file_path", file_path); @@ -76,7 +76,7 @@ std::string S3QueueIFileMetadata::NodeMetadata::toString() const return oss.str(); } -S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::NodeMetadata::fromString(const std::string & metadata_str) +ObjectStorageQueueIFileMetadata::NodeMetadata ObjectStorageQueueIFileMetadata::NodeMetadata::fromString(const std::string & metadata_str) { Poco::JSON::Parser parser; auto json = 
parser.parse(metadata_str).extract(); @@ -91,7 +91,7 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::NodeMetadata::fromStrin return metadata; } -S3QueueIFileMetadata::S3QueueIFileMetadata( +ObjectStorageQueueIFileMetadata::ObjectStorageQueueIFileMetadata( const std::string & path_, const std::string & processing_node_path_, const std::string & processed_node_path_, @@ -116,7 +116,7 @@ S3QueueIFileMetadata::S3QueueIFileMetadata( processed_node_path, processing_node_path, failed_node_path); } -S3QueueIFileMetadata::~S3QueueIFileMetadata() +ObjectStorageQueueIFileMetadata::~ObjectStorageQueueIFileMetadata() { if (processing_id_version.has_value()) { @@ -148,9 +148,9 @@ S3QueueIFileMetadata::~S3QueueIFileMetadata() } } -std::string S3QueueIFileMetadata::getNodeName(const std::string & path) +std::string ObjectStorageQueueIFileMetadata::getNodeName(const std::string & path) { - /// Since with are dealing with paths in s3 which can have "/", + /// Since we are dealing with paths in object storage which can have "/", /// we cannot create a zookeeper node with the name equal to path. /// Therefore we use a hash of the path as a node name. @@ -159,7 +159,7 @@ std::string S3QueueIFileMetadata::getNodeName(const std::string & path) return toString(path_hash.get64()); } -S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( +ObjectStorageQueueIFileMetadata::NodeMetadata ObjectStorageQueueIFileMetadata::createNodeMetadata( const std::string & path, const std::string & exception, size_t retries) @@ -168,9 +168,9 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( /// Since node name is just a hash we want to know to which file it corresponds, /// so we keep "file_path" in nodes data. - /// "last_processed_timestamp" is needed for TTL metadata nodes enabled by s3queue_tracked_file_ttl_sec. + /// "last_processed_timestamp" is needed for TTL metadata nodes enabled by tracked_file_ttl_sec. /// "last_exception" is kept for introspection, should also be visible in system.s3queue_log if it is enabled. - /// "retries" is kept for retrying the processing enabled by s3queue_loading_retries. + /// "retries" is kept for retrying the processing enabled by loading_retries. NodeMetadata metadata; metadata.file_path = path; metadata.last_processed_timestamp = now(); @@ -179,7 +179,7 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( return metadata; } -std::string S3QueueIFileMetadata::getProcessorInfo(const std::string & processor_id) +std::string ObjectStorageQueueIFileMetadata::getProcessorInfo(const std::string & processor_id) { /// Add information which will be useful for debugging just in case.
Poco::JSON::Object json; @@ -192,7 +192,7 @@ std::string S3QueueIFileMetadata::getProcessorInfo(const std::string & processor return oss.str(); } -bool S3QueueIFileMetadata::setProcessing() +bool ObjectStorageQueueIFileMetadata::setProcessing() { auto state = file_status->state.load(); if (state == FileStatus::State::Processing @@ -221,7 +221,7 @@ bool S3QueueIFileMetadata::setProcessing() return success; } -void S3QueueIFileMetadata::setProcessed() +void ObjectStorageQueueIFileMetadata::setProcessed() { LOG_TRACE(log, "Setting file {} as processed (path: {})", path, processed_node_path); @@ -235,7 +235,7 @@ void S3QueueIFileMetadata::setProcessed() LOG_TRACE(log, "Set file {} as processed (rows: {})", path, file_status->processed_rows); } -void S3QueueIFileMetadata::setFailed(const std::string & exception) +void ObjectStorageQueueIFileMetadata::setFailed(const std::string & exception) { LOG_TRACE(log, "Setting file {} as failed (exception: {}, path: {})", path, exception, failed_node_path); @@ -254,7 +254,7 @@ void S3QueueIFileMetadata::setFailed(const std::string & exception) LOG_TRACE(log, "Set file {} as failed (rows: {})", path, file_status->processed_rows); } -void S3QueueIFileMetadata::setFailedNonRetriable() +void ObjectStorageQueueIFileMetadata::setFailedNonRetriable() { auto zk_client = getZooKeeper(); Coordination::Requests requests; @@ -285,7 +285,7 @@ void S3QueueIFileMetadata::setFailedNonRetriable() throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error while setting file as failed: {}", code); } -void S3QueueIFileMetadata::setFailedRetriable() +void ObjectStorageQueueIFileMetadata::setFailedRetriable() { /// Instead of creating a persistent /failed/node_hash node /// we create a persistent /failed/node_hash.retriable node. diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h similarity index 95% rename from src/Storages/S3Queue/S3QueueIFileMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h index e0b0d16cbcc..60fb403cfe0 100644 --- a/src/Storages/S3Queue/S3QueueIFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h @@ -6,7 +6,7 @@ namespace DB { -class S3QueueIFileMetadata +class ObjectStorageQueueIFileMetadata { public: struct FileStatus @@ -41,7 +41,7 @@ public: }; using FileStatusPtr = std::shared_ptr; - explicit S3QueueIFileMetadata( + explicit ObjectStorageQueueIFileMetadata( const std::string & path_, const std::string & processing_node_path_, const std::string & processed_node_path_, @@ -50,7 +50,7 @@ public: size_t max_loading_retries_, LoggerPtr log_); - virtual ~S3QueueIFileMetadata(); + virtual ~ObjectStorageQueueIFileMetadata(); bool setProcessing(); void setProcessed(); @@ -92,7 +92,7 @@ protected: LoggerPtr log; /// processing node is ephemeral, so we cannot verify with it if - /// this node was created by a certain processor on a previous s3 queue processing stage, + /// this node was created by a certain processor on a previous processing stage, /// because we could get a session expired in between the stages /// and someone else could just create this processing node. 
/// Therefore we also create a persistent processing node diff --git a/src/Storages/S3Queue/S3QueueMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp similarity index 74% rename from src/Storages/S3Queue/S3QueueMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp index e828e9f0716..5eb6cfeaf4a 100644 --- a/src/Storages/S3Queue/S3QueueMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp @@ -4,13 +4,12 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -63,7 +62,7 @@ namespace } } -class S3QueueMetadata::LocalFileStatuses +class ObjectStorageQueueMetadata::LocalFileStatuses { public: LocalFileStatuses() = default; @@ -114,90 +113,81 @@ private: } }; -S3QueueMetadata::S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_) +ObjectStorageQueueMetadata::ObjectStorageQueueMetadata(const fs::path & zookeeper_path_, const ObjectStorageQueueSettings & settings_) : settings(settings_) , zookeeper_path(zookeeper_path_) , buckets_num(getBucketsNum(settings_)) - , log(getLogger("StorageS3Queue(" + zookeeper_path_.string() + ")")) + , log(getLogger("StorageObjectStorageQueue(" + zookeeper_path_.string() + ")")) , local_file_statuses(std::make_shared()) { - if (settings.mode == S3QueueMode::UNORDERED - && (settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec)) + if (settings.mode == ObjectStorageQueueMode::UNORDERED + && (settings.tracked_files_limit || settings.tracked_file_ttl_sec)) { task = Context::getGlobalContextInstance()->getSchedulePool().createTask( - "S3QueueCleanupFunc", + "ObjectStorageQueueCleanupFunc", [this] { cleanupThreadFunc(); }); task->activate(); task->scheduleAfter( generateRescheduleInterval( - settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms)); + settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms)); } } -S3QueueMetadata::~S3QueueMetadata() +ObjectStorageQueueMetadata::~ObjectStorageQueueMetadata() { shutdown(); } -void S3QueueMetadata::shutdown() +void ObjectStorageQueueMetadata::shutdown() { shutdown_called = true; if (task) task->deactivate(); } -void S3QueueMetadata::checkSettings(const S3QueueSettings & settings_) const +void ObjectStorageQueueMetadata::checkSettings(const ObjectStorageQueueSettings & settings_) const { - S3QueueTableMetadata::checkEquals(settings, settings_); + ObjectStorageQueueTableMetadata::checkEquals(settings, settings_); } -S3QueueMetadata::FileStatusPtr S3QueueMetadata::getFileStatus(const std::string & path) +ObjectStorageQueueMetadata::FileStatusPtr ObjectStorageQueueMetadata::getFileStatus(const std::string & path) { return local_file_statuses->get(path, /* create */false); } -S3QueueMetadata::FileStatuses S3QueueMetadata::getFileStatuses() const +ObjectStorageQueueMetadata::FileStatuses ObjectStorageQueueMetadata::getFileStatuses() const { return local_file_statuses->getAll(); } -S3QueueMetadata::FileMetadataPtr S3QueueMetadata::getFileMetadata( +ObjectStorageQueueMetadata::FileMetadataPtr ObjectStorageQueueMetadata::getFileMetadata( const std::string & path, - S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info) + ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info) { auto file_status = local_file_statuses->get(path, /* create */true); switch (settings.mode.value) { - case S3QueueMode::ORDERED: - return 
std::make_shared( + case ObjectStorageQueueMode::ORDERED: + return std::make_shared( zookeeper_path, path, file_status, bucket_info, buckets_num, - settings.s3queue_loading_retries, + settings.loading_retries, log); - case S3QueueMode::UNORDERED: - return std::make_shared( + case ObjectStorageQueueMode::UNORDERED: + return std::make_shared( zookeeper_path, path, file_status, - settings.s3queue_loading_retries, + settings.loading_retries, log); } } -size_t S3QueueMetadata::getBucketsNum(const S3QueueSettings & settings) -{ - if (settings.s3queue_buckets) - return settings.s3queue_buckets; - if (settings.s3queue_processing_threads_num) - return settings.s3queue_processing_threads_num; - return 0; -} - -size_t S3QueueMetadata::getBucketsNum(const S3QueueTableMetadata & settings) +size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueSettings & settings) { if (settings.buckets) return settings.buckets; @@ -206,32 +196,41 @@ size_t S3QueueMetadata::getBucketsNum(const S3QueueTableMetadata & settings) return 0; } -bool S3QueueMetadata::useBucketsForProcessing() const +size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueTableMetadata & settings) { - return settings.mode == S3QueueMode::ORDERED && (buckets_num > 1); + if (settings.buckets) + return settings.buckets; + if (settings.processing_threads_num) + return settings.processing_threads_num; + return 0; } -S3QueueMetadata::Bucket S3QueueMetadata::getBucketForPath(const std::string & path) const +bool ObjectStorageQueueMetadata::useBucketsForProcessing() const { - return S3QueueOrderedFileMetadata::getBucketForPath(path, buckets_num); + return settings.mode == ObjectStorageQueueMode::ORDERED && (buckets_num > 1); } -S3QueueOrderedFileMetadata::BucketHolderPtr -S3QueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor) +ObjectStorageQueueMetadata::Bucket ObjectStorageQueueMetadata::getBucketForPath(const std::string & path) const { - return S3QueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor); + return ObjectStorageQueueOrderedFileMetadata::getBucketForPath(path, buckets_num); } -void S3QueueMetadata::initialize( +ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr +ObjectStorageQueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor) +{ + return ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor); +} + +void ObjectStorageQueueMetadata::initialize( const ConfigurationPtr & configuration, const StorageInMemoryMetadata & storage_metadata) { - const auto metadata_from_table = S3QueueTableMetadata(*configuration, settings, storage_metadata); + const auto metadata_from_table = ObjectStorageQueueTableMetadata(*configuration, settings, storage_metadata); const auto & columns_from_table = storage_metadata.getColumns(); const auto table_metadata_path = zookeeper_path / "metadata"; - const auto metadata_paths = settings.mode == S3QueueMode::ORDERED - ? S3QueueOrderedFileMetadata::getMetadataPaths(buckets_num) - : S3QueueUnorderedFileMetadata::getMetadataPaths(); + const auto metadata_paths = settings.mode == ObjectStorageQueueMode::ORDERED + ? 
ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(buckets_num) + : ObjectStorageQueueUnorderedFileMetadata::getMetadataPaths(); auto zookeeper = getZooKeeper(); zookeeper->createAncestors(zookeeper_path); @@ -240,7 +239,7 @@ void S3QueueMetadata::initialize( { if (zookeeper->exists(table_metadata_path)) { - const auto metadata_from_zk = S3QueueTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata")); + const auto metadata_from_zk = ObjectStorageQueueTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata")); const auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns); metadata_from_table.checkEquals(metadata_from_zk); @@ -265,8 +264,8 @@ void S3QueueMetadata::initialize( requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); } - if (!settings.s3queue_last_processed_path.value.empty()) - getFileMetadata(settings.s3queue_last_processed_path)->setProcessedAtStartRequests(requests, zookeeper); + if (!settings.last_processed_path.value.empty()) + getFileMetadata(settings.last_processed_path)->setProcessedAtStartRequests(requests, zookeeper); Coordination::Responses responses; auto code = zookeeper->tryMulti(requests, responses); @@ -290,10 +289,10 @@ void S3QueueMetadata::initialize( "of wrong zookeeper path or because of logical error"); } -void S3QueueMetadata::cleanupThreadFunc() +void ObjectStorageQueueMetadata::cleanupThreadFunc() { /// A background task is responsible for maintaining - /// settings.s3queue_tracked_files_limit and max_set_age settings for `unordered` processing mode. + /// settings.tracked_files_limit and max_set_age settings for `unordered` processing mode. if (shutdown_called) return; @@ -312,10 +311,10 @@ void S3QueueMetadata::cleanupThreadFunc() task->scheduleAfter( generateRescheduleInterval( - settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms)); + settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms)); } -void S3QueueMetadata::cleanupThreadFuncImpl() +void ObjectStorageQueueMetadata::cleanupThreadFuncImpl() { auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueCleanupMaxSetSizeOrTTLMicroseconds); const auto zk_client = getZooKeeper(); @@ -355,11 +354,11 @@ void S3QueueMetadata::cleanupThreadFuncImpl() return; } - chassert(settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec); - const bool check_nodes_limit = settings.s3queue_tracked_files_limit > 0; - const bool check_nodes_ttl = settings.s3queue_tracked_file_ttl_sec > 0; + chassert(settings.tracked_files_limit || settings.tracked_file_ttl_sec); + const bool check_nodes_limit = settings.tracked_files_limit > 0; + const bool check_nodes_ttl = settings.tracked_file_ttl_sec > 0; - const bool nodes_limit_exceeded = nodes_num > settings.s3queue_tracked_files_limit; + const bool nodes_limit_exceeded = nodes_num > settings.tracked_files_limit; if ((!nodes_limit_exceeded || !check_nodes_limit) && !check_nodes_ttl) { LOG_TEST(log, "No limit exceeded"); @@ -381,7 +380,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl() struct Node { std::string zk_path; - S3QueueIFileMetadata::NodeMetadata metadata; + ObjectStorageQueueIFileMetadata::NodeMetadata metadata; }; auto node_cmp = [](const Node & a, const Node & b) { @@ -402,7 +401,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl() std::string metadata_str; if (zk_client->tryGet(path, metadata_str)) { - sorted_nodes.emplace(path, 
S3QueueIFileMetadata::NodeMetadata::fromString(metadata_str)); + sorted_nodes.emplace(path, ObjectStorageQueueIFileMetadata::NodeMetadata::fromString(metadata_str)); LOG_TEST(log, "Fetched metadata for node {}", path); } else @@ -432,9 +431,9 @@ void S3QueueMetadata::cleanupThreadFuncImpl() wb << fmt::format("Node: {}, path: {}, timestamp: {};\n", node, metadata.file_path, metadata.last_processed_timestamp); return wb.str(); }; - LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.s3queue_tracked_files_limit, settings.s3queue_tracked_file_ttl_sec, get_nodes_str()); + LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.tracked_files_limit, settings.tracked_file_ttl_sec, get_nodes_str()); - size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - settings.s3queue_tracked_files_limit : 0; + size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - settings.tracked_files_limit : 0; for (const auto & node : sorted_nodes) { if (nodes_to_remove) @@ -453,7 +452,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl() else if (check_nodes_ttl) { UInt64 node_age = getCurrentTime() - node.metadata.last_processed_timestamp; - if (node_age >= settings.s3queue_tracked_file_ttl_sec) + if (node_age >= settings.tracked_file_ttl_sec) { LOG_TRACE(log, "Removing node at path {} ({}) because file ttl is reached", node.metadata.file_path, node.zk_path); diff --git a/src/Storages/S3Queue/S3QueueMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h similarity index 62% rename from src/Storages/S3Queue/S3QueueMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h index 25d01fb52b9..05060931b5a 100644 --- a/src/Storages/S3Queue/S3QueueMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h @@ -7,23 +7,23 @@ #include #include #include -#include -#include -#include +#include +#include +#include namespace fs = std::filesystem; namespace Poco { class Logger; } namespace DB { -struct S3QueueSettings; -class StorageS3Queue; -struct S3QueueTableMetadata; +struct ObjectStorageQueueSettings; +class StorageObjectStorageQueue; +struct ObjectStorageQueueTableMetadata; struct StorageInMemoryMetadata; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; /** - * A class for managing S3Queue metadata in zookeeper, e.g. + * A class for managing ObjectStorageQueue metadata in zookeeper, e.g. * the following folders: * - /processed * - /processing @@ -35,7 +35,7 @@ using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; * - /processing * - /failed * - * Depending on S3Queue processing mode (ordered or unordered) + * Depending on ObjectStorageQueue processing mode (ordered or unordered) * we can differently store metadata in /processed node. * * Implements caching of zookeeper metadata for faster responses. @@ -44,24 +44,24 @@ using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; * In case of Unordered mode - if files TTL is enabled or maximum tracked files limit is set * starts a background cleanup thread which is responsible for maintaining them. 
*/ -class S3QueueMetadata +class ObjectStorageQueueMetadata { public: - using FileStatus = S3QueueIFileMetadata::FileStatus; - using FileMetadataPtr = std::shared_ptr; + using FileStatus = ObjectStorageQueueIFileMetadata::FileStatus; + using FileMetadataPtr = std::shared_ptr; using FileStatusPtr = std::shared_ptr; using FileStatuses = std::unordered_map; using Bucket = size_t; using Processor = std::string; - S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_); - ~S3QueueMetadata(); + ObjectStorageQueueMetadata(const fs::path & zookeeper_path_, const ObjectStorageQueueSettings & settings_); + ~ObjectStorageQueueMetadata(); void initialize(const ConfigurationPtr & configuration, const StorageInMemoryMetadata & storage_metadata); - void checkSettings(const S3QueueSettings & settings) const; + void checkSettings(const ObjectStorageQueueSettings & settings) const; void shutdown(); - FileMetadataPtr getFileMetadata(const std::string & path, S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info = {}); + FileMetadataPtr getFileMetadata(const std::string & path, ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info = {}); FileStatusPtr getFileStatus(const std::string & path); FileStatuses getFileStatuses() const; @@ -69,16 +69,16 @@ public: /// Method of Ordered mode parallel processing. bool useBucketsForProcessing() const; Bucket getBucketForPath(const std::string & path) const; - S3QueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor); + ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor); - static size_t getBucketsNum(const S3QueueSettings & settings); - static size_t getBucketsNum(const S3QueueTableMetadata & settings); + static size_t getBucketsNum(const ObjectStorageQueueSettings & settings); + static size_t getBucketsNum(const ObjectStorageQueueTableMetadata & settings); private: void cleanupThreadFunc(); void cleanupThreadFuncImpl(); - const S3QueueSettings settings; + const ObjectStorageQueueSettings settings; const fs::path zookeeper_path; const size_t buckets_num; diff --git a/src/Storages/S3Queue/S3QueueMetadataFactory.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp similarity index 62% rename from src/Storages/S3Queue/S3QueueMetadataFactory.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp index a319b21ca3e..ffae33d6f41 100644 --- a/src/Storages/S3Queue/S3QueueMetadataFactory.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -8,20 +8,20 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -S3QueueMetadataFactory & S3QueueMetadataFactory::instance() +ObjectStorageQueueMetadataFactory & ObjectStorageQueueMetadataFactory::instance() { - static S3QueueMetadataFactory ret; + static ObjectStorageQueueMetadataFactory ret; return ret; } -S3QueueMetadataFactory::FilesMetadataPtr -S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3QueueSettings & settings) +ObjectStorageQueueMetadataFactory::FilesMetadataPtr +ObjectStorageQueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const ObjectStorageQueueSettings & settings) { std::lock_guard lock(mutex); auto it = metadata_by_path.find(zookeeper_path); if (it == metadata_by_path.end()) { - auto files_metadata = std::make_shared(zookeeper_path, settings); + auto files_metadata = 
std::make_shared(zookeeper_path, settings); it = metadata_by_path.emplace(zookeeper_path, std::move(files_metadata)).first; } else @@ -32,7 +32,7 @@ S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3 return it->second.metadata; } -void S3QueueMetadataFactory::remove(const std::string & zookeeper_path) +void ObjectStorageQueueMetadataFactory::remove(const std::string & zookeeper_path) { std::lock_guard lock(mutex); auto it = metadata_by_path.find(zookeeper_path); @@ -57,9 +57,9 @@ void S3QueueMetadataFactory::remove(const std::string & zookeeper_path) } } -std::unordered_map S3QueueMetadataFactory::getAll() +std::unordered_map ObjectStorageQueueMetadataFactory::getAll() { - std::unordered_map result; + std::unordered_map result; for (const auto & [zk_path, metadata_and_ref_count] : metadata_by_path) result.emplace(zk_path, metadata_and_ref_count.metadata); return result; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h new file mode 100644 index 00000000000..a93f5ee3d83 --- /dev/null +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class ObjectStorageQueueMetadataFactory final : private boost::noncopyable +{ +public: + using FilesMetadataPtr = std::shared_ptr; + + static ObjectStorageQueueMetadataFactory & instance(); + + FilesMetadataPtr getOrCreate(const std::string & zookeeper_path, const ObjectStorageQueueSettings & settings); + + void remove(const std::string & zookeeper_path); + + std::unordered_map getAll(); + +private: + struct Metadata + { + explicit Metadata(std::shared_ptr metadata_) : metadata(metadata_), ref_count(1) {} + + std::shared_ptr metadata; + /// TODO: the ref count should be kept in keeper, because of the case with distributed processing. 
+ size_t ref_count = 0; + }; + using MetadataByPath = std::unordered_map; + + MetadataByPath metadata_by_path; + std::mutex mutex; +}; + +} diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp similarity index 89% rename from src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp index bac87c95cc9..f729ef00a0a 100644 --- a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -16,7 +16,7 @@ namespace ErrorCodes namespace { - S3QueueOrderedFileMetadata::Bucket getBucketForPathImpl(const std::string & path, size_t buckets_num) + ObjectStorageQueueOrderedFileMetadata::Bucket getBucketForPathImpl(const std::string & path, size_t buckets_num) { return sipHash64(path) % buckets_num; } @@ -40,7 +40,7 @@ namespace } } -S3QueueOrderedFileMetadata::BucketHolder::BucketHolder( +ObjectStorageQueueOrderedFileMetadata::BucketHolder::BucketHolder( const Bucket & bucket_, int bucket_version_, const std::string & bucket_lock_path_, @@ -55,13 +55,13 @@ S3QueueOrderedFileMetadata::BucketHolder::BucketHolder( { } -void S3QueueOrderedFileMetadata::BucketHolder::release() +void ObjectStorageQueueOrderedFileMetadata::BucketHolder::release() { if (released) return; released = true; - LOG_TEST(getLogger("S3QueueBucketHolder"), "Releasing bucket {}", bucket_info->bucket); + LOG_TEST(getLogger("ObjectStorageQueueBucketHolder"), "Releasing bucket {}", bucket_info->bucket); Coordination::Requests requests; /// Check that bucket lock version has not changed @@ -75,7 +75,7 @@ void S3QueueOrderedFileMetadata::BucketHolder::release() zkutil::KeeperMultiException::check(code, requests, responses); } -S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder() +ObjectStorageQueueOrderedFileMetadata::BucketHolder::~BucketHolder() { try { @@ -87,7 +87,7 @@ S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder() } } -S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( +ObjectStorageQueueOrderedFileMetadata::ObjectStorageQueueOrderedFileMetadata( const std::filesystem::path & zk_path_, const std::string & path_, FileStatusPtr file_status_, @@ -95,7 +95,7 @@ S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( size_t buckets_num_, size_t max_loading_retries_, LoggerPtr log_) - : S3QueueIFileMetadata( + : ObjectStorageQueueIFileMetadata( path_, /* processing_node_path */zk_path_ / "processing" / getNodeName(path_), /* processed_node_path */getProcessedPath(zk_path_, path_, buckets_num_), @@ -109,7 +109,7 @@ S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( { } -std::vector S3QueueOrderedFileMetadata::getMetadataPaths(size_t buckets_num) +std::vector ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(size_t buckets_num) { if (buckets_num > 1) { @@ -122,7 +122,7 @@ std::vector S3QueueOrderedFileMetadata::getMetadataPaths(size_t buc return {"failed", "processing"}; } -bool S3QueueOrderedFileMetadata::getMaxProcessedFile( +bool ObjectStorageQueueOrderedFileMetadata::getMaxProcessedFile( NodeMetadata & result, Coordination::Stat * stat, const zkutil::ZooKeeperPtr & zk_client) @@ -130,7 +130,7 @@ bool S3QueueOrderedFileMetadata::getMaxProcessedFile( return getMaxProcessedFile(result, stat, processed_node_path, zk_client); } -bool S3QueueOrderedFileMetadata::getMaxProcessedFile( +bool 
ObjectStorageQueueOrderedFileMetadata::getMaxProcessedFile( NodeMetadata & result, Coordination::Stat * stat, const std::string & processed_node_path_, @@ -146,12 +146,12 @@ bool S3QueueOrderedFileMetadata::getMaxProcessedFile( return false; } -S3QueueOrderedFileMetadata::Bucket S3QueueOrderedFileMetadata::getBucketForPath(const std::string & path_, size_t buckets_num) +ObjectStorageQueueOrderedFileMetadata::Bucket ObjectStorageQueueOrderedFileMetadata::getBucketForPath(const std::string & path_, size_t buckets_num) { return getBucketForPathImpl(path_, buckets_num); } -S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcquireBucket( +ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket( const std::filesystem::path & zk_path, const Bucket & bucket, const Processor & processor) @@ -172,7 +172,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui bucket_lock_id_path, processor_info, zkutil::CreateMode::Persistent, /* ignore_if_exists */true)); /// Update bucket lock id path. We use its version as a version of ephemeral bucket lock node. - /// (See comment near S3QueueIFileMetadata::processing_node_version). + /// (See comment near ObjectStorageQueueIFileMetadata::processing_node_version). requests.push_back(zkutil::makeSetRequest(bucket_lock_id_path, processor_info, -1)); Coordination::Responses responses; @@ -183,7 +183,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui const auto bucket_lock_version = set_response->stat.version; LOG_TEST( - getLogger("S3QueueOrderedFileMetadata"), + getLogger("ObjectStorageQueueOrderedFileMetadata"), "Processor {} acquired bucket {} for processing (bucket lock version: {})", processor, bucket, bucket_lock_version); @@ -204,7 +204,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error: {}", code); } -std::pair S3QueueOrderedFileMetadata::setProcessingImpl() +std::pair ObjectStorageQueueOrderedFileMetadata::setProcessingImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -300,7 +300,7 @@ std::pair S3QueueOrderedFileMetad } } -void S3QueueOrderedFileMetadata::setProcessedAtStartRequests( +void ObjectStorageQueueOrderedFileMetadata::setProcessedAtStartRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr & zk_client) { @@ -318,7 +318,7 @@ void S3QueueOrderedFileMetadata::setProcessedAtStartRequests( } } -void S3QueueOrderedFileMetadata::setProcessedRequests( +void ObjectStorageQueueOrderedFileMetadata::setProcessedRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr & zk_client, const std::string & processed_node_path_, @@ -359,7 +359,7 @@ void S3QueueOrderedFileMetadata::setProcessedRequests( } } -void S3QueueOrderedFileMetadata::setProcessedImpl() +void ObjectStorageQueueOrderedFileMetadata::setProcessedImpl() { /// In one zookeeper transaction do the following: enum RequestType diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h similarity index 86% rename from src/Storages/S3Queue/S3QueueOrderedFileMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h index 698ec0f54cc..c5e9744b57c 100644 --- a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h @@ -1,5 +1,5 
@@ #pragma once -#include +#include #include #include @@ -7,7 +7,7 @@ namespace DB { -class S3QueueOrderedFileMetadata : public S3QueueIFileMetadata +class ObjectStorageQueueOrderedFileMetadata : public ObjectStorageQueueIFileMetadata { public: using Processor = std::string; @@ -21,7 +21,7 @@ public: }; using BucketInfoPtr = std::shared_ptr; - explicit S3QueueOrderedFileMetadata( + explicit ObjectStorageQueueOrderedFileMetadata( const std::filesystem::path & zk_path_, const std::string & path_, FileStatusPtr file_status_, @@ -38,7 +38,7 @@ public: const Bucket & bucket, const Processor & processor); - static S3QueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num); + static ObjectStorageQueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num); static std::vector getMetadataPaths(size_t buckets_num); @@ -72,7 +72,7 @@ private: bool ignore_if_exists); }; -struct S3QueueOrderedFileMetadata::BucketHolder +struct ObjectStorageQueueOrderedFileMetadata::BucketHolder { BucketHolder( const Bucket & bucket_, diff --git a/src/Storages/S3Queue/S3QueueSettings.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp similarity index 59% rename from src/Storages/S3Queue/S3QueueSettings.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp index cb312adc5d9..3458105de8c 100644 --- a/src/Storages/S3Queue/S3QueueSettings.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,14 +13,19 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -IMPLEMENT_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) +IMPLEMENT_SETTINGS_TRAITS(ObjectStorageQueueSettingsTraits, LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS) -void S3QueueSettings::loadFromQuery(ASTStorage & storage_def) +void ObjectStorageQueueSettings::loadFromQuery(ASTStorage & storage_def) { if (storage_def.settings) { try { + /// We support settings starting with s3queue_ for compatibility. + for (auto & change : storage_def.settings->changes) + if (change.name.starts_with("s3queue_")) + change.name = change.name.substr(std::strlen("s3queue_")); + applyChanges(storage_def.settings->changes); } catch (Exception & e) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h new file mode 100644 index 00000000000..afe2730662c --- /dev/null +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTStorage; + + +#define OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \ + M(ObjectStorageQueueMode, \ + mode, \ + ObjectStorageQueueMode::ORDERED, \ + "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper." \ + "With ordered mode, only the max name of the successfully consumed file is stored.", \ + 0) \ + M(ObjectStorageQueueAction, after_processing, ObjectStorageQueueAction::KEEP, "Delete or keep file after successful processing", 0) \ + M(String, keeper_path, "", "Zookeeper node path", 0) \ + M(UInt32, loading_retries, 0, "Retry loading up to specified number of times", 0) \ + M(UInt32, processing_threads_num, 1, "Number of processing threads", 0) \ + M(UInt32, enable_logging_to_s3queue_log, 1, "Enable logging to system table system.s3queue_log", 0) \ + M(String, last_processed_path, "", "For Ordered mode. 
Files that have lexicographically smaller file name are considered already processed", 0) \ + M(UInt32, tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ + M(UInt32, polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ + M(UInt32, polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ + M(UInt32, polling_backoff_ms, 1000, "Polling backoff", 0) \ + M(UInt32, tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \ + M(UInt32, cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ + M(UInt32, cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ + M(UInt32, buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ + +#define LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS(M, ALIAS) \ + OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \ + LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(ObjectStorageQueueSettingsTraits, LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS) + + +struct ObjectStorageQueueSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp similarity index 89% rename from src/Storages/S3Queue/S3QueueSource.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index b5b1a8dd992..a56dd6ea343 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -5,17 +5,11 @@ #include #include #include -#include +#include #include #include -namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; -} - namespace ProfileEvents { extern const Event S3QueuePullMicroseconds; @@ -26,12 +20,11 @@ namespace DB namespace ErrorCodes { - extern const int S3_ERROR; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } -StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( +ObjectStorageQueueSource::ObjectStorageQueueObjectInfo::ObjectStorageQueueObjectInfo( const ObjectInfo & object_info, Metadata::FileMetadataPtr processing_holder_) : ObjectInfo(object_info.relative_path, object_info.metadata) @@ -39,12 +32,12 @@ StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( { } -StorageS3QueueSource::FileIterator::FileIterator( - std::shared_ptr metadata_, +ObjectStorageQueueSource::FileIterator::FileIterator( + std::shared_ptr metadata_, std::unique_ptr glob_iterator_, std::atomic & shutdown_called_, LoggerPtr logger_) - : StorageObjectStorageSource::IIterator("S3QueueIterator") + : StorageObjectStorageSource::IIterator("ObjectStorageQueueIterator") , metadata(metadata_) , glob_iterator(std::move(glob_iterator_)) , shutdown_called(shutdown_called_) @@ -52,15 +45,15 @@ StorageS3QueueSource::FileIterator::FileIterator( { } -size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() +size_t ObjectStorageQueueSource::FileIterator::estimatedKeysCount() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method estimateKeysCount is not implemented"); } -StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl(size_t processor) +ObjectStorageQueueSource::ObjectInfoPtr ObjectStorageQueueSource::FileIterator::nextImpl(size_t processor) { ObjectInfoPtr object_info; - S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info; + 
ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info; while (!shutdown_called) { @@ -80,13 +73,13 @@ StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl auto file_metadata = metadata->getFileMetadata(object_info->relative_path, bucket_info); if (file_metadata->setProcessing()) - return std::make_shared(*object_info, file_metadata); + return std::make_shared(*object_info, file_metadata); } return {}; } -std::pair -StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processor) +std::pair +ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processor) { /// We need this lock to maintain consistency between listing s3 directory /// and getting/putting result into listed_keys_cache. @@ -287,19 +280,19 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo } } -StorageS3QueueSource::StorageS3QueueSource( +ObjectStorageQueueSource::ObjectStorageQueueSource( String name_, size_t processor_id_, const Block & header_, std::unique_ptr internal_source_, - std::shared_ptr files_metadata_, - const S3QueueAction & action_, + std::shared_ptr files_metadata_, + const ObjectStorageQueueAction & action_, RemoveFileFunc remove_file_func_, const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, - std::shared_ptr s3_queue_log_, + std::shared_ptr s3_queue_log_, const StorageID & storage_id_, LoggerPtr log_) : ISource(header_) @@ -319,12 +312,12 @@ StorageS3QueueSource::StorageS3QueueSource( { } -String StorageS3QueueSource::getName() const +String ObjectStorageQueueSource::getName() const { return name; } -void StorageS3QueueSource::lazyInitialize(size_t processor) +void ObjectStorageQueueSource::lazyInitialize(size_t processor) { if (initialized) return; @@ -336,7 +329,7 @@ void StorageS3QueueSource::lazyInitialize(size_t processor) initialized = true; } -Chunk StorageS3QueueSource::generate() +Chunk ObjectStorageQueueSource::generate() { lazyInitialize(processor_id); @@ -345,7 +338,7 @@ Chunk StorageS3QueueSource::generate() if (!reader) break; - const auto * object_info = dynamic_cast(&reader.getObjectInfo()); + const auto * object_info = dynamic_cast(&reader.getObjectInfo()); auto file_metadata = object_info->processing_holder; auto file_status = file_metadata->getFileStatus(); @@ -473,33 +466,33 @@ Chunk StorageS3QueueSource::generate() return {}; } -void StorageS3QueueSource::applyActionAfterProcessing(const String & path) +void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path) { switch (action) { - case S3QueueAction::DELETE: + case ObjectStorageQueueAction::DELETE: { assert(remove_file_func); remove_file_func(path); break; } - case S3QueueAction::KEEP: + case ObjectStorageQueueAction::KEEP: break; } } -void StorageS3QueueSource::appendLogElement( +void ObjectStorageQueueSource::appendLogElement( const std::string & filename, - S3QueueMetadata::FileStatus & file_status_, + ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed) { if (!s3_queue_log) return; - S3QueueLogElement elem{}; + ObjectStorageQueueLogElement elem{}; { - elem = S3QueueLogElement + elem = ObjectStorageQueueLogElement { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), .database = storage_id.database_name, @@ -507,7 +500,7 @@ void StorageS3QueueSource::appendLogElement( .uuid = toString(storage_id.uuid), .file_name = filename, 
.rows_processed = processed_rows, - .status = processed ? S3QueueLogElement::S3QueueStatus::Processed : S3QueueLogElement::S3QueueStatus::Failed, + .status = processed ? ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed : ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed, .counters_snapshot = file_status_.profile_counters.getPartiallyAtomicSnapshot(), .processing_start_time = file_status_.processing_start_time, .processing_end_time = file_status_.processing_end_time, diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h similarity index 69% rename from src/Storages/S3Queue/S3QueueSource.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index 6e098f8cb63..a6ebaf06b5f 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include @@ -16,7 +16,7 @@ namespace DB struct ObjectMetadata; -class StorageS3QueueSource : public ISource, WithContext +class ObjectStorageQueueSource : public ISource, WithContext { public: using Storage = StorageObjectStorage; @@ -24,16 +24,16 @@ public: using GlobIterator = StorageObjectStorageSource::GlobIterator; using ZooKeeperGetter = std::function; using RemoveFileFunc = std::function; - using FileStatusPtr = S3QueueMetadata::FileStatusPtr; + using FileStatusPtr = ObjectStorageQueueMetadata::FileStatusPtr; using ReaderHolder = StorageObjectStorageSource::ReaderHolder; - using Metadata = S3QueueMetadata; + using Metadata = ObjectStorageQueueMetadata; using ObjectInfo = StorageObjectStorageSource::ObjectInfo; using ObjectInfoPtr = std::shared_ptr; using ObjectInfos = std::vector; - struct S3QueueObjectInfo : public ObjectInfo + struct ObjectStorageQueueObjectInfo : public ObjectInfo { - S3QueueObjectInfo( + ObjectStorageQueueObjectInfo( const ObjectInfo & object_info, Metadata::FileMetadataPtr processing_holder_); @@ -44,7 +44,7 @@ public: { public: FileIterator( - std::shared_ptr metadata_, + std::shared_ptr metadata_, std::unique_ptr glob_iterator_, std::atomic & shutdown_called_, LoggerPtr logger_); @@ -57,10 +57,10 @@ public: size_t estimatedKeysCount() override; private: - using Bucket = S3QueueMetadata::Bucket; - using Processor = S3QueueMetadata::Processor; + using Bucket = ObjectStorageQueueMetadata::Bucket; + using Processor = ObjectStorageQueueMetadata::Processor; - const std::shared_ptr metadata; + const std::shared_ptr metadata; const std::unique_ptr glob_iterator; std::atomic & shutdown_called; @@ -75,24 +75,24 @@ public: }; std::unordered_map listed_keys_cache; bool iterator_finished = false; - std::unordered_map bucket_holders; + std::unordered_map bucket_holders; - std::pair getNextKeyFromAcquiredBucket(size_t processor); + std::pair getNextKeyFromAcquiredBucket(size_t processor); }; - StorageS3QueueSource( + ObjectStorageQueueSource( String name_, size_t processor_id_, const Block & header_, std::unique_ptr internal_source_, - std::shared_ptr files_metadata_, - const S3QueueAction & action_, + std::shared_ptr files_metadata_, + const ObjectStorageQueueAction & action_, RemoveFileFunc remove_file_func_, const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, - std::shared_ptr s3_queue_log_, + std::shared_ptr s3_queue_log_, const StorageID & storage_id_, LoggerPtr log_); @@ -105,13 +105,13 @@ public: private: const 
String name; const size_t processor_id; - const S3QueueAction action; - const std::shared_ptr files_metadata; + const ObjectStorageQueueAction action; + const std::shared_ptr files_metadata; const std::shared_ptr internal_source; const NamesAndTypesList requested_virtual_columns; const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; - const std::shared_ptr s3_queue_log; + const std::shared_ptr s3_queue_log; const StorageID storage_id; RemoveFileFunc remove_file_func; @@ -122,10 +122,10 @@ private: std::atomic initialized{false}; size_t processed_rows_from_file = 0; - S3QueueOrderedFileMetadata::BucketHolderPtr current_bucket_holder; + ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr current_bucket_holder; void applyActionAfterProcessing(const String & path); - void appendLogElement(const std::string & filename, S3QueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); + void appendLogElement(const std::string & filename, ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); void lazyInitialize(size_t processor); }; diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp similarity index 72% rename from src/Storages/S3Queue/S3QueueTableMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp index ecaa7ad57cc..cb9cdf8e186 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp @@ -3,9 +3,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include @@ -20,33 +20,33 @@ namespace ErrorCodes namespace { - S3QueueMode modeFromString(const std::string & mode) + ObjectStorageQueueMode modeFromString(const std::string & mode) { if (mode == "ordered") - return S3QueueMode::ORDERED; + return ObjectStorageQueueMode::ORDERED; if (mode == "unordered") - return S3QueueMode::UNORDERED; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected S3Queue mode: {}", mode); + return ObjectStorageQueueMode::UNORDERED; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected ObjectStorageQueue mode: {}", mode); } } -S3QueueTableMetadata::S3QueueTableMetadata( +ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata( const StorageObjectStorage::Configuration & configuration, - const S3QueueSettings & engine_settings, + const ObjectStorageQueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata) { format_name = configuration.format; after_processing = engine_settings.after_processing.toString(); mode = engine_settings.mode.toString(); - tracked_files_limit = engine_settings.s3queue_tracked_files_limit; - tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec; - buckets = engine_settings.s3queue_buckets; - processing_threads_num = engine_settings.s3queue_processing_threads_num; + tracked_files_limit = engine_settings.tracked_files_limit; + tracked_file_ttl_sec = engine_settings.tracked_file_ttl_sec; + buckets = engine_settings.buckets; + processing_threads_num = engine_settings.processing_threads_num; columns = storage_metadata.getColumns().toString(); } -String S3QueueTableMetadata::toString() const +String ObjectStorageQueueTableMetadata::toString() const { Poco::JSON::Object json; json.set("after_processing", after_processing); @@ -65,7 +65,7 @@ String S3QueueTableMetadata::toString() const return oss.str(); } -void S3QueueTableMetadata::read(const 
String & metadata_str) +void ObjectStorageQueueTableMetadata::read(const String & metadata_str) { Poco::JSON::Parser parser; auto json = parser.parse(metadata_str).extract(); @@ -102,19 +102,19 @@ void S3QueueTableMetadata::read(const String & metadata_str) buckets = json->getValue("buckets"); } -S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) +ObjectStorageQueueTableMetadata ObjectStorageQueueTableMetadata::parse(const String & metadata_str) { - S3QueueTableMetadata metadata; + ObjectStorageQueueTableMetadata metadata; metadata.read(metadata_str); return metadata; } -void S3QueueTableMetadata::checkEquals(const S3QueueTableMetadata & from_zk) const +void ObjectStorageQueueTableMetadata::checkEquals(const ObjectStorageQueueTableMetadata & from_zk) const { checkImmutableFieldsEquals(from_zk); } -void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const +void ObjectStorageQueueTableMetadata::checkImmutableFieldsEquals(const ObjectStorageQueueTableMetadata & from_zk) const { if (after_processing != from_zk.after_processing) throw Exception( @@ -164,29 +164,29 @@ void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata from_zk.last_processed_path, last_processed_path); - if (modeFromString(mode) == S3QueueMode::ORDERED) + if (modeFromString(mode) == ObjectStorageQueueMode::ORDERED) { if (buckets != from_zk.buckets) { throw Exception( ErrorCodes::METADATA_MISMATCH, - "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. " + "Existing table metadata in ZooKeeper differs in buckets setting. " "Stored in ZooKeeper: {}, local: {}", from_zk.buckets, buckets); } - if (S3QueueMetadata::getBucketsNum(*this) != S3QueueMetadata::getBucketsNum(from_zk)) + if (ObjectStorageQueueMetadata::getBucketsNum(*this) != ObjectStorageQueueMetadata::getBucketsNum(from_zk)) { throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in processing buckets. " "Stored in ZooKeeper: {}, local: {}", - S3QueueMetadata::getBucketsNum(*this), S3QueueMetadata::getBucketsNum(from_zk)); + ObjectStorageQueueMetadata::getBucketsNum(*this), ObjectStorageQueueMetadata::getBucketsNum(from_zk)); } } } -void S3QueueTableMetadata::checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected) +void ObjectStorageQueueTableMetadata::checkEquals(const ObjectStorageQueueSettings & current, const ObjectStorageQueueSettings & expected) { if (current.after_processing != expected.after_processing) throw Exception( @@ -204,48 +204,48 @@ void S3QueueTableMetadata::checkEquals(const S3QueueSettings & current, const S3 expected.mode.toString(), current.mode.toString()); - if (current.s3queue_tracked_files_limit != expected.s3queue_tracked_files_limit) + if (current.tracked_files_limit != expected.tracked_files_limit) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in max set size. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_tracked_files_limit, - current.s3queue_tracked_files_limit); + expected.tracked_files_limit, + current.tracked_files_limit); - if (current.s3queue_tracked_file_ttl_sec != expected.s3queue_tracked_file_ttl_sec) + if (current.tracked_file_ttl_sec != expected.tracked_file_ttl_sec) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in max set age. 
" "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_tracked_file_ttl_sec, - current.s3queue_tracked_file_ttl_sec); + expected.tracked_file_ttl_sec, + current.tracked_file_ttl_sec); - if (current.s3queue_last_processed_path.value != expected.s3queue_last_processed_path.value) + if (current.last_processed_path.value != expected.last_processed_path.value) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in last_processed_path. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_last_processed_path.value, - current.s3queue_last_processed_path.value); + expected.last_processed_path.value, + current.last_processed_path.value); - if (current.mode == S3QueueMode::ORDERED) + if (current.mode == ObjectStorageQueueMode::ORDERED) { - if (current.s3queue_buckets != expected.s3queue_buckets) + if (current.buckets != expected.buckets) { throw Exception( ErrorCodes::METADATA_MISMATCH, - "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. " + "Existing table metadata in ZooKeeper differs in buckets setting. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_buckets, current.s3queue_buckets); + expected.buckets, current.buckets); } - if (S3QueueMetadata::getBucketsNum(current) != S3QueueMetadata::getBucketsNum(expected)) + if (ObjectStorageQueueMetadata::getBucketsNum(current) != ObjectStorageQueueMetadata::getBucketsNum(expected)) { throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in processing buckets. " "Stored in ZooKeeper: {}, local: {}", - S3QueueMetadata::getBucketsNum(current), S3QueueMetadata::getBucketsNum(expected)); + ObjectStorageQueueMetadata::getBucketsNum(current), ObjectStorageQueueMetadata::getBucketsNum(expected)); } } } diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h similarity index 50% rename from src/Storages/S3Queue/S3QueueTableMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h index d53b60570ae..bbae06b66c6 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -11,10 +11,10 @@ namespace DB class WriteBuffer; class ReadBuffer; -/** The basic parameters of S3Queue table engine for saving in ZooKeeper. +/** The basic parameters of ObjectStorageQueue table engine for saving in ZooKeeper. * Lets you verify that they match local ones. 
*/ -struct S3QueueTableMetadata +struct ObjectStorageQueueTableMetadata { String format_name; String columns; @@ -26,22 +26,22 @@ struct S3QueueTableMetadata UInt64 processing_threads_num = 1; String last_processed_path; - S3QueueTableMetadata() = default; - S3QueueTableMetadata( + ObjectStorageQueueTableMetadata() = default; + ObjectStorageQueueTableMetadata( const StorageObjectStorage::Configuration & configuration, - const S3QueueSettings & engine_settings, + const ObjectStorageQueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); void read(const String & metadata_str); - static S3QueueTableMetadata parse(const String & metadata_str); + static ObjectStorageQueueTableMetadata parse(const String & metadata_str); String toString() const; - void checkEquals(const S3QueueTableMetadata & from_zk) const; - static void checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected); + void checkEquals(const ObjectStorageQueueTableMetadata & from_zk) const; + static void checkEquals(const ObjectStorageQueueSettings & current, const ObjectStorageQueueSettings & expected); private: - void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const; + void checkImmutableFieldsEquals(const ObjectStorageQueueTableMetadata & from_zk) const; }; diff --git a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp similarity index 92% rename from src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp index c61e9557fc2..3a276953529 100644 --- a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -18,13 +18,13 @@ namespace } } -S3QueueUnorderedFileMetadata::S3QueueUnorderedFileMetadata( +ObjectStorageQueueUnorderedFileMetadata::ObjectStorageQueueUnorderedFileMetadata( const std::filesystem::path & zk_path, const std::string & path_, FileStatusPtr file_status_, size_t max_loading_retries_, LoggerPtr log_) - : S3QueueIFileMetadata( + : ObjectStorageQueueIFileMetadata( path_, /* processing_node_path */zk_path / "processing" / getNodeName(path_), /* processed_node_path */zk_path / "processed" / getNodeName(path_), @@ -35,7 +35,7 @@ S3QueueUnorderedFileMetadata::S3QueueUnorderedFileMetadata( { } -std::pair S3QueueUnorderedFileMetadata::setProcessingImpl() +std::pair ObjectStorageQueueUnorderedFileMetadata::setProcessingImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -89,7 +89,7 @@ std::pair S3QueueUnorderedFileMet throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state of zookeeper transaction: {}", magic_enum::enum_name(code)); } -void S3QueueUnorderedFileMetadata::setProcessedAtStartRequests( +void ObjectStorageQueueUnorderedFileMetadata::setProcessedAtStartRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr &) { @@ -98,7 +98,7 @@ void S3QueueUnorderedFileMetadata::setProcessedAtStartRequests( processed_node_path, node_metadata.toString(), zkutil::CreateMode::Persistent)); } -void S3QueueUnorderedFileMetadata::setProcessedImpl() +void ObjectStorageQueueUnorderedFileMetadata::setProcessedImpl() { /// In one zookeeper transaction do the following: enum RequestType diff --git a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h 
b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h similarity index 75% rename from src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h index 24c2765bf3a..cc5d8a09ec9 100644 --- a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h @@ -1,17 +1,17 @@ #pragma once -#include +#include #include #include namespace DB { -class S3QueueUnorderedFileMetadata : public S3QueueIFileMetadata +class ObjectStorageQueueUnorderedFileMetadata : public ObjectStorageQueueIFileMetadata { public: using Bucket = size_t; - explicit S3QueueUnorderedFileMetadata( + explicit ObjectStorageQueueUnorderedFileMetadata( const std::filesystem::path & zk_path, const std::string & path_, FileStatusPtr file_status_, diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp similarity index 59% rename from src/Storages/S3Queue/StorageS3Queue.cpp rename to src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index afb75a21b21..489980aafa8 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -1,10 +1,7 @@ #include -#include "config.h" #include -#include #include -#include #include #include #include @@ -15,16 +12,15 @@ #include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include #include #include -#include #include #include @@ -32,12 +28,6 @@ namespace fs = std::filesystem; -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; -} - namespace DB { @@ -45,23 +35,22 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; - extern const int S3_ERROR; extern const int QUERY_NOT_ALLOWED; } namespace { - std::string chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const S3QueueSettings & s3queue_settings) + std::string chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const ObjectStorageQueueSettings & queue_settings) { std::string zk_path_prefix = settings.s3queue_default_zookeeper_path.value; if (zk_path_prefix.empty()) zk_path_prefix = "/"; std::string result_zk_path; - if (s3queue_settings.keeper_path.changed) + if (queue_settings.keeper_path.changed) { /// We do not add table uuid here on purpose. - result_zk_path = fs::path(zk_path_prefix) / s3queue_settings.keeper_path.value; + result_zk_path = fs::path(zk_path_prefix) / queue_settings.keeper_path.value; } else { @@ -71,35 +60,35 @@ namespace return zkutil::extractZooKeeperPath(result_zk_path, true); } - void checkAndAdjustSettings(S3QueueSettings & s3queue_settings, const Settings & settings, bool is_attach) + void checkAndAdjustSettings(ObjectStorageQueueSettings & queue_settings, const Settings & settings, bool is_attach) { - if (!is_attach && !s3queue_settings.mode.changed) + if (!is_attach && !queue_settings.mode.changed) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `mode` (Unordered/Ordered) is not specified, but is required."); } /// In case !is_attach, we leave Ordered mode as default for compatibility. 
- if (!s3queue_settings.s3queue_processing_threads_num) + if (!queue_settings.processing_threads_num) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `s3queue_processing_threads_num` cannot be set to zero"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `processing_threads_num` cannot be set to zero"); } - if (!s3queue_settings.s3queue_enable_logging_to_s3queue_log.changed) + if (!queue_settings.enable_logging_to_s3queue_log.changed) { - s3queue_settings.s3queue_enable_logging_to_s3queue_log = settings.s3queue_enable_logging_to_s3queue_log; + queue_settings.enable_logging_to_s3queue_log = settings.s3queue_enable_logging_to_s3queue_log; } - if (s3queue_settings.s3queue_cleanup_interval_min_ms > s3queue_settings.s3queue_cleanup_interval_max_ms) + if (queue_settings.cleanup_interval_min_ms > queue_settings.cleanup_interval_max_ms) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Setting `s3queue_cleanup_interval_min_ms` ({}) must be less or equal to `s3queue_cleanup_interval_max_ms` ({})", - s3queue_settings.s3queue_cleanup_interval_min_ms, s3queue_settings.s3queue_cleanup_interval_max_ms); + "Setting `cleanup_interval_min_ms` ({}) must be less or equal to `cleanup_interval_max_ms` ({})", + queue_settings.cleanup_interval_min_ms, queue_settings.cleanup_interval_max_ms); } } } -StorageS3Queue::StorageS3Queue( - std::unique_ptr s3queue_settings_, +StorageObjectStorageQueue::StorageObjectStorageQueue( + std::unique_ptr queue_settings_, const ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -111,12 +100,12 @@ StorageS3Queue::StorageS3Queue( LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_) - , s3queue_settings(std::move(s3queue_settings_)) - , zk_path(chooseZooKeeperPath(table_id_, context_->getSettingsRef(), *s3queue_settings)) + , queue_settings(std::move(queue_settings_)) + , zk_path(chooseZooKeeperPath(table_id_, context_->getSettingsRef(), *queue_settings)) , configuration{configuration_} , format_settings(format_settings_) - , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) - , log(getLogger("StorageS3Queue (" + table_id_.getFullTableName() + ")")) + , reschedule_processing_interval_ms(queue_settings->polling_min_timeout_ms) + , log(getLogger("StorageObjectStorageQueue (" + table_id_.getFullTableName() + ")")) { if (configuration->getPath().empty()) { @@ -128,10 +117,10 @@ StorageS3Queue::StorageS3Queue( } else if (!configuration->isPathWithGlobs()) { - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "ObjectStorageQueue url must either end with '/' or contain globs"); } - checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef(), mode > LoadingStrictnessLevel::CREATE); + checkAndAdjustSettings(*queue_settings, context_->getSettingsRef(), mode > LoadingStrictnessLevel::CREATE); object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); FormatFactory::instance().checkFormatName(configuration->format); @@ -149,30 +138,30 @@ StorageS3Queue::StorageS3Queue( setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); - task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); + task = getContext()->getSchedulePool().createTask("ObjectStorageQueueStreamingTask", [this] { threadFunc(); }); - /// Get metadata manager from S3QueueMetadataFactory, + 
/// Get metadata manager from ObjectStorageQueueMetadataFactory, /// it will increase the ref count for the metadata object. - /// The ref count is decreased when StorageS3Queue::drop() method is called. - files_metadata = S3QueueMetadataFactory::instance().getOrCreate(zk_path, *s3queue_settings); + /// The ref count is decreased when StorageObjectStorageQueue::drop() method is called. + files_metadata = ObjectStorageQueueMetadataFactory::instance().getOrCreate(zk_path, *queue_settings); try { files_metadata->initialize(configuration_, storage_metadata); } catch (...) { - S3QueueMetadataFactory::instance().remove(zk_path); + ObjectStorageQueueMetadataFactory::instance().remove(zk_path); throw; } } -void StorageS3Queue::startup() +void StorageObjectStorageQueue::startup() { if (task) task->activateAndSchedule(); } -void StorageS3Queue::shutdown(bool is_drop) +void StorageObjectStorageQueue::shutdown(bool is_drop) { table_is_being_dropped = is_drop; shutdown_called = true; @@ -191,31 +180,31 @@ void StorageS3Queue::shutdown(bool is_drop) LOG_TRACE(log, "Shut down storage"); } -void StorageS3Queue::drop() +void StorageObjectStorageQueue::drop() { - S3QueueMetadataFactory::instance().remove(zk_path); + ObjectStorageQueueMetadataFactory::instance().remove(zk_path); } -bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const +bool StorageObjectStorageQueue::supportsSubsetOfColumns(const ContextPtr & context_) const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context_, format_settings); } -class ReadFromS3Queue : public SourceStepWithFilter +class ReadFromObjectStorageQueue : public SourceStepWithFilter { public: - std::string getName() const override { return "ReadFromS3Queue"; } + std::string getName() const override { return "ReadFromObjectStorageQueue"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void applyFilters(ActionDAGNodes added_filter_nodes) override; - ReadFromS3Queue( + ReadFromObjectStorageQueue( const Names & column_names_, const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, Block sample_block, ReadFromFormatInfo info_, - std::shared_ptr storage_, + std::shared_ptr storage_, size_t max_block_size_) : SourceStepWithFilter( DataStream{.header = std::move(sample_block)}, @@ -231,15 +220,15 @@ public: private: ReadFromFormatInfo info; - std::shared_ptr storage; + std::shared_ptr storage; size_t max_block_size; - std::shared_ptr iterator; + std::shared_ptr iterator; void createIterator(const ActionsDAG::Node * predicate); }; -void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) +void ReadFromObjectStorageQueue::createIterator(const ActionsDAG::Node * predicate) { if (iterator) return; @@ -248,7 +237,7 @@ void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) } -void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) +void ReadFromObjectStorageQueue::applyFilters(ActionDAGNodes added_filter_nodes) { SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); @@ -259,7 +248,7 @@ void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) createIterator(predicate); } -void StorageS3Queue::read( +void StorageObjectStorageQueue::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, @@ -281,10 +270,10 @@ void StorageS3Queue::read( "Cannot read from {} with attached materialized views", 
getName()); } - auto this_ptr = std::static_pointer_cast(shared_from_this()); + auto this_ptr = std::static_pointer_cast(shared_from_this()); auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - auto reading = std::make_unique( + auto reading = std::make_unique( column_names, query_info, storage_snapshot, @@ -297,10 +286,10 @@ void StorageS3Queue::read( query_plan.addStep(std::move(reading)); } -void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +void ReadFromObjectStorageQueue::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { Pipes pipes; - const size_t adjusted_num_streams = storage->s3queue_settings->s3queue_processing_threads_num; + const size_t adjusted_num_streams = storage->queue_settings->processing_threads_num; createIterator(nullptr); for (size_t i = 0; i < adjusted_num_streams; ++i) @@ -320,10 +309,10 @@ void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const pipeline.init(std::move(pipe)); } -std::shared_ptr StorageS3Queue::createSource( +std::shared_ptr StorageObjectStorageQueue::createSource( size_t processor_id, const ReadFromFormatInfo & info, - std::shared_ptr file_iterator, + std::shared_ptr file_iterator, size_t max_block_size, ContextPtr local_context) { @@ -343,14 +332,14 @@ std::shared_ptr StorageS3Queue::createSource( { object_storage->removeObject(StoredObject(path)); }; - auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr; - return std::make_shared( + auto s3_queue_log = queue_settings->enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr; + return std::make_shared( getName(), processor_id, info.source_header, std::move(internal_source), files_metadata, - s3queue_settings->after_processing, + queue_settings->after_processing, file_deleter, info.requested_virtual_columns, local_context, @@ -361,7 +350,7 @@ std::shared_ptr StorageS3Queue::createSource( log); } -bool StorageS3Queue::hasDependencies(const StorageID & table_id) +bool StorageObjectStorageQueue::hasDependencies(const StorageID & table_id) { // Check if all dependencies are attached auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); @@ -386,7 +375,7 @@ bool StorageS3Queue::hasDependencies(const StorageID & table_id) return true; } -void StorageS3Queue::threadFunc() +void StorageObjectStorageQueue::threadFunc() { if (shutdown_called) return; @@ -404,12 +393,12 @@ void StorageS3Queue::threadFunc() if (streamToViews()) { /// Reset the reschedule interval. - reschedule_processing_interval_ms = s3queue_settings->s3queue_polling_min_timeout_ms; + reschedule_processing_interval_ms = queue_settings->polling_min_timeout_ms; } else { /// Increase the reschedule interval. 
- reschedule_processing_interval_ms += s3queue_settings->s3queue_polling_backoff_ms; + reschedule_processing_interval_ms += queue_settings->polling_backoff_ms; } LOG_DEBUG(log, "Stopped streaming to {} attached views", dependencies_count); @@ -426,12 +415,12 @@ void StorageS3Queue::threadFunc() if (!shutdown_called) { - LOG_TRACE(log, "Reschedule S3 Queue processing thread in {} ms", reschedule_processing_interval_ms); + LOG_TRACE(log, "Reschedule processing thread in {} ms", reschedule_processing_interval_ms); task->scheduleAfter(reschedule_processing_interval_ms); } } -bool StorageS3Queue::streamToViews() +bool StorageObjectStorageQueue::streamToViews() { auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); @@ -444,29 +433,29 @@ bool StorageS3Queue::streamToViews() auto insert = std::make_shared(); insert->table_id = table_id; - auto s3queue_context = Context::createCopy(getContext()); - s3queue_context->makeQueryContext(); + auto context = Context::createCopy(getContext()); + context->makeQueryContext(); // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); + InterpreterInsertQuery interpreter(insert, context, false, true, true); auto block_io = interpreter.execute(); - auto file_iterator = createFileIterator(s3queue_context, nullptr); + auto file_iterator = createFileIterator(context, nullptr); - auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context)); + auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(context)); Pipes pipes; - pipes.reserve(s3queue_settings->s3queue_processing_threads_num); - for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i) + pipes.reserve(queue_settings->processing_threads_num); + for (size_t i = 0; i < queue_settings->processing_threads_num; ++i) { - auto source = createSource(i, read_from_format_info, file_iterator, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context); + auto source = createSource(i, read_from_format_info, file_iterator, DBMS_DEFAULT_BUFFER_SIZE, context); pipes.emplace_back(std::move(source)); } auto pipe = Pipe::unitePipes(std::move(pipes)); block_io.pipeline.complete(std::move(pipe)); - block_io.pipeline.setNumThreads(s3queue_settings->s3queue_processing_threads_num); - block_io.pipeline.setConcurrencyControl(s3queue_context->getSettingsRef().use_concurrency_control); + block_io.pipeline.setNumThreads(queue_settings->processing_threads_num); + block_io.pipeline.setConcurrencyControl(context->getSettingsRef().use_concurrency_control); std::atomic_size_t rows = 0; block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); @@ -477,12 +466,12 @@ bool StorageS3Queue::streamToViews() return rows > 0; } -zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const +zkutil::ZooKeeperPtr StorageObjectStorageQueue::getZooKeeper() const { return getContext()->getZooKeeper(); } -std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) +std::shared_ptr StorageObjectStorageQueue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { auto settings = 
configuration->getQuerySettings(local_context); auto glob_iterator = std::make_unique( @@ -491,73 +480,4 @@ std::shared_ptr StorageS3Queue::createFileIterator return std::make_shared(files_metadata, std::move(glob_iterator), shutdown_called, log); } -#if USE_AWS_S3 -void registerStorageS3Queue(StorageFactory & factory) -{ - factory.registerStorage( - "S3Queue", - [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = std::make_shared(); - StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); - - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - - auto s3queue_settings = std::make_unique(); - if (args.storage_def->settings) - { - s3queue_settings->loadFromQuery(*args.storage_def); - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - else - LOG_TRACE(getLogger("StorageS3"), "Remove: {}", change.name); - args.storage_def->settings->changes.removeSetting(change.name); - } - - for (const auto & change : args.storage_def->settings->changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.applyChange(change); - } - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - return std::make_shared( - std::move(s3queue_settings), - std::move(configuration), - args.table_id, - args.columns, - args.constraints, - args.comment, - args.getContext(), - format_settings, - args.storage_def, - args.mode); - }, - { - .supports_settings = true, - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} -#endif - } diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h similarity index 75% rename from src/Storages/S3Queue/StorageS3Queue.h rename to src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h index ef83a1ccc25..fb5b4d679b3 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h @@ -5,25 +5,24 @@ #include #include #include -#include -#include +#include +#include #include #include -#include #include namespace DB { -class S3QueueMetadata; +class ObjectStorageQueueMetadata; -class StorageS3Queue : public IStorage, WithContext +class StorageObjectStorageQueue : public IStorage, WithContext { public: using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; - StorageS3Queue( - std::unique_ptr s3queue_settings_, + StorageObjectStorageQueue( + std::unique_ptr queue_settings_, ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -34,7 +33,7 @@ public: ASTStorage * engine_args, LoadingStrictnessLevel mode); - String getName() const override { return "S3Queue"; } + String getName() const override { return "ObjectStorageQueue"; } void read( QueryPlan & query_plan, @@ -53,13 
+52,13 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; private: - friend class ReadFromS3Queue; - using FileIterator = StorageS3QueueSource::FileIterator; + friend class ReadFromObjectStorageQueue; + using FileIterator = ObjectStorageQueueSource::FileIterator; - const std::unique_ptr s3queue_settings; + const std::unique_ptr queue_settings; const fs::path zk_path; - std::shared_ptr files_metadata; + std::shared_ptr files_metadata; ConfigurationPtr configuration; ObjectStoragePtr object_storage; @@ -83,10 +82,10 @@ private: bool supportsDynamicSubcolumns() const override { return true; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); - std::shared_ptr createSource( + std::shared_ptr createSource( size_t processor_id, const ReadFromFormatInfo & info, - std::shared_ptr file_iterator, + std::shared_ptr file_iterator, size_t max_block_size, ContextPtr local_context); diff --git a/src/Storages/ObjectStorageQueue/registerS3Queue.cpp b/src/Storages/ObjectStorageQueue/registerS3Queue.cpp new file mode 100644 index 00000000000..08b8104ae49 --- /dev/null +++ b/src/Storages/ObjectStorageQueue/registerS3Queue.cpp @@ -0,0 +1,87 @@ +#include "config.h" + +#if USE_AWS_S3 + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +void registerStorageS3Queue(StorageFactory & factory) +{ + factory.registerStorage( + "S3Queue", + [](const StorageFactory::Arguments & args) + { + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + + auto queue_settings = std::make_unique(); + if (args.storage_def->settings) + { + queue_settings->loadFromQuery(*args.storage_def); + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. 
+ const auto & changes = args.getContext()->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + + args.storage_def->settings->changes.removeSetting(change.name); + } + + for (const auto & change : args.storage_def->settings->changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.applyChange(change); + } + format_settings = getFormatSettings(args.getContext(), user_format_settings); + } + else + { + format_settings = getFormatSettings(args.getContext()); + } + + return std::make_shared( + std::move(queue_settings), + std::move(configuration), + args.table_id, + args.columns, + args.constraints, + args.comment, + args.getContext(), + format_settings, + args.storage_def, + args.mode); + }, + { + .supports_settings = true, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +} +#endif diff --git a/src/Storages/S3Queue/S3QueueMetadataFactory.h b/src/Storages/S3Queue/S3QueueMetadataFactory.h deleted file mode 100644 index 80e96f8aa7e..00000000000 --- a/src/Storages/S3Queue/S3QueueMetadataFactory.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once -#include -#include -#include - -namespace DB -{ - -class S3QueueMetadataFactory final : private boost::noncopyable -{ -public: - using FilesMetadataPtr = std::shared_ptr; - - static S3QueueMetadataFactory & instance(); - - FilesMetadataPtr getOrCreate(const std::string & zookeeper_path, const S3QueueSettings & settings); - - void remove(const std::string & zookeeper_path); - - std::unordered_map getAll(); - -private: - struct Metadata - { - explicit Metadata(std::shared_ptr metadata_) : metadata(metadata_), ref_count(1) {} - - std::shared_ptr metadata; - /// TODO: the ref count should be kept in keeper, because of the case with distributed processing. - size_t ref_count = 0; - }; - using MetadataByPath = std::unordered_map; - - MetadataByPath metadata_by_path; - std::mutex mutex; -}; - -} diff --git a/src/Storages/S3Queue/S3QueueSettings.h b/src/Storages/S3Queue/S3QueueSettings.h deleted file mode 100644 index 4a92d99c411..00000000000 --- a/src/Storages/S3Queue/S3QueueSettings.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ -class ASTStorage; - - -#define S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ - M(S3QueueMode, \ - mode, \ - S3QueueMode::ORDERED, \ - "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." \ - "With ordered mode, only the max name of the successfully consumed file stored.", \ - 0) \ - M(S3QueueAction, after_processing, S3QueueAction::KEEP, "Delete or keep file in S3 after successful processing", 0) \ - M(String, keeper_path, "", "Zookeeper node path", 0) \ - M(UInt32, s3queue_loading_retries, 0, "Retry loading up to specified number of times", 0) \ - M(UInt32, s3queue_processing_threads_num, 1, "Number of processing threads", 0) \ - M(UInt32, s3queue_enable_logging_to_s3queue_log, 1, "Enable logging to system table system.s3queue_log", 0) \ - M(String, s3queue_last_processed_path, "", "For Ordered mode. 
Files that have lexicographically smaller file name are considered already processed", 0) \ - M(UInt32, s3queue_tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ - M(UInt32, s3queue_polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ - M(UInt32, s3queue_polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ - M(UInt32, s3queue_polling_backoff_ms, 1000, "Polling backoff", 0) \ - M(UInt32, s3queue_tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \ - M(UInt32, s3queue_cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ - M(UInt32, s3queue_cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ - M(UInt32, s3queue_buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ - -#define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \ - S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ - LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) - -DECLARE_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) - - -struct S3QueueSettings : public BaseSettings -{ - void loadFromQuery(ASTStorage & storage_def); -}; - -} diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 637182067f2..0920a9b19e7 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -11,9 +11,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include @@ -43,7 +43,7 @@ StorageSystemS3Queue::StorageSystemS3Queue(const StorageID & table_id_) void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { - for (const auto & [zookeeper_path, metadata] : S3QueueMetadataFactory::instance().getAll()) + for (const auto & [zookeeper_path, metadata] : ObjectStorageQueueMetadataFactory::instance().getAll()) { for (const auto & [file_name, file_status] : metadata->getFileStatuses()) { diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 66631c51b03..3c55f0ff7f7 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -778,10 +778,10 @@ def test_max_set_age(started_cluster): files_path, additional_settings={ "keeper_path": keeper_path, - "s3queue_tracked_file_ttl_sec": max_age, - "s3queue_cleanup_interval_min_ms": 0, - "s3queue_cleanup_interval_max_ms": 0, - "s3queue_loading_retries": 0, + "tracked_file_ttl_sec": max_age, + "cleanup_interval_min_ms": 0, + "cleanup_interval_max_ms": 0, + "loading_retries": 0, }, ) create_mv(node, table_name, dst_table_name) From c47c8d603eaee53f63d20633b5357ae6f56109f2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 19 Jun 2024 16:16:37 +0200 Subject: [PATCH 028/100] Add AzureQueue --- src/Common/ProfileEvents.cpp | 10 +- src/Common/SystemLogBase.cpp | 2 +- src/Interpreters/Context.cpp | 9 ++ src/Interpreters/Context.h | 1 + ...QueueLog.cpp => ObjectStorageQueueLog.cpp} | 2 +- .../{S3QueueLog.h => ObjectStorageQueueLog.h} | 0 src/Interpreters/SystemLog.cpp | 3 +- src/Interpreters/SystemLog.h | 1 + .../ObjectStorageQueueIFileMetadata.cpp | 10 +- .../ObjectStorageQueueMetadata.cpp | 13 +- .../ObjectStorageQueueSettings.h | 1 + .../ObjectStorageQueueSource.cpp | 20 +-- .../ObjectStorageQueueSource.h | 9 +- .../StorageObjectStorageQueue.cpp | 9 
+- .../ObjectStorageQueue/registerS3Queue.cpp | 140 +++++++++++------- src/Storages/registerStorages.cpp | 5 + .../integration/test_storage_s3_queue/test.py | 75 ++++++++-- 17 files changed, 200 insertions(+), 110 deletions(-) rename src/Interpreters/{S3QueueLog.cpp => ObjectStorageQueueLog.cpp} (98%) rename src/Interpreters/{S3QueueLog.h => ObjectStorageQueueLog.h} (100%) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index fef1c4a2b75..dd0379b8496 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -635,11 +635,11 @@ The server successfully detected this situation and will download merged part fr M(S3QueueSetFileProcessingMicroseconds, "Time spent to set file as processing")\ M(S3QueueSetFileProcessedMicroseconds, "Time spent to set file as processed")\ M(S3QueueSetFileFailedMicroseconds, "Time spent to set file as failed")\ - M(S3QueueFailedFiles, "Number of files which failed to be processed")\ - M(S3QueueProcessedFiles, "Number of files which were processed")\ - M(S3QueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to set file as failed")\ - M(S3QueuePullMicroseconds, "Time spent to read file data")\ - M(S3QueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\ + M(ObjectStorageQueueFailedFiles, "Number of files which failed to be processed")\ + M(ObjectStorageQueueProcessedFiles, "Number of files which were processed")\ + M(ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to set file as failed")\ + M(ObjectStorageQueuePullMicroseconds, "Time spent to read file data")\ + M(ObjectStorageQueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\ \ M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\ M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \ diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 15803db4929..f6a78850622 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0d9f01813b3..99af6043aa3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4127,6 +4127,15 @@ std::shared_ptr Context::getS3QueueLog() const return shared->system_logs->s3_queue_log; } +std::shared_ptr Context::getAzureQueueLog() const +{ + SharedLockGuard lock(shared->mutex); + if (!shared->system_logs) + return {}; + + return shared->system_logs->azure_queue_log; +} + std::shared_ptr Context::getFilesystemReadPrefetchesLog() const { SharedLockGuard lock(shared->mutex); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index e20a76598ae..842e4096368 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1107,6 +1107,7 @@ public: std::shared_ptr getProcessorsProfileLog() const; std::shared_ptr getFilesystemCacheLog() const; std::shared_ptr getS3QueueLog() const; + std::shared_ptr getAzureQueueLog() const; std::shared_ptr getFilesystemReadPrefetchesLog() const; std::shared_ptr getAsynchronousInsertLog() const; std::shared_ptr getBackupLog() const; diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/ObjectStorageQueueLog.cpp similarity index 98% rename from src/Interpreters/S3QueueLog.cpp rename to src/Interpreters/ObjectStorageQueueLog.cpp index 09aba5d909d..24261429434 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ 
b/src/Interpreters/ObjectStorageQueueLog.cpp
@@ -8,7 +8,7 @@
 #include
 #include
 #include
-#include <Interpreters/S3QueueLog.h>
+#include <Interpreters/ObjectStorageQueueLog.h>


 namespace DB


diff --git a/src/Interpreters/S3QueueLog.h b/src/Interpreters/ObjectStorageQueueLog.h
similarity index 100%
rename from src/Interpreters/S3QueueLog.h
rename to src/Interpreters/ObjectStorageQueueLog.h
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index d8139504c8e..15bf5a35fa7 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -24,7 +24,7 @@
 #include
 #include
 #include
-#include <Interpreters/S3QueueLog.h>
+#include <Interpreters/ObjectStorageQueueLog.h>
 #include
 #include
 #include
@@ -304,6 +304,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf
     asynchronous_insert_log = createSystemLog<AsynchronousInsertLog>(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history for all asynchronous inserts executed on current server.");
     backup_log = createSystemLog<BackupLog>(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with the information about BACKUP and RESTORE operations.");
     s3_queue_log = createSystemLog<ObjectStorageQueueLog>(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with the information files processes by S3Queue engine.");
+    azure_queue_log = createSystemLog<ObjectStorageQueueLog>(global_context, "system", "azure_queue_log", config, "azure_queue_log", "Contains logging entries with the information about files processed by the AzureQueue engine.");
     blob_storage_log = createSystemLog<BlobStorageLog>(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes.");

     if (query_log)
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 6e8875ef9a6..a60b69fc8f4 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -75,6 +75,7 @@ struct SystemLogs
     std::shared_ptr<FilesystemCacheLog> filesystem_cache_log;
     std::shared_ptr<FilesystemReadPrefetchesLog> filesystem_read_prefetches_log;
     std::shared_ptr<ObjectStorageQueueLog> s3_queue_log;
+    std::shared_ptr<ObjectStorageQueueLog> azure_queue_log;
     /// Metrics from system.asynchronous_metrics.
     std::shared_ptr<AsynchronousMetricLog> asynchronous_metric_log;
     /// OpenTelemetry trace spans.
diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp
index 8719d9f4635..816d6621247 100644
--- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp
+++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp
@@ -11,8 +11,8 @@

 namespace ProfileEvents
 {
-    extern const Event S3QueueProcessedFiles;
-    extern const Event S3QueueFailedFiles;
+    extern const Event ObjectStorageQueueProcessedFiles;
+    extern const Event ObjectStorageQueueFailedFiles;
 };

 namespace DB
@@ -169,7 +169,7 @@ ObjectStorageQueueIFileMetadata::NodeMetadata ObjectStorageQueueIFileMetadata::c
     /// Since node name is just a hash we want to know to which file it corresponds,
     /// so we keep "file_path" in nodes data.
     /// "last_processed_timestamp" is needed for TTL metadata nodes enabled by tracked_file_ttl_sec.
-    /// "last_exception" is kept for introspection, should also be visible in system.s3queue_log if it is enabled.
+    /// "last_exception" is kept for introspection, should also be visible in system.s3(azure)queue_log if it is enabled.
    /// "retries" is kept for retrying the processing enabled by loading_retries.
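    /// Illustrative sketch, not part of the patch: given the fields described
    /// above, a metadata node's JSON payload could look roughly like the
    /// following (field names come from this function; the concrete values are
    /// assumed for illustration only):
    ///   {
    ///     "file_path": "data/file-000.csv",
    ///     "last_processed_timestamp": 1718900000,
    ///     "last_exception": "",
    ///     "retries": "0"
    ///   }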
    NodeMetadata metadata;
     metadata.file_path = path;
@@ -225,7 +225,7 @@ void ObjectStorageQueueIFileMetadata::setProcessed()
 {
     LOG_TRACE(log, "Setting file {} as processed (path: {})", path, processed_node_path);

-    ProfileEvents::increment(ProfileEvents::S3QueueProcessedFiles);
+    ProfileEvents::increment(ProfileEvents::ObjectStorageQueueProcessedFiles);

     file_status->onProcessed();
     setProcessedImpl();
@@ -239,7 +239,7 @@ void ObjectStorageQueueIFileMetadata::setFailed(const std::string & exception)
 {
     LOG_TRACE(log, "Setting file {} as failed (exception: {}, path: {})", path, exception, failed_node_path);

-    ProfileEvents::increment(ProfileEvents::S3QueueFailedFiles);
+    ProfileEvents::increment(ProfileEvents::ObjectStorageQueueFailedFiles);

     file_status->onFailed(exception);
     node_metadata.last_exception = exception;
diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp
index 5eb6cfeaf4a..152a3f74a64 100644
--- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp
+++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp
@@ -21,13 +21,8 @@

 namespace ProfileEvents
 {
-    extern const Event S3QueueSetFileProcessingMicroseconds;
-    extern const Event S3QueueSetFileProcessedMicroseconds;
-    extern const Event S3QueueSetFileFailedMicroseconds;
-    extern const Event S3QueueFailedFiles;
-    extern const Event S3QueueProcessedFiles;
-    extern const Event S3QueueCleanupMaxSetSizeOrTTLMicroseconds;
-    extern const Event S3QueueLockLocalFileStatusesMicroseconds;
+    extern const Event ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds;
+    extern const Event ObjectStorageQueueLockLocalFileStatusesMicroseconds;
 };

 namespace DB
@@ -108,7 +103,7 @@ private:
     std::unique_lock<std::mutex> lock() const
     {
-        auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueLockLocalFileStatusesMicroseconds);
+        auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueueLockLocalFileStatusesMicroseconds);
         return std::unique_lock<std::mutex>(mutex);
     }
 };
@@ -316,7 +311,7 @@ void ObjectStorageQueueMetadata::cleanupThreadFunc()

 void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
 {
-    auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueCleanupMaxSetSizeOrTTLMicroseconds);
+    auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds);
     const auto zk_client = getZooKeeper();
     const fs::path zookeeper_processed_path = zookeeper_path / "processed";
     const fs::path zookeeper_failed_path = zookeeper_path / "failed";
diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h
index afe2730662c..0db5bb92bde 100644
--- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h
+++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h
@@ -22,6 +22,7 @@ class ASTStorage;
     M(UInt32, loading_retries, 0, "Retry loading up to specified number of times", 0) \
     M(UInt32, processing_threads_num, 1, "Number of processing threads", 0) \
     M(UInt32, enable_logging_to_s3queue_log, 1, "Enable logging to system table system.s3queue_log", 0) \
+    M(UInt32, enable_logging_to_azure_queue_log, 1, "Enable logging to system table system.azure_queue_log", 0) \
    M(String, last_processed_path, "", "For Ordered mode.
Files that have lexicographically smaller file name are considered already processed", 0) \ M(UInt32, tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ M(UInt32, polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index a56dd6ea343..05f925e2e2a 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -12,7 +12,7 @@ namespace ProfileEvents { - extern const Event S3QueuePullMicroseconds; + extern const Event ObjectStorageQueuePullMicroseconds; } namespace DB @@ -81,7 +81,7 @@ ObjectStorageQueueSource::ObjectInfoPtr ObjectStorageQueueSource::FileIterator:: std::pair ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processor) { - /// We need this lock to maintain consistency between listing s3 directory + /// We need this lock to maintain consistency between listing object storage directory /// and getting/putting result into listed_keys_cache. std::lock_guard lock(buckets_mutex); @@ -98,7 +98,7 @@ ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t proc /// and checks if corresponding bucket is already acquired by someone. /// In case it is already acquired, they put the key into listed_keys_cache, /// so that the thread who acquired the bucket will be able to see - /// those keys without the need to list s3 directory once again. + /// those keys without the need to list object storage directory once again. if (bucket_holder_it->second) { const auto bucket = bucket_holder_it->second->getBucket(); @@ -155,7 +155,7 @@ ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t proc } } /// If processing thread has already acquired some bucket - /// and while listing s3 directory gets a key which is in a different bucket, + /// and while listing object storage directory gets a key which is in a different bucket, /// it puts the key into listed_keys_cache to allow others to process it, /// because one processing thread can acquire only one bucket at a time. /// Once a thread is finished with its acquired bucket, it checks listed_keys_cache @@ -292,7 +292,7 @@ ObjectStorageQueueSource::ObjectStorageQueueSource( ContextPtr context_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, - std::shared_ptr s3_queue_log_, + std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_) : ISource(header_) @@ -305,7 +305,7 @@ ObjectStorageQueueSource::ObjectStorageQueueSource( , requested_virtual_columns(requested_virtual_columns_) , shutdown_called(shutdown_called_) , table_is_being_dropped(table_is_being_dropped_) - , s3_queue_log(s3_queue_log_) + , system_queue_log(system_queue_log_) , storage_id(storage_id_) , remove_file_func(remove_file_func_) , log(log_) @@ -400,11 +400,11 @@ Chunk ObjectStorageQueueSource::generate() auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); SCOPE_EXIT({ CurrentThread::get().attachProfileCountersScope(prev_scope); }); - /// FIXME: if files are compressed, profile counters update does not work fully (s3 related counters are not saved). Why? + /// FIXME: if files are compressed, profile counters update does not work fully (object storage related counters are not saved). Why? 
try { - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueuePullMicroseconds); + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueuePullMicroseconds); Chunk chunk; if (reader->pull(chunk)) @@ -487,7 +487,7 @@ void ObjectStorageQueueSource::appendLogElement( size_t processed_rows, bool processed) { - if (!s3_queue_log) + if (!system_queue_log) return; ObjectStorageQueueLogElement elem{}; @@ -507,7 +507,7 @@ void ObjectStorageQueueSource::appendLogElement( .exception = file_status_.getException(), }; } - s3_queue_log->add(std::move(elem)); + system_queue_log->add(std::move(elem)); } } diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index a6ebaf06b5f..15aba3df283 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace Poco { class Logger; } @@ -49,9 +49,6 @@ public: std::atomic & shutdown_called_, LoggerPtr logger_); - /// Note: - /// List results in s3 are always returned in UTF-8 binary order. - /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) ObjectInfoPtr nextImpl(size_t processor) override; size_t estimatedKeysCount() override; @@ -92,7 +89,7 @@ public: ContextPtr context_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, - std::shared_ptr s3_queue_log_, + std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_); @@ -111,7 +108,7 @@ private: const NamesAndTypesList requested_virtual_columns; const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; - const std::shared_ptr s3_queue_log; + const std::shared_ptr system_queue_log; const StorageID storage_id; RemoveFileFunc remove_file_func; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 489980aafa8..397087f566f 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -105,7 +105,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( , configuration{configuration_} , format_settings(format_settings_) , reschedule_processing_interval_ms(queue_settings->polling_min_timeout_ms) - , log(getLogger("StorageObjectStorageQueue (" + table_id_.getFullTableName() + ")")) + , log(getLogger(fmt::format("{}Queue ({})", configuration->getEngineName(), table_id_.getFullTableName()))) { if (configuration->getPath().empty()) { @@ -332,7 +332,10 @@ std::shared_ptr StorageObjectStorageQueue::createSourc { object_storage->removeObject(StoredObject(path)); }; - auto s3_queue_log = queue_settings->enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr; + auto system_queue_log = queue_settings->enable_logging_to_s3queue_log + ? local_context->getS3QueueLog() + : queue_settings->enable_logging_to_azure_queue_log ? 
local_context->getAzureQueueLog() : nullptr; + return std::make_shared( getName(), processor_id, @@ -345,7 +348,7 @@ std::shared_ptr StorageObjectStorageQueue::createSourc local_context, shutdown_called, table_is_being_dropped, - s3_queue_log, + system_queue_log, getStorageID(), log); } diff --git a/src/Storages/ObjectStorageQueue/registerS3Queue.cpp b/src/Storages/ObjectStorageQueue/registerS3Queue.cpp index 08b8104ae49..20968143627 100644 --- a/src/Storages/ObjectStorageQueue/registerS3Queue.cpp +++ b/src/Storages/ObjectStorageQueue/registerS3Queue.cpp @@ -1,14 +1,19 @@ #include "config.h" -#if USE_AWS_S3 - -#include #include -#include #include #include #include +#if USE_AWS_S3 +#include +#include +#endif + +#if USE_AZURE_BLOB_STORAGE +#include +#endif + namespace DB { @@ -17,64 +22,71 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +template +StoragePtr createQueueStorage(const StorageFactory::Arguments & args) +{ + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + + auto queue_settings = std::make_unique(); + if (args.storage_def->settings) + { + queue_settings->loadFromQuery(*args.storage_def); + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. + const auto & changes = args.getContext()->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + + args.storage_def->settings->changes.removeSetting(change.name); + } + + for (const auto & change : args.storage_def->settings->changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.applyChange(change); + } + format_settings = getFormatSettings(args.getContext(), user_format_settings); + } + else + { + format_settings = getFormatSettings(args.getContext()); + } + + return std::make_shared( + std::move(queue_settings), + std::move(configuration), + args.table_id, + args.columns, + args.constraints, + args.comment, + args.getContext(), + format_settings, + args.storage_def, + args.mode); +} + +#if USE_AWS_S3 void registerStorageS3Queue(StorageFactory & factory) { factory.registerStorage( "S3Queue", [](const StorageFactory::Arguments & args) { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = std::make_shared(); - StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); - - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. 
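The refactor in registerS3Queue.cpp folds two nearly identical registration lambdas into one createQueueStorage helper parameterized on the storage configuration type. The compilable toy below illustrates only the pattern; every name in it (StorageBase, S3Config, AzureConfig, QueueStorage) is an invented stand-in, not a ClickHouse class:

    #include <iostream>
    #include <memory>
    #include <string>

    /// Invented stand-ins for the example; the real types are ClickHouse's.
    struct StorageBase
    {
        virtual ~StorageBase() = default;
        virtual std::string name() const = 0;
    };

    struct S3Config    { static constexpr const char * engine = "S3Queue"; };
    struct AzureConfig { static constexpr const char * engine = "AzureQueue"; };

    template <typename Configuration>
    struct QueueStorage : StorageBase
    {
        std::string name() const override { return Configuration::engine; }
    };

    /// One helper parameterized on the configuration type replaces two nearly
    /// identical registration lambdas; this is the shape of the refactor above.
    template <typename Configuration>
    std::shared_ptr<StorageBase> createQueueStorage()
    {
        return std::make_shared<QueueStorage<Configuration>>();
    }

    int main()
    {
        std::cout << createQueueStorage<S3Config>()->name() << '\n';    // S3Queue
        std::cout << createQueueStorage<AzureConfig>()->name() << '\n'; // AzureQueue
    }

Keeping the per-engine lambdas as thin wrappers means the #if USE_AWS_S3 / #if USE_AZURE_BLOB_STORAGE guards stay at the registration sites while the shared logic compiles once.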
- std::optional format_settings; - - auto queue_settings = std::make_unique(); - if (args.storage_def->settings) - { - queue_settings->loadFromQuery(*args.storage_def); - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - - args.storage_def->settings->changes.removeSetting(change.name); - } - - for (const auto & change : args.storage_def->settings->changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.applyChange(change); - } - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - return std::make_shared( - std::move(queue_settings), - std::move(configuration), - args.table_id, - args.columns, - args.constraints, - args.comment, - args.getContext(), - format_settings, - args.storage_def, - args.mode); + return createQueueStorage(args); }, { .supports_settings = true, @@ -82,6 +94,22 @@ void registerStorageS3Queue(StorageFactory & factory) .source_access_type = AccessType::S3, }); } +#endif +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzureQueue(StorageFactory & factory) +{ + factory.registerStorage( + "AzureQueue", + [](const StorageFactory::Arguments & args) + { + return createQueueStorage(args); + }, + { + .supports_settings = true, + .supports_schema_inference = true, + .source_access_type = AccessType::AZURE, + }); } #endif +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 47542b7b47e..628e5a85437 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -34,6 +34,7 @@ void registerStorageFuzzJSON(StorageFactory & factory); void registerStorageS3(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); void registerStorageS3Queue(StorageFactory & factory); +void registerStorageAzureQueue(StorageFactory & factory); #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); @@ -126,6 +127,10 @@ void registerStorages() registerStorageFuzzJSON(factory); #endif +#if USE_AZURE_BLOB_STORAGE + registerStorageAzureQueue(factory); +#endif + #if USE_AWS_S3 registerStorageHudi(factory); registerStorageS3Queue(factory); diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 3c55f0ff7f7..f1c8c07d523 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -12,6 +12,7 @@ import json AVAILABLE_MODES = ["unordered", "ordered"] DEFAULT_AUTH = ["'minio'", "'minio123'"] NO_AUTH = ["NOSIGN"] +AZURE_CONTAINER_NAME = "cont" def prepare_public_s3_bucket(started_cluster): @@ -84,6 +85,7 @@ def started_cluster(): "instance", user_configs=["configs/users.xml"], with_minio=True, + with_azurite=True, with_zookeeper=True, main_configs=[ "configs/zookeeper.xml", @@ -115,6 +117,9 @@ def started_cluster(): cluster.start() logging.info("Cluster started") + container_client = cluster.blob_service_client.get_container_client(AZURE_CONTAINER_NAME) + container_client.create_container() + yield cluster finally: cluster.shutdown() @@ -134,6 +139,7 @@ def generate_random_files( started_cluster, files_path, count, + storage = 
"s3", column_num=3, row_num=10, start_ind=0, @@ -155,7 +161,10 @@ def generate_random_files( values_csv = ( "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" ).encode() - put_s3_file_content(started_cluster, filename, values_csv, bucket) + if storage == "s3": + put_s3_file_content(started_cluster, filename, values_csv, bucket) + else: + put_azure_file_content(started_cluster, filename, values_csv, bucket) return total_values @@ -164,6 +173,11 @@ def put_s3_file_content(started_cluster, filename, data, bucket=None): buf = io.BytesIO(data) started_cluster.minio_client.put_object(bucket, filename, buf, len(data)) +def put_azure_file_content(started_cluster, filename, data, bucket=None): + client = started_cluster.blob_service_client.get_blob_client(AZURE_CONTAINER_NAME, filename) + buf = io.BytesIO(data) + client.upload_blob(buf, "BlockBlob", len(data)) + def create_table( started_cluster, @@ -171,6 +185,7 @@ def create_table( table_name, mode, files_path, + engine_name = "S3Queue", format="column1 UInt32, column2 UInt32, column3 UInt32", additional_settings={}, file_format="CSV", @@ -189,11 +204,17 @@ def create_table( } settings.update(additional_settings) - url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" + engine_def = None + if engine_name == "S3Queue": + url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" + engine_def = f"{engine_name}('{url}', {auth_params}, {file_format})" + else: + engine_def = f"{engine_name}('{started_cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{files_path}/', 'CSV')" + node.query(f"DROP TABLE IF EXISTS {table_name}") create_query = f""" CREATE TABLE {table_name} ({format}) - ENGINE = S3Queue('{url}', {auth_params}, {file_format}) + ENGINE = {engine_def} SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))} """ @@ -224,17 +245,29 @@ def create_mv( ) -@pytest.mark.parametrize("mode", AVAILABLE_MODES) -def test_delete_after_processing(started_cluster, mode): +@pytest.mark.parametrize( + "mode, engine_name", + [ + pytest.param("unordered", "S3Queue"), + pytest.param("unordered", "AzureQueue"), + pytest.param("ordered", "S3Queue"), + pytest.param("ordered", "AzureQueue"), + ], +) +def test_delete_after_processing(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.delete_after_processing_{mode}" + table_name = f"test.delete_after_processing_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" files_num = 5 row_num = 10 + if engine_name == "S3Queue": + storage = "s3" + else: + storage = "azure" total_values = generate_random_files( - started_cluster, files_path, files_num, row_num=row_num + started_cluster, files_path, files_num, row_num=row_num, storage = storage ) create_table( started_cluster, @@ -243,6 +276,7 @@ def test_delete_after_processing(started_cluster, mode): mode, files_path, additional_settings={"after_processing": "delete"}, + engine_name = engine_name, ) create_mv(node, table_name, dst_table_name) @@ -263,15 +297,29 @@ def test_delete_after_processing(started_cluster, mode): ).splitlines() ] == sorted(total_values, key=lambda x: (x[0], x[1], x[2])) - minio = started_cluster.minio_client - objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) - assert len(objects) == 0 + if engine_name == "S3Queue": + minio = started_cluster.minio_client + objects = 
list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) + assert len(objects) == 0 + else: + client = started_cluster.blob_service_client.get_container_client(AZURE_CONTAINER_NAME) + objects_iterator = client.list_blobs(files_path) + for objects in objects_iterator: + assert False -@pytest.mark.parametrize("mode", AVAILABLE_MODES) -def test_failed_retry(started_cluster, mode): +@pytest.mark.parametrize( + "mode, engine_name", + [ + pytest.param("unordered", "S3Queue"), + pytest.param("unordered", "AzureQueue"), + pytest.param("ordered", "S3Queue"), + pytest.param("ordered", "AzureQueue"), + ], +) +def test_failed_retry(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.failed_retry_{mode}" + table_name = f"test.failed_retry_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" file_path = f"{files_path}/trash_test.csv" @@ -296,6 +344,7 @@ def test_failed_retry(started_cluster, mode): "s3queue_loading_retries": retries_num, "keeper_path": keeper_path, }, + engine_name = engine_name, ) create_mv(node, table_name, dst_table_name) From dc6fa85bc37f1239bc65d599dd5dfa0a7c1c1cc4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 19 Jun 2024 14:37:46 +0000 Subject: [PATCH 029/100] Automatic style fix --- .../integration/test_storage_s3_queue/test.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index f1c8c07d523..463f457e920 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -117,7 +117,9 @@ def started_cluster(): cluster.start() logging.info("Cluster started") - container_client = cluster.blob_service_client.get_container_client(AZURE_CONTAINER_NAME) + container_client = cluster.blob_service_client.get_container_client( + AZURE_CONTAINER_NAME + ) container_client.create_container() yield cluster @@ -139,7 +141,7 @@ def generate_random_files( started_cluster, files_path, count, - storage = "s3", + storage="s3", column_num=3, row_num=10, start_ind=0, @@ -173,8 +175,11 @@ def put_s3_file_content(started_cluster, filename, data, bucket=None): buf = io.BytesIO(data) started_cluster.minio_client.put_object(bucket, filename, buf, len(data)) + def put_azure_file_content(started_cluster, filename, data, bucket=None): - client = started_cluster.blob_service_client.get_blob_client(AZURE_CONTAINER_NAME, filename) + client = started_cluster.blob_service_client.get_blob_client( + AZURE_CONTAINER_NAME, filename + ) buf = io.BytesIO(data) client.upload_blob(buf, "BlockBlob", len(data)) @@ -185,7 +190,7 @@ def create_table( table_name, mode, files_path, - engine_name = "S3Queue", + engine_name="S3Queue", format="column1 UInt32, column2 UInt32, column3 UInt32", additional_settings={}, file_format="CSV", @@ -267,7 +272,7 @@ def test_delete_after_processing(started_cluster, mode, engine_name): storage = "azure" total_values = generate_random_files( - started_cluster, files_path, files_num, row_num=row_num, storage = storage + started_cluster, files_path, files_num, row_num=row_num, storage=storage ) create_table( started_cluster, @@ -276,7 +281,7 @@ def test_delete_after_processing(started_cluster, mode, engine_name): mode, files_path, additional_settings={"after_processing": "delete"}, - engine_name = engine_name, + engine_name=engine_name, ) create_mv(node, table_name, dst_table_name) @@ -302,7 
+307,9 @@ def test_delete_after_processing(started_cluster, mode, engine_name): objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) assert len(objects) == 0 else: - client = started_cluster.blob_service_client.get_container_client(AZURE_CONTAINER_NAME) + client = started_cluster.blob_service_client.get_container_client( + AZURE_CONTAINER_NAME + ) objects_iterator = client.list_blobs(files_path) for objects in objects_iterator: assert False @@ -344,7 +351,7 @@ def test_failed_retry(started_cluster, mode, engine_name): "s3queue_loading_retries": retries_num, "keeper_path": keeper_path, }, - engine_name = engine_name, + engine_name=engine_name, ) create_mv(node, table_name, dst_table_name) From 73c0a65d0a42f08d2fbcfc35064755b982d0e14b Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 19 Jun 2024 19:00:28 +0200 Subject: [PATCH 030/100] Rewrite visitor to analyzer --- .../Resolve/ParametrizedViewFunctionVisitor.h | 54 +++++++++++ src/Analyzer/Resolve/QueryAnalyzer.cpp | 35 ++----- src/Interpreters/Context.cpp | 2 +- .../FunctionParameterValuesVisitor.cpp | 92 +++++++------------ src/Parsers/FunctionParameterValuesVisitor.h | 11 +-- ...03167_parametrized_view_with_cte.reference | 1 - .../03167_parametrized_view_with_cte.sql | 2 +- 7 files changed, 97 insertions(+), 100 deletions(-) create mode 100644 src/Analyzer/Resolve/ParametrizedViewFunctionVisitor.h diff --git a/src/Analyzer/Resolve/ParametrizedViewFunctionVisitor.h b/src/Analyzer/Resolve/ParametrizedViewFunctionVisitor.h new file mode 100644 index 00000000000..1845cf22286 --- /dev/null +++ b/src/Analyzer/Resolve/ParametrizedViewFunctionVisitor.h @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class ParametrizedViewFunctionParamsVisitor : public InDepthQueryTreeVisitor +{ +public: + ParametrizedViewFunctionParamsVisitor( + std::function resolve_node_, + const ContextPtr & context_) + : context(context_) + , resolve_node(resolve_node_) + { + } + + void visitImpl(QueryTreeNodePtr & node) + { + if (auto * function_node = node->as()) + { + if (function_node->getFunctionName() != "equals") + return; + + auto nodes = function_node->getArguments().getNodes(); + if (nodes.size() != 2) + return; + + if (auto * identifier_node = nodes[0]->as()) + { + resolve_node(nodes[1]); + auto resolved_value = evaluateConstantExpressionOrIdentifierAsLiteral(nodes[1]->toAST(), context); + auto resolved_value_str = convertFieldToString(resolved_value->as()->value); + params[identifier_node->getIdentifier().getFullName()] = resolved_value_str; + } + } + } + + bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr &) { return true; } + + NameToNameMap getParams() const { return params; } + +private: + NameToNameMap params; + const ContextPtr context; + std::function resolve_node; +}; +} diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index e9f5a55dba9..26fa3574865 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -25,7 +24,6 @@ #include #include #include -#include #include @@ -63,6 +61,7 @@ #include #include #include +#include namespace ProfileEvents { @@ -4509,35 +4508,19 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, } auto context = scope_context->getQueryContext(); - auto param_values_result = analyzeFunctionParamValues(function_ast, context, 
&scope.aliases); - - for (const auto & [param_name, alias] : param_values_result.unresolved_param_aliases) - { - auto it = scope.aliases.alias_name_to_expression_node_before_group_by.find(alias); - if (it != scope.aliases.alias_name_to_expression_node_before_group_by.end()) + auto params_visitor = ParametrizedViewFunctionParamsVisitor( + [&](QueryTreeNodePtr node) { - std::string resolved_value_str; - try - { - resolveExpressionNode(it->second, scope, /* allow_lambda_expression */false, /* allow_table_expression */false); - auto resolved_value = evaluateConstantExpressionOrIdentifierAsLiteral(it->second->toAST(), context); - resolved_value_str = convertFieldToString(resolved_value->as()->value); - } - catch (...) - { - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "Failed to resolve alias ({}) value for parameter {} for parametrized view function: {}. Error: {}", - alias, param_name, it->second->formatASTForErrorMessage(), getCurrentExceptionMessage(true)); - } - param_values_result.resolved_param_values.emplace(param_name, std::move(resolved_value_str)); - } - } + resolveExpressionNode(node, scope, /* allow_lambda_expression */true, /* allow_table_function */false); + }, + context); + + params_visitor.visit(table_function_node); auto parametrized_view_storage = context->buildParametrizedViewStorage( database_name, table_name, - param_values_result.resolved_param_values); + params_visitor.getParams()); if (parametrized_view_storage) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e5d1fc5248d..b714a0b26a7 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1871,7 +1871,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) { auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext(), nullptr).resolved_param_values; + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext()); StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); ASTCreateQuery create; diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 452ca20ab97..eaf28bbbc41 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -4,9 +4,8 @@ #include #include #include -#include -#include #include +#include #include #include @@ -22,13 +21,9 @@ namespace ErrorCodes class FunctionParameterValuesVisitor { public: - explicit FunctionParameterValuesVisitor( - ParamValuesAnalyzeResult & result_, - ContextPtr context_, - const ScopeAliases * aliases_) - : result(result_) - , aliases(aliases_) - , context(context_) + explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_, ContextPtr context_) + : parameter_values(parameter_values_) + , context(context_) { } @@ -36,58 +31,14 @@ public: { if (const auto * function = ast->as()) visitFunction(*function); - for (const auto & child : ast->children) visit(child); } private: - ParamValuesAnalyzeResult & result; - const ScopeAliases * aliases; + NameToNameMap & parameter_values; ContextPtr context; - std::optional tryGetParameterValueAsString(const std::string & param_name, const ASTPtr & ast) - { - if (const auto * literal = ast->as()) - { - return 
convertFieldToString(literal->value); - } - else if (const auto * value_identifier = ast->as()) - { - if (aliases) - { - auto it = aliases->alias_name_to_expression_node_before_group_by.find(value_identifier->name()); - if (it != aliases->alias_name_to_expression_node_before_group_by.end()) - { - auto value_str = tryGetParameterValueAsString(param_name, it->second->toAST()); - if (!value_str.has_value()) - result.unresolved_param_aliases.emplace(param_name, value_identifier->name()); - return value_str; - } - } - } - else if (const auto * function = ast->as()) - { - if (isFunctionCast(function)) - { - const auto * cast_expression = assert_cast(function->arguments.get()); - if (cast_expression->children.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function CAST must have exactly two arguments"); - - if (const auto * cast_literal = cast_expression->children[0]->as()) - { - return convertFieldToString(cast_literal->value); - } - } - else - { - ASTPtr res = evaluateConstantExpressionOrIdentifierAsLiteral(ast, context); - return convertFieldToString(res->as()->value); - } - } - return std::nullopt; - } - void visitFunction(const ASTFunction & parameter_function) { if (parameter_function.name != "equals" && parameter_function.children.size() != 1) @@ -100,18 +51,37 @@ private: if (const auto * identifier = expression_list->children[0]->as()) { - auto value_str = tryGetParameterValueAsString(identifier->name(), expression_list->children[1]); - if (value_str.has_value()) - result.resolved_param_values[identifier->name()] = value_str.value(); + if (const auto * literal = expression_list->children[1]->as()) + { + parameter_values[identifier->name()] = convertFieldToString(literal->value); + } + else if (const auto * function = expression_list->children[1]->as()) + { + if (isFunctionCast(function)) + { + const auto * cast_expression = assert_cast(function->arguments.get()); + if (cast_expression->children.size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function CAST must have exactly two arguments"); + if (const auto * cast_literal = cast_expression->children[0]->as()) + { + parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); + } + } + else + { + ASTPtr res = evaluateConstantExpressionOrIdentifierAsLiteral(expression_list->children[1], context); + parameter_values[identifier->name()] = convertFieldToString(res->as()->value); + } + } } } }; -ParamValuesAnalyzeResult analyzeFunctionParamValues(const ASTPtr & ast, const ContextPtr & context, const ScopeAliases * scope_aliases) +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, ContextPtr context) { - ParamValuesAnalyzeResult result; - FunctionParameterValuesVisitor(result, context, scope_aliases).visit(ast); - return result; + NameToNameMap parameter_values; + FunctionParameterValuesVisitor(parameter_values, context).visit(ast); + return parameter_values; } diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index 50dca8d86de..8c2686dcc65 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -7,17 +7,8 @@ namespace DB { -struct ScopeAliases; /// Find parameters in a query parameter values and collect them into map. 
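For reference, what analyzeFunctionParamValues ultimately produces is a NameToNameMap for calls such as param_test(test_str = 'OK'). The self-contained toy below shows only the equals-node convention the visitor matches on; Node and collectParams are invented for the example, while the real code walks ClickHouse ASTs:

    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>
    #include <vector>

    /// Invented mini-AST for the example: literal, identifier, or function node.
    struct Node
    {
        std::string name;    /// function or identifier name
        std::string literal; /// set when is_literal is true
        std::vector<std::shared_ptr<Node>> children;
        bool is_literal = false;
    };
    using NodePtr = std::shared_ptr<Node>;

    /// Collect identifier = literal pairs, the convention used for
    /// parametrized view arguments such as param_test(test_str = 'OK').
    void collectParams(const NodePtr & node, std::map<std::string, std::string> & out)
    {
        if (node->name == "equals" && node->children.size() == 2
            && !node->children[0]->is_literal && node->children[1]->is_literal)
            out[node->children[0]->name] = node->children[1]->literal;

        for (const auto & child : node->children)
            collectParams(child, out);
    }

    int main()
    {
        auto lit = std::make_shared<Node>();
        lit->is_literal = true;
        lit->literal = "OK";

        auto id = std::make_shared<Node>();
        id->name = "test_str";

        auto eq = std::make_shared<Node>();
        eq->name = "equals";
        eq->children = {id, lit};

        std::map<std::string, std::string> params;
        collectParams(eq, params);
        std::cout << params["test_str"] << '\n'; /// OK
    }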
-struct ParamValuesAnalyzeResult -{ - /// Param name -> resolved param value - NameToNameMap resolved_param_values; - /// Param name -> alias - NameToNameMap unresolved_param_aliases; -}; - -ParamValuesAnalyzeResult analyzeFunctionParamValues(const ASTPtr & ast, const ContextPtr & context, const ScopeAliases * scope_aliases = nullptr); +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, ContextPtr context); } diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference index 951910bbe74..9c1bdda2e2c 100644 --- a/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference @@ -2,4 +2,3 @@ OK 123 123 123 -123 diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql index c64200ee8ff..80eba7867cc 100644 --- a/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql @@ -4,4 +4,4 @@ WITH 'OK' AS s SELECT * FROM param_test(test_str=s); WITH (SELECT 123) AS s SELECT * FROM param_test(test_str=s); WITH (SELECT 100 + 20 + 3) AS s SELECT * FROM param_test(test_str=s); WITH (SELECT number FROM numbers(123, 1)) AS s SELECT * FROM param_test(test_str=s); -WITH CAST(123, String) AS s SELECT * FROM param_test(test_str=s); +-- WITH CAST(123, String) AS s SELECT * FROM param_test(test_str=s); From 1eabdd81b6492e6109b27ff639d11f95845b47e2 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 19 Jun 2024 19:12:08 +0200 Subject: [PATCH 031/100] Fix logger name --- src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 397087f566f..640dbf22421 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -105,7 +105,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( , configuration{configuration_} , format_settings(format_settings_) , reschedule_processing_interval_ms(queue_settings->polling_min_timeout_ms) - , log(getLogger(fmt::format("{}Queue ({})", configuration->getEngineName(), table_id_.getFullTableName()))) + , log(getLogger(fmt::format("Storage{}Queue ({})", configuration->getEngineName(), table_id_.getFullTableName()))) { if (configuration->getPath().empty()) { From ede8630205e4c073c7f1f8db46ea1c025e5e2de2 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 19 Jun 2024 19:25:16 +0000 Subject: [PATCH 032/100] Fix some tests, add debug logging for test 00693_max_block_size_system_tables_columns, use global context + current database in getDependenciesFromCreateQuery --- src/Backups/RestorerFromBackup.cpp | 2 +- src/Databases/DDLDependencyVisitor.cpp | 8 ++++---- src/Databases/DDLDependencyVisitor.h | 2 +- src/Databases/DDLLoadingDependencyVisitor.cpp | 2 +- src/Databases/DatabaseMemory.cpp | 2 +- src/Databases/DatabaseOrdinary.cpp | 2 +- src/Databases/DatabaseReplicated.cpp | 2 +- src/Databases/TablesLoader.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 4 ++-- src/Storages/System/StorageSystemColumns.cpp | 1 + .../0_stateless/01083_expressions_in_engine_arguments.sql | 2 +- 
.../0_stateless/01852_dictionary_found_rate_long.sql | 2 +- .../02152_http_external_tables_memory_tracking.sh | 2 +- 13 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 1a3fdf58cc4..454a0468e9f 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -438,7 +438,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ String create_table_query_str = serializeAST(*create_table_query); bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); - auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query); + auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query, context->getCurrentDatabase()); bool table_has_data = backup->hasFiles(data_path_in_backup); std::lock_guard lock{mutex}; diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index fdc51f4f43d..06df7371b50 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -30,8 +30,8 @@ namespace { friend void tryVisitNestedSelect(const String & query, DDLDependencyVisitorData & data); public: - DDLDependencyVisitorData(const ContextPtr & context_, const QualifiedTableName & table_name_, const ASTPtr & ast_) - : create_query(ast_), table_name(table_name_), current_database(context_->getCurrentDatabase()), context(context_) + DDLDependencyVisitorData(const ContextPtr & context_, const QualifiedTableName & table_name_, const ASTPtr & ast_, const String & current_database_) + : create_query(ast_), table_name(table_name_), current_database(current_database_), context(context_) { } @@ -464,9 +464,9 @@ namespace } -TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast) +TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database) { - DDLDependencyVisitor::Data data{context, table_name, ast}; + DDLDependencyVisitor::Data data{global_context, table_name, ast, current_database}; DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(ast); return std::move(data).getDependencies(); diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index 29ea6298b04..a17640f7a14 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -13,6 +13,6 @@ using TableNamesSet = std::unordered_set; /// Returns a list of all tables explicitly referenced in the create query of a specified table. /// For example, a column default expression can use dictGet() and thus reference a dictionary. /// Does not validate AST, works a best-effort way. 
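The signature change below threads the caller's current database through explicitly rather than reading it from the passed context. A minimal sketch of the qualification rule this enables; QualifiedTableName is reduced to two strings here, an approximation for the example:

    #include <iostream>
    #include <string>

    /// Reduced to two strings for the example.
    struct QualifiedTableName { std::string database, table; };

    /// A dependency written without a database is qualified with the database
    /// that was current for the CREATE query, which the new parameter supplies.
    QualifiedTableName qualify(const QualifiedTableName & name, const std::string & current_database)
    {
        if (!name.database.empty())
            return name;
        return {current_database, name.table};
    }

    int main()
    {
        auto a = qualify({"", "source_table"}, "db1");
        auto b = qualify({"system", "one"}, "db1");
        std::cout << a.database << '.' << a.table << '\n'; // db1.source_table
        std::cout << b.database << '.' << b.table << '\n'; // system.one
    }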
-TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast); +TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database); } diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 0253709fb6e..5a7e9e45cf8 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -122,7 +122,7 @@ void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data) { if (storage.ttl_table) { - auto ttl_dependensies = getDependenciesFromCreateQuery(data.global_context, data.table_name, storage.ttl_table->ptr()); + auto ttl_dependensies = getDependenciesFromCreateQuery(data.global_context, data.table_name, storage.ttl_table->ptr(), data.default_database); data.dependencies.merge(ttl_dependensies); } diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index dce20e3ac6f..508e57a4eaf 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -154,7 +154,7 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl applyMetadataChangesToCreateQuery(it->second, metadata); /// The create query of the table has been just changed, we need to update dependencies too. - auto ref_dependencies = getDependenciesFromCreateQuery(local_context, table_id.getQualifiedName(), it->second); + auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second, local_context->getCurrentDatabase()); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second, local_context->getCurrentDatabase()); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 18c92e6bcbc..d6a5f39a09f 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -539,7 +539,7 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta } /// The create query of the table has been just changed, we need to update dependencies too. - auto ref_dependencies = getDependenciesFromCreateQuery(local_context, table_id.getQualifiedName(), ast); + auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast, local_context->getCurrentDatabase()); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast, local_context->getCurrentDatabase()); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index badfedeec9b..fde84592e17 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1146,7 +1146,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep /// And QualifiedTableName::parseFromString doesn't handle this. 
auto qualified_name = QualifiedTableName{.database = getDatabaseName(), .table = table_name}; auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query); - tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext(), qualified_name, query_ast)); + tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext(), qualified_name, query_ast, getContext()->getCurrentDatabase())); } tables_dependencies.checkNoCyclicDependencies(); diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 4bfe44ba72c..33944277995 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -137,7 +137,7 @@ void TablesLoader::buildDependencyGraph() { for (const auto & [table_name, table_metadata] : metadata.parsed_tables) { - auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast); + auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast, global_context->getCurrentDatabase()); auto new_loading_dependencies = getLoadingDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast, global_context->getCurrentDatabase()); if (!new_ref_dependencies.empty()) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 35207bc9d39..242b093c0ea 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1083,7 +1083,7 @@ namespace void addTableDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) { QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; - auto ref_dependencies = getDependenciesFromCreateQuery(context, qualified_name, query_ptr); + auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); } @@ -1091,7 +1091,7 @@ void addTableDependencies(const ASTCreateQuery & create, const ASTPtr & query_pt void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) { QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; - auto ref_dependencies = getDependenciesFromCreateQuery(context, qualified_name, query_ptr); + auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 49da1eba9ec..a6a791f75b5 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -134,6 +134,7 @@ protected: cols_required_for_sorting_key = metadata_snapshot->getColumnsRequiredForSortingKey(); cols_required_for_primary_key = metadata_snapshot->getColumnsRequiredForPrimaryKey(); 
cols_required_for_sampling = metadata_snapshot->getColumnsRequiredForSampling(); + LOG_DEBUG(getLogger("StorageSystemColumns"), "Get column sizes for table {}.{}", database_name, table_name); column_sizes = storage->getColumnSizes(); } diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql index 6268765aa27..bdfbf2a47cf 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql @@ -88,6 +88,7 @@ SELECT sum(n) from rich_syntax; SYSTEM DROP DNS CACHE; DROP TABLE file; +DROP DICTIONARY dict; DROP TABLE url; DROP TABLE view; DROP TABLE buffer; @@ -96,4 +97,3 @@ DROP TABLE merge_tf; DROP TABLE distributed; DROP TABLE distributed_tf; DROP TABLE rich_syntax; -DROP DICTIONARY dict; diff --git a/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql b/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql index d5108e98510..da364403893 100644 --- a/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql +++ b/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql @@ -310,6 +310,6 @@ SELECT name, found_rate FROM system.dictionaries WHERE database = currentDatabas SELECT tuple(x, y) as key, dictGet('polygon_dictionary_01862', 'name', key) FROM points_01862 FORMAT Null; SELECT name, found_rate FROM system.dictionaries WHERE database = currentDatabase() AND name = 'polygon_dictionary_01862'; +DROP DICTIONARY polygon_dictionary_01862; DROP TABLE polygons_01862; DROP TABLE points_01862; -DROP DICTIONARY polygon_dictionary_01862; diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh index 5494f7d59cb..8aba362d6af 100755 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh +++ b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-cpu-aarch64, no-parallel, no-debug +# Tags: no-tsan, no-cpu-aarch64, no-parallel # TSan does not supports tracing. 
# trace_log doesn't work on aarch64 From 1f0405c77d3972d8330fa2c1ac95dc0dd8424ca3 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 19 Jun 2024 21:29:59 +0200 Subject: [PATCH 033/100] Fixed test --- tests/integration/test_storage_azure_blob_storage/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 9f5aef1489c..fff786e3102 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1217,7 +1217,7 @@ def test_filtering_by_file_or_path(cluster): node.query("SYSTEM FLUSH LOGS") result = node.query( - f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query like '%select%azure%test_filter%' AND type='QueryFinish'" + f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query ilike '%select%azure%test_filter%' AND type='QueryFinish'" ) assert int(result) == 1 From c424fcd79f593fed9ce51a63ab66fcca41f71ab6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 20 Jun 2024 13:39:32 +0200 Subject: [PATCH 034/100] Fix --- src/Analyzer/Resolve/ParametrizedViewFunctionVisitor.h | 10 +++++----- src/Analyzer/Resolve/QueryAnalyzer.cpp | 7 ++++++- .../03167_parametrized_view_with_cte.reference | 1 + .../0_stateless/03167_parametrized_view_with_cte.sql | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/Analyzer/Resolve/ParametrizedViewFunctionVisitor.h b/src/Analyzer/Resolve/ParametrizedViewFunctionVisitor.h index 1845cf22286..8b726853543 100644 --- a/src/Analyzer/Resolve/ParametrizedViewFunctionVisitor.h +++ b/src/Analyzer/Resolve/ParametrizedViewFunctionVisitor.h @@ -14,7 +14,7 @@ class ParametrizedViewFunctionParamsVisitor : public InDepthQueryTreeVisitor resolve_node_, + std::function resolve_node_, const ContextPtr & context_) : context(context_) , resolve_node(resolve_node_) @@ -34,8 +34,8 @@ public: if (auto * identifier_node = nodes[0]->as()) { - resolve_node(nodes[1]); - auto resolved_value = evaluateConstantExpressionOrIdentifierAsLiteral(nodes[1]->toAST(), context); + auto resolved_node = resolve_node(nodes[1]); + auto resolved_value = evaluateConstantExpressionOrIdentifierAsLiteral(resolved_node->toAST(), context); auto resolved_value_str = convertFieldToString(resolved_value->as()->value); params[identifier_node->getIdentifier().getFullName()] = resolved_value_str; } @@ -44,11 +44,11 @@ public: bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr &) { return true; } - NameToNameMap getParams() const { return params; } + const NameToNameMap & getParametersMap() const { return params; } private: NameToNameMap params; const ContextPtr context; - std::function resolve_node; + std::function resolve_node; }; } diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 26fa3574865..cd2ae38e386 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -4512,6 +4512,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, [&](QueryTreeNodePtr node) { resolveExpressionNode(node, scope, /* allow_lambda_expression */true, /* allow_table_function */false); + auto alias_node = scope.aliases.alias_name_to_expression_node->find(node->getAlias()); + if (alias_node != scope.aliases.alias_name_to_expression_node->end()) + return alias_node->second; + else + return node; }, context); @@ 
-4520,7 +4525,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, auto parametrized_view_storage = context->buildParametrizedViewStorage( database_name, table_name, - params_visitor.getParams()); + params_visitor.getParametersMap()); if (parametrized_view_storage) { diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference index 9c1bdda2e2c..951910bbe74 100644 --- a/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference @@ -2,3 +2,4 @@ OK 123 123 123 +123 diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql index 80eba7867cc..1ac5540047a 100644 --- a/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql @@ -4,4 +4,4 @@ WITH 'OK' AS s SELECT * FROM param_test(test_str=s); WITH (SELECT 123) AS s SELECT * FROM param_test(test_str=s); WITH (SELECT 100 + 20 + 3) AS s SELECT * FROM param_test(test_str=s); WITH (SELECT number FROM numbers(123, 1)) AS s SELECT * FROM param_test(test_str=s); --- WITH CAST(123, String) AS s SELECT * FROM param_test(test_str=s); +WITH CAST(123, 'String') AS s SELECT * FROM param_test(test_str=s); From 1248490f0d023ab7598d2f38e847e5efffd325cb Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 20 Jun 2024 14:51:49 +0200 Subject: [PATCH 035/100] Added more tests for different signatures --- .../test_mask_sensitive_info/test.py | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 965d6518164..c9895bdc2d9 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -371,11 +371,19 @@ def test_table_functions(): "DNS_ERROR", ), f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')", - f"azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_1.csv', '{azure_account_name}', '{azure_account_key}')", - f"azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_2.csv', '{azure_account_name}', '{azure_account_key}', 'CSV')", - f"azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_3.csv', 'CSV')", - f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '{azure_account_key}')", - f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '{azure_account_key}', 'CSV')", + f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')", + f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')", + f"azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_3.csv', '{azure_account_name}', '{azure_account_key}')", + f"azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '{azure_account_key}', 'CSV')", + f"azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '{azure_account_key}', 'CSV', 'none')", + f"azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_6.csv', 
'{azure_account_name}', '{azure_account_key}', 'CSV', 'none', 'auto')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_7.csv', 'CSV')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_8.csv', 'CSV', 'none')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV', 'none', 'auto')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_10.csv', '{azure_account_name}', '{azure_account_key}')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_11.csv', '{azure_account_name}', '{azure_account_key}', 'CSV')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_12.csv', '{azure_account_name}', '{azure_account_key}', 'CSV', 'none')", + f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_13.csv', '{azure_account_name}', '{azure_account_key}', 'CSV', 'none', 'auto')", ] def make_test_case(i): @@ -450,11 +458,19 @@ def test_table_functions(): "CREATE TABLE tablefunc31 (`x` int) AS s3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '[HIDDEN]')", "CREATE TABLE tablefunc32 (`x` int) AS deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", f"CREATE TABLE tablefunc33 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')", - f"CREATE TABLE tablefunc34 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_1.csv', '{azure_account_name}', '[HIDDEN]')", - f"CREATE TABLE tablefunc35 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_2.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')", - f"CREATE TABLE tablefunc36 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_3.csv', 'CSV')", - f"CREATE TABLE tablefunc37 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '[HIDDEN]')", - f"CREATE TABLE tablefunc38 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')", + f"CREATE TABLE tablefunc34 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')", + f"CREATE TABLE tablefunc35 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')", + f"CREATE TABLE tablefunc36 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_3.csv', '{azure_account_name}', '[HIDDEN]')", + f"CREATE TABLE tablefunc37 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')", + f"CREATE TABLE tablefunc38 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')", + f"CREATE TABLE tablefunc39 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_6.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')", + f"CREATE TABLE tablefunc40 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_7.csv', 'CSV')", + f"CREATE TABLE tablefunc41 (x int) AS 
azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_8.csv', 'CSV', 'none')", + f"CREATE TABLE tablefunc42 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV', 'none', 'auto')", + f"CREATE TABLE tablefunc43 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_10.csv', '{azure_account_name}', '[HIDDEN]')", + f"CREATE TABLE tablefunc44 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_11.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')", + f"CREATE TABLE tablefunc45 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_12.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')", + f"CREATE TABLE tablefunc46 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_13.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')", ], must_not_contain=[password], ) From 347d67cbe525972bcf2caadaae190f5ddf4217ef Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 20 Jun 2024 15:58:08 +0000 Subject: [PATCH 036/100] Process databases correctly --- src/Databases/DDLDependencyVisitor.cpp | 44 +++++++++++++------ src/Databases/DDLDependencyVisitor.h | 1 + src/Databases/DDLLoadingDependencyVisitor.cpp | 22 +++++++--- src/Databases/DDLLoadingDependencyVisitor.h | 2 +- src/Databases/DatabaseMemory.cpp | 2 +- src/Databases/DatabaseOrdinary.cpp | 2 +- src/Databases/DatabaseReplicated.cpp | 2 +- src/Databases/TablesLoader.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 4 +- 9 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 06df7371b50..c85e8f5688a 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -30,8 +30,8 @@ namespace { friend void tryVisitNestedSelect(const String & query, DDLDependencyVisitorData & data); public: - DDLDependencyVisitorData(const ContextPtr & context_, const QualifiedTableName & table_name_, const ASTPtr & ast_, const String & current_database_) - : create_query(ast_), table_name(table_name_), current_database(current_database_), context(context_) + DDLDependencyVisitorData(const ContextPtr & global_context_, const QualifiedTableName & table_name_, const ASTPtr & ast_, const String & current_database_) + : create_query(ast_), table_name(table_name_), default_database(global_context_->getCurrentDatabase()), current_database(current_database_), global_context(global_context_) { } @@ -71,8 +71,9 @@ namespace ASTPtr create_query; std::unordered_set skip_asts; QualifiedTableName table_name; + String default_database; String current_database; - ContextPtr context; + ContextPtr global_context; TableNamesSet dependencies; /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query. 
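The next hunk changes where an unqualified table in a dictionary's clickhouse source resolves: to the dictionary's own database instead of the current/default one. A sketch of just that fallback rule, with DictionarySourceInfo invented for the example:

    #include <iostream>
    #include <string>

    /// Invented stand-in for the example.
    struct DictionarySourceInfo { std::string database, table; };

    /// The fallback applied after this patch: an empty source database resolves
    /// to the database of the dictionary itself, not the session's current one.
    std::string resolveSourceDatabase(const DictionarySourceInfo & source, const std::string & dictionary_database)
    {
        return source.database.empty() ? dictionary_database : source.database;
    }

    int main()
    {
        DictionarySourceInfo unqualified{"", "source_table"};
        DictionarySourceInfo qualified{"db_other", "source_table"};
        std::cout << resolveSourceDatabase(unqualified, "db_dict") << '\n'; // db_dict
        std::cout << resolveSourceDatabase(qualified, "db_dict") << '\n';   // db_other
    }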
@@ -108,8 +109,8 @@ namespace if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements) return; - auto config = getDictionaryConfigurationFromAST(create_query->as(), context); - auto info = getInfoIfClickHouseDictionarySource(config, context); + auto config = getDictionaryConfigurationFromAST(create_query->as(), global_context); + auto info = getInfoIfClickHouseDictionarySource(config, global_context); /// We consider only dependencies on local tables. if (!info || !info->is_local) @@ -117,14 +118,21 @@ namespace if (!info->table_name.table.empty()) { + /// If database is not specified in dictionary source, use database of the dictionary itself, not the current/default database. if (info->table_name.database.empty()) - info->table_name.database = current_database; + info->table_name.database = table_name.database; dependencies.emplace(std::move(info->table_name)); } else { - /// We don't have a table name, we have a select query instead + /// We don't have a table name, we have a select query instead. + /// All tables from select query in dictionary definition won't + /// use current database, as this query is executed with global context. + /// Use default database from global context while visiting select query. + String current_database_ = current_database; + current_database = default_database; tryVisitNestedSelect(info->query, *this); + current_database = current_database_; } } @@ -181,7 +189,7 @@ namespace if (auto cluster_name = tryGetClusterNameFromArgument(table_engine, 0)) { - auto cluster = context->tryGetCluster(*cluster_name); + auto cluster = global_context->tryGetCluster(*cluster_name); if (cluster && cluster->getLocalShardCount()) has_local_replicas = true; } @@ -236,7 +244,7 @@ namespace { if (auto cluster_name = tryGetClusterNameFromArgument(function, 0)) { - if (auto cluster = context->tryGetCluster(*cluster_name)) + if (auto cluster = global_context->tryGetCluster(*cluster_name)) { if (cluster->getLocalShardCount()) has_local_replicas = true; @@ -308,7 +316,10 @@ namespace try { /// We're just searching for dependencies here, it's not safe to execute subqueries now. - auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + /// Use copy of the global_context and set current database, because expressions can contain currentDatabase() function. 
+ ContextMutablePtr global_context_copy = Context::createCopy(global_context); + global_context_copy->setCurrentDatabase(current_database); + auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, global_context_copy); const auto * literal = evaluated->as(); if (!literal || (literal->value.getType() != Field::Types::String)) return {}; @@ -449,7 +460,7 @@ namespace ParserSelectWithUnionQuery parser; String description = fmt::format("Query for ClickHouse dictionary {}", data.table_name); String fixed_query = removeWhereConditionPlaceholder(query); - const Settings & settings = data.context->getSettingsRef(); + const Settings & settings = data.global_context->getSettingsRef(); ASTPtr select = parseQuery(parser, fixed_query, description, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); @@ -464,12 +475,19 @@ namespace } -TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database) +TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database) { - DDLDependencyVisitor::Data data{global_context, table_name, ast, current_database}; + DDLDependencyVisitor::Data data{global_global_context, table_name, ast, current_database}; DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(ast); return std::move(data).getDependencies(); } +TableNamesSet getDependenciesFromDictionaryNestedSelectQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & select_query, const String & current_database) +{ + DDLDependencyVisitor::Data data{global_context, table_name, ast, current_database}; + tryVisitNestedSelect(select_query, data); + return std::move(data).getDependencies(); +} + } diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index a17640f7a14..fcbad83f440 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -14,5 +14,6 @@ using TableNamesSet = std::unordered_set; /// For example, a column default expression can use dictGet() and thus reference a dictionary. /// Does not validate AST, works a best-effort way. 
TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database); +TableNamesSet getDependenciesFromDictionaryNestedSelectQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & select_query, const String & current_database); } diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 5a7e9e45cf8..d3f41cecb86 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -21,11 +21,11 @@ namespace DB using TableLoadingDependenciesVisitor = DDLLoadingDependencyVisitor::Visitor; -TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast, const String & default_database) +TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast) { assert(global_context == global_context->getGlobalContext()); TableLoadingDependenciesVisitor::Data data; - data.default_database = default_database; + data.default_database = global_context->getCurrentDatabase(); data.create_query = ast; data.global_context = global_context; data.table_name = table; @@ -110,12 +110,22 @@ void DDLLoadingDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & auto config = getDictionaryConfigurationFromAST(data.create_query->as(), data.global_context); auto info = getInfoIfClickHouseDictionarySource(config, data.global_context); - if (!info || !info->is_local || info->table_name.table.empty()) + if (!info || !info->is_local) return; - if (info->table_name.database.empty()) - info->table_name.database = data.default_database; - data.dependencies.emplace(std::move(info->table_name)); + if (!info->table_name.table.empty()) + { + /// If database is not specified in dictionary source, use database of the dictionary itself, not the current/default database. + if (info->table_name.database.empty()) + info->table_name.database = data.table_name.database; + data.dependencies.emplace(std::move(info->table_name)); + } + else + { + /// We don't have a table name, we have a select query instead that will be executed during dictionary loading. + auto select_query_dependencies = getDependenciesFromDictionaryNestedSelectQuery(data.global_context, data.table_name, data.create_query, info->query, data.default_database); + data.dependencies.merge(select_query_dependencies); + } } void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data) diff --git a/src/Databases/DDLLoadingDependencyVisitor.h b/src/Databases/DDLLoadingDependencyVisitor.h index 099d9c1979b..a9e9f4d7a53 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.h +++ b/src/Databases/DDLLoadingDependencyVisitor.h @@ -16,7 +16,7 @@ using TableNamesSet = std::unordered_set; /// Returns a list of all tables which should be loaded before a specified table. /// For example, a local ClickHouse table should be loaded before a dictionary which uses that table as its source. /// Does not validate AST, works a best-effort way. 
-TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast, const String & default_database); +TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast); class DDLMatcherBase diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 508e57a4eaf..86bf0471b8f 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -155,7 +155,7 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl /// The create query of the table has been just changed, we need to update dependencies too. auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second, local_context->getCurrentDatabase()); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second, local_context->getCurrentDatabase()); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index d6a5f39a09f..7d4bb07e8ef 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -540,7 +540,7 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta /// The create query of the table has been just changed, we need to update dependencies too. auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast, local_context->getCurrentDatabase()); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast, local_context->getCurrentDatabase()); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, local_context); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index fde84592e17..ad1603f08bb 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1146,7 +1146,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep /// And QualifiedTableName::parseFromString doesn't handle this. 
auto qualified_name = QualifiedTableName{.database = getDatabaseName(), .table = table_name}; auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query); - tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext(), qualified_name, query_ast, getContext()->getCurrentDatabase())); + tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ast, getContext()->getCurrentDatabase())); } tables_dependencies.checkNoCyclicDependencies(); diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 33944277995..733e5d53981 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -138,7 +138,7 @@ void TablesLoader::buildDependencyGraph() for (const auto & [table_name, table_metadata] : metadata.parsed_tables) { auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast, global_context->getCurrentDatabase()); - auto new_loading_dependencies = getLoadingDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast, global_context->getCurrentDatabase()); + auto new_loading_dependencies = getLoadingDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast); if (!new_ref_dependencies.empty()) referential_dependencies.addDependencies(table_name, new_ref_dependencies); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 242b093c0ea..fad7fd421a8 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1084,7 +1084,7 @@ void addTableDependencies(const ASTCreateQuery & create, const ASTPtr & query_pt { QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); } @@ -1092,7 +1092,7 @@ void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, { QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); } From b9e6d1c26bea4161c37f0e496d18354d64136693 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 20 Jun 2024 16:02:03 +0000 Subject: [PATCH 037/100] Better comments --- src/Databases/DDLDependencyVisitor.h | 2 ++ src/Databases/DDLLoadingDependencyVisitor.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index fcbad83f440..400e6b04108 100644 --- 
a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -14,6 +14,8 @@ using TableNamesSet = std::unordered_set; /// For example, a column default expression can use dictGet() and thus reference a dictionary. /// Does not validate AST, works a best-effort way. TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database); + +/// Returns a list of all tables explicitly referenced in the select query specified as a dictionary source. TableNamesSet getDependenciesFromDictionaryNestedSelectQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & select_query, const String & current_database); } diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index d3f41cecb86..40234abb20f 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -123,6 +123,7 @@ void DDLLoadingDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & else { /// We don't have a table name, we have a select query instead that will be executed during dictionary loading. + /// We need to find all tables used in this select query and add them to dependencies. auto select_query_dependencies = getDependenciesFromDictionaryNestedSelectQuery(data.global_context, data.table_name, data.create_query, info->query, data.default_database); data.dependencies.merge(select_query_dependencies); } From 5904847316115cfdae3662e39746e5c07b1647dc Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 20 Jun 2024 18:32:00 +0200 Subject: [PATCH 038/100] Fix tests --- .../engines/table-engines/integrations/s3queue.md | 2 ++ tests/integration/test_storage_s3_queue/test.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 0958680dc56..e8b1628d03e 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -28,6 +28,8 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32) [s3queue_cleanup_interval_max_ms = 30000,] ``` +Starting with `24.7`, settings without the `s3queue_` prefix are also supported. + **Engine parameters** - `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
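For illustration (a hedged sketch, not part of the patch): per the doc line added above, an S3Queue engine setting can be spelled with or without the `s3queue_` prefix from `24.7` on. The bucket URL below is a hypothetical placeholder, and `s3queue_mode` is one of the engine's documented settings.

```sql
-- Pre-24.7 spelling: the engine setting carries the s3queue_ prefix.
CREATE TABLE s3_queue_prefixed (name String, value UInt32)
ENGINE = S3Queue('https://example-bucket.s3.amazonaws.com/data/*', 'CSV')
SETTINGS s3queue_mode = 'unordered';

-- 24.7+ spelling: the same setting without the prefix.
CREATE TABLE s3_queue_unprefixed (name String, value UInt32)
ENGINE = S3Queue('https://example-bucket.s3.amazonaws.com/data/*', 'CSV')
SETTINGS mode = 'unordered';
```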
diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 463f457e920..61b74db74bb 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -339,7 +339,10 @@ def test_failed_retry(started_cluster, mode, engine_name): values_csv = ( "\n".join((",".join(map(str, row)) for row in values)) + "\n" ).encode() - put_s3_file_content(started_cluster, file_path, values_csv) + if engine_name == "S3Queue": + put_s3_file_content(started_cluster, file_path, values_csv) + else: + put_azure_file_content(started_cluster, file_path, values_csv) create_table( started_cluster, @@ -886,7 +889,7 @@ def test_max_set_age(started_cluster): failed_count = int( node.query( - "SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles' SETTINGS system_events_show_zero_values=1" + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) ) @@ -901,7 +904,7 @@ def test_max_set_age(started_cluster): for _ in range(30): if failed_count + 1 == int( node.query( - "SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles' SETTINGS system_events_show_zero_values=1" + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) ): break @@ -909,7 +912,7 @@ def test_max_set_age(started_cluster): assert failed_count + 1 == int( node.query( - "SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles' SETTINGS system_events_show_zero_values=1" + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) ) @@ -926,7 +929,7 @@ def test_max_set_age(started_cluster): time.sleep(max_age + 1) assert failed_count + 2 == int( - node.query("SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles'") + node.query("SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles'") ) node.query("SYSTEM FLUSH LOGS") From da55a213ef1b5a210613c0946c01b06688ab40aa Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 20 Jun 2024 16:38:03 +0000 Subject: [PATCH 039/100] Automatic style fix --- tests/integration/test_storage_s3_queue/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 61b74db74bb..71886bf9dcd 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -929,7 +929,9 @@ def test_max_set_age(started_cluster): time.sleep(max_age + 1) assert failed_count + 2 == int( - node.query("SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles'") + node.query( + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles'" + ) ) node.query("SYSTEM FLUSH LOGS") From c26e7b506766d8068576e733e9870b4d2a6a0482 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 20 Jun 2024 16:56:30 +0000 Subject: [PATCH 040/100] Fix test --- tests/queries/0_stateless/01760_system_dictionaries.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01760_system_dictionaries.sql b/tests/queries/0_stateless/01760_system_dictionaries.sql index a5609281e49..2e7d4184811 100644 --- a/tests/queries/0_stateless/01760_system_dictionaries.sql +++ b/tests/queries/0_stateless/01760_system_dictionaries.sql @@ -25,8 +25,8 @@ SELECT * FROM 
01760_db.example_simple_key_dictionary; SELECT name, database, key.names, key.types, attribute.names, attribute.types, status FROM system.dictionaries WHERE database='01760_db'; -DROP TABLE 01760_db.example_simple_key_source; DROP DICTIONARY 01760_db.example_simple_key_dictionary; +DROP TABLE 01760_db.example_simple_key_source; SELECT name, database, key.names, key.types, attribute.names, attribute.types, status FROM system.dictionaries WHERE database='01760_db'; @@ -53,7 +53,7 @@ SELECT * FROM 01760_db.example_complex_key_dictionary; SELECT name, database, key.names, key.types, attribute.names, attribute.types, status FROM system.dictionaries WHERE database='01760_db'; -DROP TABLE 01760_db.example_complex_key_source; DROP DICTIONARY 01760_db.example_complex_key_dictionary; +DROP TABLE 01760_db.example_complex_key_source; DROP DATABASE 01760_db; From b3a7a1aa3dcfb8c2be3d0d00ea67dd6a34e280f7 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 7 Jun 2024 00:40:55 +0000 Subject: [PATCH 041/100] Fix AST formatting of 'create table b empty as a' --- src/Parsers/ASTCreateQuery.cpp | 14 ++++++++++++-- .../03168_inconsistent_ast_formatting.reference | 0 .../03168_inconsistent_ast_formatting.sql | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03168_inconsistent_ast_formatting.reference create mode 100644 tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 3e5c6a9d86e..76fa943d1e0 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -404,8 +404,17 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat << quoteString(toString(to_inner_uuid)); } + bool should_add_empty = is_create_empty; + auto add_empty_if_needed = [&] { + if (!should_add_empty) + return; + should_add_empty = false; + settings.ostr << (settings.hilite ? hilite_keyword : "") << " EMPTY" << (settings.hilite ? hilite_none : ""); + }; + if (!as_table.empty()) { + add_empty_if_needed(); settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "") << (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table); @@ -423,6 +432,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat frame.expression_list_always_start_on_new_line = false; } + add_empty_if_needed(); settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : ""); as_table_function->formatImpl(settings, state, frame); } @@ -484,8 +494,8 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat if (is_populate) settings.ostr << (settings.hilite ? hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : ""); - else if (is_create_empty) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " EMPTY" << (settings.hilite ? 
hilite_none : ""); + + add_empty_if_needed(); if (sql_security && supportSQLSecurity() && sql_security->as().type.has_value()) { diff --git a/tests/queries/0_stateless/03168_inconsistent_ast_formatting.reference b/tests/queries/0_stateless/03168_inconsistent_ast_formatting.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql b/tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql new file mode 100644 index 00000000000..b21aa222106 --- /dev/null +++ b/tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql @@ -0,0 +1,2 @@ +create table a (x Int8) engine Memory; +create table b empty as a; From bf8a9a2c4368a3c4caae3045c65b48f0f6d80ad5 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 20 Jun 2024 20:04:25 +0000 Subject: [PATCH 042/100] Fix DatabaseReplicated --- src/Interpreters/InterpreterCreateQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7272e10b801..d73c9a10ad9 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -898,6 +898,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti assert(as_database_saved.empty() && as_table_saved.empty()); std::swap(create.as_database, as_database_saved); std::swap(create.as_table, as_table_saved); + if (!as_database_saved.empty()) + create.is_create_empty = false; return properties; } From 3c929636b58f9f3b9a7829cf07196622e697f3b9 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 20 Jun 2024 23:03:06 +0000 Subject: [PATCH 043/100] Stylish --- src/Parsers/ASTCreateQuery.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 76fa943d1e0..d56a2724914 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -405,7 +405,8 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } bool should_add_empty = is_create_empty; - auto add_empty_if_needed = [&] { + auto add_empty_if_needed = [&] + { if (!should_add_empty) return; should_add_empty = false; From 5447145c7aa2be0333a17066a9591e0d6d5c88a6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 21 Jun 2024 11:53:01 +0200 Subject: [PATCH 044/100] Fix test --- tests/integration/test_storage_s3_queue/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 71886bf9dcd..a118c8da52f 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1475,7 +1475,7 @@ def test_settings_check(started_cluster): ) assert ( - "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. Stored in ZooKeeper: 2, local: 3" + "Existing table metadata in ZooKeeper differs in buckets setting. 
Stored in ZooKeeper: 2, local: 3" in create_table( started_cluster, node_2, From 0b9e6de6fbfe5ad13d96e7f417479a588f2fde7c Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 21 Jun 2024 11:47:33 +0000 Subject: [PATCH 045/100] Fix remaining tests --- src/Storages/System/StorageSystemColumns.cpp | 1 - .../test_dictionaries_replace/test.py | 2 + ..._external_tables_memory_tracking.reference | 16 ------ ...52_http_external_tables_memory_tracking.sh | 57 ------------------- ...clic_dependencies_on_create_and_rename.sql | 2 + 5 files changed, 4 insertions(+), 74 deletions(-) delete mode 100644 tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference delete mode 100755 tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index a6a791f75b5..49da1eba9ec 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -134,7 +134,6 @@ protected: cols_required_for_sorting_key = metadata_snapshot->getColumnsRequiredForSortingKey(); cols_required_for_primary_key = metadata_snapshot->getColumnsRequiredForPrimaryKey(); cols_required_for_sampling = metadata_snapshot->getColumnsRequiredForSampling(); - LOG_DEBUG(getLogger("StorageSystemColumns"), "Get column sizes for table {}.{}", database_name, table_name); column_sizes = storage->getColumnSizes(); } diff --git a/tests/integration/test_dictionaries_replace/test.py b/tests/integration/test_dictionaries_replace/test.py index bf406f46cb1..efb65c15c49 100644 --- a/tests/integration/test_dictionaries_replace/test.py +++ b/tests/integration/test_dictionaries_replace/test.py @@ -134,3 +134,5 @@ def test_create_or_replace(database, instance_to_create_dictionary, instances_to expected_result = TSV([[0, 1], [5, 26], [7, 50], [11, 0]]) assert instance.query(select_query) == expected_result assert instance.query(select_query, user="dictget_user") == expected_result + + instance_to_create_dictionary.query(f"DROP DICTIONARY IF EXISTS {database}.dict") diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference deleted file mode 100644 index 1fc09c8d154..00000000000 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference +++ /dev/null @@ -1,16 +0,0 @@ -Checking input_format_parallel_parsing=false& -1 -Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1 -1 -Checking input_format_parallel_parsing=false&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=true& -1 -Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1 -1 -Checking input_format_parallel_parsing=true&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true -1 diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh deleted file mode 100755 index 8aba362d6af..00000000000 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash -# 
Tags: no-tsan, no-cpu-aarch64, no-parallel -# TSan does not supports tracing. -# trace_log doesn't work on aarch64 - -# Regression for proper release of Context, -# via tracking memory of external tables. - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -tmp_file=$(mktemp "$CURDIR/clickhouse.XXXXXX.csv") -trap 'rm $tmp_file' EXIT - -$CLICKHOUSE_CLIENT -q "SELECT toString(number) FROM numbers(1e6) FORMAT TSV" > "$tmp_file" - -function run_and_check() -{ - local query_id - query_id="$(${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SELECT generateUUIDv4()')" - - echo "Checking $*" - - # Run query with external table (implicit StorageMemory user) - $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&max_untracked_memory=0&query_id=$query_id&$*" -o /dev/null - - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SYSTEM FLUSH LOGS' - - # Check that temporary table had been destroyed. - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&allow_introspection_functions=1" --data-binary @- <<<" - WITH arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS sym - SELECT 1 FROM system.trace_log - PREWHERE - query_id = '$query_id' AND - trace_type = 'MemorySample' AND - /* only deallocations */ - size < 0 AND - event_date >= yesterday() - WHERE - sym LIKE '%DB::StorageMemory::drop%\n%TemporaryTableHolder::~TemporaryTableHolder%' - LIMIT 1 - " -} - -for input_format_parallel_parsing in false true; do - query_args_variants=( - "" - "cancel_http_readonly_queries_on_client_close=1&readonly=1" - "send_progress_in_http_headers=true" - # nested progress callback - "cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true" - ) - for query_args in "${query_args_variants[@]}"; do - run_and_check "input_format_parallel_parsing=$input_format_parallel_parsing&$query_args" - done -done diff --git a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql index e6215c3da1e..0cadd4f5cee 100644 --- a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql +++ b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql @@ -1,3 +1,5 @@ +-- Tags: atomic-database + DROP TABLE IF EXISTS test; CREATE TABLE test (id UInt64, value String) ENGINE=MergeTree ORDER BY id; INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(10); From ea403eec0a8251c92f35f12a704108f3138dca1c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 21 Jun 2024 22:45:00 +0000 Subject: [PATCH 046/100] Fix --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d73c9a10ad9..76b9c26eab5 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -898,7 +898,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti assert(as_database_saved.empty() && as_table_saved.empty()); std::swap(create.as_database, as_database_saved); std::swap(create.as_table, as_table_saved); - if (!as_database_saved.empty()) + if (!as_table_saved.empty()) create.is_create_empty = false; return properties; From 
e59f4b7a18bbc2d8ccc1365c3aceb6107351b861 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Jun 2024 17:50:25 +0200 Subject: [PATCH 047/100] Fix logger.console_log_level (never works) Signed-off-by: Azat Khuzhin --- src/Loggers/Loggers.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 0bd4b94d999..35b96bce42a 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -321,7 +321,12 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log bool should_log_to_console = isatty(STDIN_FILENO) || isatty(STDERR_FILENO); if (config.getBool("logger.console", false) || (!config.hasProperty("logger.console") && !is_daemon && should_log_to_console)) - split->setLevel("console", log_level); + { + auto console_log_level_string = config.getString("logger.console_log_level", log_level_string); + auto console_log_level = Poco::Logger::parseLevel(console_log_level_string); + max_log_level = std::max(console_log_level, max_log_level); + split->setLevel("console", console_log_level); + } else split->setLevel("console", 0); From ba99955f11736a74af74a8b1a8f5335c4d4ee11f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Jun 2024 17:51:59 +0200 Subject: [PATCH 048/100] Add missing logger directives into config.xml Signed-off-by: Azat Khuzhin --- .../operations/server-configuration-parameters/settings.md | 3 +++ programs/server/config.xml | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index cdbb42a4ee7..db8157592db 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1463,6 +1463,9 @@ Keys: - `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. - `count` – The number of archived log files that ClickHouse stores. - `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. +- `console_log_level` – Logging level for console. Defaults to `level`. +- `use_syslog` – Log to syslog as well. +- `syslog_level` – Logging level for logging to syslog. - `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. - `formatting` – Specify log format to be printed in console log (currently only `json` supported).
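For illustration (a hedged sketch, not part of the patch): a minimal `config.xml` fragment using the key this fix makes effective, assuming the standard `logger` section layout. The file log keeps the global `level` while the console is limited to warnings and above; when `console_log_level` is absent, the code above falls back to `level`.

```xml
<clickhouse>
    <logger>
        <level>trace</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <console>1</console>
        <!-- Honored after this fix; before it, the console always used the global level. -->
        <console_log_level>warning</console_log_level>
    </logger>
</clickhouse>
```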
diff --git a/programs/server/config.xml b/programs/server/config.xml index 91066cd2859..1502cad6866 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -29,7 +29,14 @@ --> 1000M 10 + + + + + + + base64En/Decode64URL --- .../functions/string-functions.md | 28 +++++++------- .../functions/string-functions.md | 8 ++-- src/Functions/FunctionBase64Conversion.h | 24 ++++++------ ...ase64UrlDecode.cpp => base64URLDecode.cpp} | 10 ++--- ...ase64UrlEncode.cpp => base64URLEncode.cpp} | 8 ++-- ...64UrlDecode.cpp => tryBase64URLDecode.cpp} | 12 +++--- .../03167_base64_url_functions.sql | 38 +++++++++---------- .../aspell-ignore/en/aspell-dict.txt | 4 +- 8 files changed, 66 insertions(+), 66 deletions(-) rename src/Functions/{base64UrlDecode.cpp => base64URLDecode.cpp} (73%) rename src/Functions/{base64UrlEncode.cpp => base64URLEncode.cpp} (78%) rename src/Functions/{tryBase64UrlDecode.cpp => tryBase64URLDecode.cpp} (69%) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a258456345e..c068b0e9d17 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1168,14 +1168,14 @@ Result: └────────────────────────────┘ ``` -## base64UrlEncode +## base64URLEncode Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). **Syntax** ```sql -base64UrlEncode(url) +base64URLEncode(url) ``` **Arguments** @@ -1189,13 +1189,13 @@ base64UrlEncode(url) **Example** ``` sql -SELECT base64UrlEncode('https://clickhouse.com'); +SELECT base64URLEncode('https://clickhouse.com'); ``` Result: ```result -┌─base64UrlEncode('https://clickhouse.com')─┐ +┌─base64URLEncode('https://clickhouse.com')─┐ │ aHR0cDovL2NsaWNraG91c2UuY29t │ └───────────────────────────────────────────┘ ``` @@ -1234,19 +1234,19 @@ Result: └──────────────────────────────────┘ ``` -## base64UrlDecode +## base64URLDecode Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). Throws an exception in case of an error. **Syntax** ```sql -base64UrlDecode(encodedUrl) +base64URLDecode(encodedUrl) ``` **Arguments** -- `encodedUrl` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown. +- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown. **Returned value** @@ -1255,13 +1255,13 @@ base64UrlDecode(encodedUrl) **Example** ``` sql -SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t'); +SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t'); ``` Result: ```result -┌─base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐ +┌─base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐ │ https://clickhouse.com │ └─────────────────────────────────────────────────┘ ``` @@ -1298,19 +1298,19 @@ SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res └────────────┴─────────────┘ ``` -## tryBase64UrlDecode +## tryBase64URLDecode -Like `base64UrlDecode` but returns an empty string in case of error. +Like `base64URLDecode` but returns an empty string in case of error. 
**Syntax** ```sql -tryBase64UrlDecode(encodedUrl) +tryBase64URLDecode(encodedUrl) ``` **Parameters** -- `encodedUrl`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. +- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. **Returned value** @@ -1321,7 +1321,7 @@ tryBase64UrlDecode(encodedUrl) Query: ```sql -SELECT tryBase64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid; +SELECT tryBase64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid; ``` ```response diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index fa76e84f130..2436581fc7f 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -538,7 +538,7 @@ SELECT base58Decode('3dc8KtHrwM'); Синоним: `TO_BASE64`. -## base64UrlEncode(s) +## base64URLEncode(s) Производит кодирование URL (String или FixedString) в base64-представление в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). @@ -548,7 +548,7 @@ SELECT base58Decode('3dc8KtHrwM'); Синоним: `FROM_BASE64`. -## base64UrlDecode(s) +## base64URLDecode(s) Декодирует base64-представление URL в исходную строку в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). При невозможности декодирования выбрасывает исключение @@ -556,9 +556,9 @@ SELECT base58Decode('3dc8KtHrwM'); Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку. -## tryBase64UrlDecode(s) +## tryBase64URLDecode(s) -Функционал аналогичен base64UrlDecode, но при невозможности декодирования возвращает пустую строку. +Функционал аналогичен base64URLDecode, но при невозможности декодирования возвращает пустую строку. ## endsWith(s, suffix) {#endswith} diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 05914be3837..083179c3ca8 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -25,10 +25,10 @@ namespace ErrorCodes enum class Base64Variant : uint8_t { Normal, - Url + URL }; -inline std::string preprocessBase64Url(std::string_view src) +inline std::string preprocessBase64URL(std::string_view src) { std::string padded_src; padded_src.reserve(src.size() + 3); @@ -70,7 +70,7 @@ inline std::string preprocessBase64Url(std::string_view src) return padded_src; } -inline size_t postprocessBase64Url(UInt8 * dst, size_t out_len) +inline size_t postprocessBase64URL(UInt8 * dst, size_t out_len) { // Do symbol substitution as described in https://datatracker.ietf.org/doc/html/rfc4648#section-5 for (size_t i = 0; i < out_len; ++i) @@ -95,7 +95,7 @@ inline size_t postprocessBase64Url(UInt8 * dst, size_t out_len) template struct Base64Encode { - static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Encode" : "base64UrlEncode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Encode" : "base64URLEncode"; static size_t getBufferSize(size_t string_length, size_t string_count) { @@ -111,8 +111,8 @@ struct Base64Encode /// Memory sanitizer doesn't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. 
__msan_unpoison(dst, outlen); - if constexpr (variant == Base64Variant::Url) - outlen = postprocessBase64Url(dst, outlen); + if constexpr (variant == Base64Variant::URL) + outlen = postprocessBase64URL(dst, outlen); return outlen; } @@ -121,7 +121,7 @@ struct Base64Encode template struct Base64Decode { - static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Decode" : "base64UrlDecode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Decode" : "base64URLDecode"; static size_t getBufferSize(size_t string_length, size_t string_count) { @@ -132,9 +132,9 @@ struct Base64Decode { int rc; size_t outlen = 0; - if constexpr (variant == Base64Variant::Url) + if constexpr (variant == Base64Variant::URL) { - std::string src_padded = preprocessBase64Url(src); + std::string src_padded = preprocessBase64URL(src); rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast(dst), &outlen, 0); } else @@ -156,7 +156,7 @@ struct Base64Decode template struct TryBase64Decode { - static constexpr auto name = (variant == Base64Variant::Normal) ? "tryBase64Decode" : "tryBase64UrlDecode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? "tryBase64Decode" : "tryBase64URLDecode"; static size_t getBufferSize(size_t string_length, size_t string_count) { @@ -167,9 +167,9 @@ struct TryBase64Decode { int rc; size_t outlen = 0; - if constexpr (variant == Base64Variant::Url) + if constexpr (variant == Base64Variant::URL) { - std::string src_padded = preprocessBase64Url(src); + std::string src_padded = preprocessBase64URL(src); rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast(dst), &outlen, 0); } else diff --git a/src/Functions/base64UrlDecode.cpp b/src/Functions/base64URLDecode.cpp similarity index 73% rename from src/Functions/base64UrlDecode.cpp rename to src/Functions/base64URLDecode.cpp index 59975d8f9d1..f5766dc60bd 100644 --- a/src/Functions/base64UrlDecode.cpp +++ b/src/Functions/base64URLDecode.cpp @@ -5,16 +5,16 @@ namespace DB { -REGISTER_FUNCTION(Base64UrlDecode) +REGISTER_FUNCTION(Base64URLDecode) { FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; - FunctionDocumentation::Syntax syntax = "base64UrlDecode(encodedUrl)"; - FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}}; + FunctionDocumentation::Syntax syntax = "base64URLDecode(encodedURL)"; + FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. 
If the string is not a valid Base64-encoded value, an exception is thrown."}}; FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; - FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "https://clickhouse.com"}}; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "https://clickhouse.com"}}; FunctionDocumentation::Categories categories = {"String encoding"}; - factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/src/Functions/base64UrlEncode.cpp b/src/Functions/base64URLEncode.cpp similarity index 78% rename from src/Functions/base64UrlEncode.cpp rename to src/Functions/base64URLEncode.cpp index 05d50170c14..73a465a30c5 100644 --- a/src/Functions/base64UrlEncode.cpp +++ b/src/Functions/base64URLEncode.cpp @@ -5,16 +5,16 @@ namespace DB { -REGISTER_FUNCTION(Base64UrlEncode) +REGISTER_FUNCTION(Base64URLEncode) { FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; - FunctionDocumentation::Syntax syntax = "base64UrlEncode(url)"; + FunctionDocumentation::Syntax syntax = "base64URLEncode(url)"; FunctionDocumentation::Arguments arguments = {{"url", "String column or constant."}}; FunctionDocumentation::ReturnedValue returned_value = "A string containing the encoded value of the argument."; - FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}}; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}}; FunctionDocumentation::Categories categories = {"String encoding"}; - factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/src/Functions/tryBase64UrlDecode.cpp b/src/Functions/tryBase64URLDecode.cpp similarity index 69% rename from src/Functions/tryBase64UrlDecode.cpp rename to src/Functions/tryBase64URLDecode.cpp index b9aaf4f9273..b44bc7538ee 100644 --- a/src/Functions/tryBase64UrlDecode.cpp +++ b/src/Functions/tryBase64URLDecode.cpp @@ -5,16 +5,16 @@ namespace DB { -REGISTER_FUNCTION(TryBase64UrlDecode) +REGISTER_FUNCTION(TryBase64URLDecode) { - FunctionDocumentation::Description description = R"(Decodes an URL from base64, like base64UrlDecode but returns an empty string in case of an error.)"; - FunctionDocumentation::Syntax syntax = "tryBase64UrlDecode(encodedUrl)"; - FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}}; + FunctionDocumentation::Description description = R"(Decodes an URL from base64, like base64URLDecode but returns an empty string in case of an error.)"; + FunctionDocumentation::Syntax syntax = "tryBase64URLDecode(encodedUrl)"; + FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. 
If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}}; FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; - FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja')", ""}}; + FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja')", ""}}; FunctionDocumentation::Categories categories = {"String encoding"}; - factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/tests/queries/0_stateless/03167_base64_url_functions.sql b/tests/queries/0_stateless/03167_base64_url_functions.sql index 674f1ae498b..6c394ba6c3a 100644 --- a/tests/queries/0_stateless/03167_base64_url_functions.sql +++ b/tests/queries/0_stateless/03167_base64_url_functions.sql @@ -2,35 +2,35 @@ -- no-fasttest because aklomp-base64 library is required -- incorrect number of arguments -SELECT base64UrlEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT base64UrlDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tryBase64UrlDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT base64UrlEncode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT base64UrlDecode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tryBase64UrlDecode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryBase64URLDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLEncode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLDecode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryBase64URLDecode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -- test with valid inputs -SELECT 'https://clickhouse.com' AS original, base64UrlEncode(original) AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); -SELECT '12?' AS original, base64UrlEncode(original) AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); -SELECT 'https://www.google.com/search?q=clickhouse+base64+decode&sca_esv=739f8bb380e4c7ed&ei=TfRiZqCDIrmnwPAP2KLRkA8&ved=0ahUKEwjg3ZHitsmGAxW5ExAIHVhRFPIQ4dUDCBA&uact=5&oq=clickhouse+base64+decode' AS original, base64UrlEncode(original) AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT 'https://clickhouse.com' AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); +SELECT '12?'
AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); +SELECT 'https://www.google.com/search?q=clickhouse+base64+decode&sca_esv=739f8bb380e4c7ed&ei=TfRiZqCDIrmnwPAP2KLRkA8&ved=0ahUKEwjg3ZHitsmGAxW5ExAIHVhRFPIQ4dUDCBA&uact=5&oq=clickhouse+base64+decode' AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); -- encoded value has no padding -SELECT 'aHR0cHM6Ly9jbGlj' AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT 'aHR0cHM6Ly9jbGlj' AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); -- encoded value has one-byte padding -SELECT 'aHR0cHM6Ly9jbGlja2g' AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT 'aHR0cHM6Ly9jbGlja2g' AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); -- encoded value has two-bytes padding -SELECT 'aHR0cHM6Ly9jbGljaw' AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT 'aHR0cHM6Ly9jbGljaw' AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); -- test with invalid inputs -SELECT base64UrlDecode('https://clickhouse.com'); -- { serverError INCORRECT_DATA } -SELECT tryBase64UrlDecode('https://clickhouse.com'); -SELECT base64UrlDecode('12?'); -- { serverError INCORRECT_DATA } -SELECT tryBase64UrlDecode('12?'); -SELECT base64UrlDecode('aHR0cHM6Ly9jbGlja'); -- { serverError INCORRECT_DATA } -SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja'); +SELECT base64URLDecode('https://clickhouse.com'); -- { serverError INCORRECT_DATA } +SELECT tryBase64URLDecode('https://clickhouse.com'); +SELECT base64URLDecode('12?'); -- { serverError INCORRECT_DATA } +SELECT tryBase64URLDecode('12?'); +SELECT base64URLDecode('aHR0cHM6Ly9jbGlja'); -- { serverError INCORRECT_DATA } +SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja'); -- test FixedString argument -SELECT toFixedString('https://clickhouse.com', 22) AS original, base64UrlEncode(original) AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT toFixedString('https://clickhouse.com', 22) AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 397cdaff212..d031fe4d957 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1012,8 +1012,8 @@ Updatable Uppercased Uptime Uptrace -UrlDecode -UrlEncode +URLDecode +URLEncode UserID Util VARCHAR From d06d2a69db53007991d6e8387f0eff6447ac3fd8 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 27 Jun 2024 11:17:36 +0200 Subject: [PATCH 080/100] Fixed tests --- tests/config/install.sh | 10 +-- ...system_cache_on_write_operations.reference | 79 +++++++++++++++++++ 2 files changed, 81 insertions(+), 8 deletions(-) diff --git a/tests/config/install.sh b/tests/config/install.sh index 4dd7b894e71..08ee11a7407 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -182,14 +182,8 @@ elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then ln -sf $SRC_PATH/config.d/azure_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ fi -if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then\ - if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - echo "Azure configuration will not be added" - else - echo "Adding azure configuration" - ln -sf $SRC_PATH/config.d/azure_storage_conf.xml $DEST_SERVER_PATH/config.d/ - fi - +if [[ -n 
"$EXPORT_S3_STORAGE_POLICIES" ]]; then + ln -sf $SRC_PATH/config.d/azure_storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02944.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02963.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 410e4c9ee5d..f53f00992e7 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -219,33 +219,112 @@ SELECT count() FROM test_02241 SELECT count() FROM test_02241 WHERE value LIKE '%010%' 18816 Using storage policy: azure_cache +DROP TABLE IF EXISTS test_02241 +CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='azure_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1 +SYSTEM STOP MERGES test_02241 +SYSTEM DROP FILESYSTEM CACHE +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 0 +SELECT count(), sum(size) FROM system.filesystem_cache 0 0 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 1100 +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 0 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT count(), sum(size) size FROM system.filesystem_cache 8 1100 +SYSTEM DROP FILESYSTEM CACHE +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS 
cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000) +SELECT count(), sum(size) FROM system.filesystem_cache 24 84045 +SYSTEM START MERGES test_02241 +OPTIMIZE TABLE test_02241 FINAL +SELECT count(), sum(size) FROM system.filesystem_cache 32 167243 +ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100 +SELECT count(), sum(size) FROM system.filesystem_cache 41 250541 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) +SYSTEM FLUSH LOGS INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) 0 +SELECT count() FROM test_02241 5010500 +SELECT count() FROM test_02241 WHERE value LIKE '%010%' 18816 From c00ad13dc28415c31703b144c08b523b87be2996 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 27 Jun 2024 12:22:45 +0200 Subject: [PATCH 081/100] fix for #65757 --- docs/en/operations/utilities/clickhouse-local.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 93a3fecf3c6..f19643a3fa5 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -236,10 +236,10 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec. Previous example is the same as: ``` bash -$ echo -e "1,2\n3,4" | clickhouse-local --query " +$ echo -e "1,2\n3,4" | clickhouse-local -n --query " CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; - DROP TABLE table" + DROP TABLE table;" Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec. 1 2 3 4 From 53dba602453258ba10b27f7229b2eec2f36e4f42 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jun 2024 12:47:23 +0200 Subject: [PATCH 082/100] update doc for `cgroups_memory_usage_observer_wait_time` --- docs/en/operations/server-configuration-parameters/settings.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index cdbb42a4ee7..64823960e75 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -498,6 +498,8 @@ Default: 0.9 Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. 
(see settings `cgroup_memory_watcher_hard_limit_ratio` and `cgroup_memory_watcher_soft_limit_ratio`).
+To disable the cgroup observer, set this value to `0`.
+
 Type: UInt64

 Default: 15

From 86c3036bfb9eb1f307a79b35efacac3409e87a13 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Thu, 27 Jun 2024 14:04:44 +0200
Subject: [PATCH 083/100] Fix flaky test_replicated_database::test_alter_attach

---
 tests/integration/test_replicated_database/test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index f23384b5c04..089c7f57d05 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -337,8 +337,10 @@ def test_alter_attach(started_cluster, attachable_part, engine):
         main_node.query(f"SELECT CounterID FROM {database}.alter_attach_test")
         == "123\n"
     )
+    # On the other node, data is replicated only if using a Replicated table engine
     if engine == "ReplicatedMergeTree":
+        dummy_node.query(f"SYSTEM SYNC REPLICA {database}.alter_attach_test LIGHTWEIGHT")
         assert (
             dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test")
             == "123\n"

From 988ba2e22dba550d5610a3658571d0bb3493b4c9 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Thu, 27 Jun 2024 14:17:37 +0200
Subject: [PATCH 084/100] Fix mysqlISO8601Date

---
 src/Functions/parseDateTime.cpp | 3 +--
 .../0_stateless/03197_fix_parse_mysql_iso_date.reference | 1 +
 tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference
 create mode 100644 tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql

diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp
index 11e210d2cc2..7407eadf19c 100644
--- a/src/Functions/parseDateTime.cpp
+++ b/src/Functions/parseDateTime.cpp
@@ -1015,8 +1015,7 @@ namespace
         [[nodiscard]] static PosOrError mysqlISO8601Date(Pos cur, Pos end, const String & fragment, DateTime & date)
         {
-            if (auto status = checkSpace(cur, end, 10, "mysqlISO8601Date requires size >= 10", fragment))
-                return tl::unexpected(status.error());
+            RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 10, "mysqlISO8601Date requires size >= 10", fragment))

             Int32 year;
             Int32 month;
diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference
new file mode 100644
index 00000000000..b3ac1c01adc
--- /dev/null
+++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference
@@ -0,0 +1 @@
+2024-06-20 20:00:00
diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql
new file mode 100644
index 00000000000..34796df6299
--- /dev/null
+++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql
@@ -0,0 +1 @@
+SELECT parseDateTime(formatDateTime(toDateTime('2024-06-20 12:00:00'), '%F %T', 'Europe/Paris'), '%F %T') AS x

From b8e803218c1b500e96b0884eee02716e03bb35f7 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Thu, 27 Jun 2024 13:32:23 +0100
Subject: [PATCH 085/100] Remove gtest_ternary_logic and unneeded createFunctionBaseCast sources

---
 programs/library-bridge/CMakeLists.txt | 1 -
 programs/odbc-bridge/CMakeLists.txt | 1 -
 src/Functions/CMakeLists.txt | 25 --
 src/Functions/tests/gtest_ternary_logic.cpp | 354 --------------------
 4 files changed, 381 deletions(-)
 delete mode 100644
src/Functions/tests/gtest_ternary_logic.cpp diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 2822009a260..86410d712ec 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -11,7 +11,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES LibraryBridgeHandlers.cpp SharedLibrary.cpp library-bridge.cpp - createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 580251b5bde..14af330f788 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -13,7 +13,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES getIdentifierQuote.cpp odbc-bridge.cpp validateODBCConnectionString.cpp - createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index e2704f407e3..7c90f83569a 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -3,34 +3,9 @@ add_subdirectory(divide) include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions .) -set(DBMS_FUNCTIONS - IFunction.cpp - FunctionFactory.cpp - FunctionHelpers.cpp - extractTimeZoneFromFunctionArguments.cpp - FunctionsLogical.cpp - if.cpp - multiIf.cpp - multiMatchAny.cpp - checkHyperscanRegexp.cpp - CastOverloadResolver.cpp - # Provides dependency for cast - createFunctionBaseCast() - FunctionsConversion.cpp -) -extract_into_parent_list(clickhouse_functions_sources dbms_sources ${DBMS_FUNCTIONS}) -extract_into_parent_list(clickhouse_functions_headers dbms_headers - IFunction.h - FunctionFactory.h - FunctionHelpers.h - extractTimeZoneFromFunctionArguments.h - FunctionsLogical.h - CastOverloadResolver.h -) - add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources}) if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options(clickhouse_functions_obj PRIVATE "-g0") - set_source_files_properties(${DBMS_FUNCTIONS} DIRECTORY .. PROPERTIES COMPILE_FLAGS "-g0") endif() list (APPEND OBJECT_LIBS $) diff --git a/src/Functions/tests/gtest_ternary_logic.cpp b/src/Functions/tests/gtest_ternary_logic.cpp deleted file mode 100644 index 5ecafabb361..00000000000 --- a/src/Functions/tests/gtest_ternary_logic.cpp +++ /dev/null @@ -1,354 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// I know that inclusion of .cpp is not good at all -#include // NOLINT - -using namespace DB; -using TernaryValues = std::vector; - -struct LinearCongruentialGenerator -{ - /// Constants from `man lrand48_r`. - static constexpr UInt64 a = 0x5DEECE66D; - static constexpr UInt64 c = 0xB; - - /// And this is from `head -c8 /dev/urandom | xxd -p` - UInt64 current = 0x09826f4a081cee35ULL; - - UInt32 next() - { - current = current * a + c; - return static_cast(current >> 16); - } -}; - -void generateRandomTernaryValue(LinearCongruentialGenerator & gen, Ternary::ResultType * output, size_t size, double false_ratio, double null_ratio) -{ - /// The LinearCongruentialGenerator generates nonnegative integers uniformly distributed over the interval [0, 2^32). - /// See https://linux.die.net/man/3/nrand48 - - double false_percentile = false_ratio; - double null_percentile = false_ratio + null_ratio; - - false_percentile = false_percentile > 1 ? 
1 : false_percentile; - null_percentile = null_percentile > 1 ? 1 : null_percentile; - - UInt32 false_threshold = static_cast(static_cast(std::numeric_limits::max()) * false_percentile); - UInt32 null_threshold = static_cast(static_cast(std::numeric_limits::max()) * null_percentile); - - for (Ternary::ResultType * end = output + size; output != end; ++output) - { - UInt32 val = gen.next(); - *output = val < false_threshold ? Ternary::False : (val < null_threshold ? Ternary::Null : Ternary::True); - } -} - -template -ColumnPtr createColumnNullable(const Ternary::ResultType * ternary_values, size_t size) -{ - auto nested_column = ColumnVector::create(size); - auto null_map = ColumnUInt8::create(size); - auto & nested_column_data = nested_column->getData(); - auto & null_map_data = null_map->getData(); - - for (size_t i = 0; i < size; ++i) - { - if (ternary_values[i] == Ternary::Null) - { - null_map_data[i] = 1; - nested_column_data[i] = 0; - } - else if (ternary_values[i] == Ternary::True) - { - null_map_data[i] = 0; - nested_column_data[i] = 100; - } - else - { - null_map_data[i] = 0; - nested_column_data[i] = 0; - } - } - - return ColumnNullable::create(std::move(nested_column), std::move(null_map)); -} - -template -ColumnPtr createColumnVector(const Ternary::ResultType * ternary_values, size_t size) -{ - auto column = ColumnVector::create(size); - auto & column_data = column->getData(); - - for (size_t i = 0; i < size; ++i) - { - if (ternary_values[i] == Ternary::True) - { - column_data[i] = 100; - } - else - { - column_data[i] = 0; - } - } - - return column; -} - -template -ColumnPtr createRandomColumn(LinearCongruentialGenerator & gen, TernaryValues & ternary_values) -{ - size_t size = ternary_values.size(); - Ternary::ResultType * ternary_data = ternary_values.data(); - - if constexpr (std::is_same_v) - { - generateRandomTernaryValue(gen, ternary_data, size, 0.3, 0.7); - return createColumnNullable(ternary_data, size); - } - else if constexpr (std::is_same_v>) - { - generateRandomTernaryValue(gen, ternary_data, size, 0.5, 0); - return createColumnVector(ternary_data, size); - } - else - { - auto nested_col = ColumnNothing::create(size); - auto null_map = ColumnUInt8::create(size); - - memset(ternary_data, Ternary::Null, size); - - return ColumnNullable::create(std::move(nested_col), std::move(null_map)); - } -} - -/* The truth table of ternary And and Or operations: - * +-------+-------+---------+--------+ - * | a | b | a And b | a Or b | - * +-------+-------+---------+--------+ - * | False | False | False | False | - * | False | Null | False | Null | - * | False | True | False | True | - * | Null | False | False | Null | - * | Null | Null | Null | Null | - * | Null | True | Null | True | - * | True | False | False | True | - * | True | Null | Null | True | - * | True | True | True | True | - * +-------+-------+---------+--------+ - * - * https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic - */ -template -bool testTernaryLogicTruthTable() -{ - constexpr size_t size = 9; - - Ternary::ResultType col_a_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null, Ternary::Null, Ternary::True, Ternary::True, Ternary::True}; - Ternary::ResultType col_b_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::False, Ternary::Null, Ternary::True,Ternary::False, Ternary::Null, Ternary::True}; - Ternary::ResultType and_expected_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::False, Ternary::Null, 
Ternary::Null,Ternary::False, Ternary::Null, Ternary::True}; - Ternary::ResultType or_expected_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::Null, Ternary::Null, Ternary::True,Ternary::True, Ternary::True, Ternary::True}; - Ternary::ResultType * expected_ternary; - - - if constexpr (std::is_same_v) - { - expected_ternary = and_expected_ternary; - } - else - { - expected_ternary = or_expected_ternary; - } - - auto col_a = createColumnNullable(col_a_ternary, size); - auto col_b = createColumnNullable(col_b_ternary, size); - ColumnRawPtrs arguments = {col_a.get(), col_b.get()}; - - auto col_res = ColumnUInt8::create(size); - auto & col_res_data = col_res->getData(); - - OperationApplier::apply(arguments, col_res->getData(), false); - - for (size_t i = 0; i < size; ++i) - { - if (col_res_data[i] != expected_ternary[i]) return false; - } - - return true; -} - -template -bool testTernaryLogicOfTwoColumns(size_t size) -{ - LinearCongruentialGenerator gen; - - TernaryValues left_column_ternary(size); - TernaryValues right_column_ternary(size); - TernaryValues expected_ternary(size); - - ColumnPtr left = createRandomColumn(gen, left_column_ternary); - ColumnPtr right = createRandomColumn(gen, right_column_ternary); - - for (size_t i = 0; i < size; ++i) - { - /// Given that False is less than Null and Null is less than True, the And operation can be implemented - /// with std::min, and the Or operation can be implemented with std::max. - if constexpr (std::is_same_v) - { - expected_ternary[i] = std::min(left_column_ternary[i], right_column_ternary[i]); - } - else - { - expected_ternary[i] = std::max(left_column_ternary[i], right_column_ternary[i]); - } - } - - ColumnRawPtrs arguments = {left.get(), right.get()}; - - auto col_res = ColumnUInt8::create(size); - auto & col_res_data = col_res->getData(); - - OperationApplier::apply(arguments, col_res->getData(), false); - - for (size_t i = 0; i < size; ++i) - { - if (col_res_data[i] != expected_ternary[i]) return false; - } - - return true; -} - -TEST(TernaryLogicTruthTable, NestedUInt8) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedUInt16) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedUInt32) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedUInt64) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt8) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt16) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt32) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt64) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = 
testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedFloat32) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedFloat64) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, TwoNullable) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, TwoVector) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, TwoNothing) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, NullableVector) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, NullableNothing) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, VectorNothing) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} From b972906dab69ddd46b4e66506d1a0868cc4ebf6b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 27 Jun 2024 14:42:21 +0200 Subject: [PATCH 086/100] Fix timezone --- .../0_stateless/03197_fix_parse_mysql_iso_date.reference | 2 +- tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference index b3ac1c01adc..94d8605fd74 100644 --- a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference +++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference @@ -1 +1 @@ -2024-06-20 20:00:00 +2024-06-20 12:00:00 diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql index 34796df6299..9a996803249 100644 --- a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql +++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql @@ -1 +1 @@ -SELECT parseDateTime(formatDateTime(toDateTime('2024-06-20 12:00:00'), '%F %T', 'Europe/Paris'), '%F %T') AS x +SELECT parseDateTime(formatDateTime(toDateTime('2024-06-20 12:00:00'), '%F %T', 'Europe/Paris'), '%F %T', 'UTC') AS x From fcf8189b75e1239319fe38c36711be9a6d91bc2e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 27 Jun 2024 15:06:02 +0200 Subject: [PATCH 087/100] More fixes --- src/Functions/parseDateTime.cpp | 12 +++++------- .../03197_fix_parse_mysql_iso_date.reference | 3 ++- .../0_stateless/03197_fix_parse_mysql_iso_date.sql | 3 ++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git 
a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 7407eadf19c..162b8c58873 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -978,8 +978,7 @@ namespace [[nodiscard]] static PosOrError mysqlAmericanDate(Pos cur, Pos end, const String & fragment, DateTime & date) { - if (auto status = checkSpace(cur, end, 8, "mysqlAmericanDate requires size >= 8", fragment)) - return tl::unexpected(status.error()); + RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 8, "mysqlAmericanDate requires size >= 8", fragment)) Int32 month; ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumber2(cur, end, fragment, month))) @@ -993,7 +992,7 @@ namespace Int32 year; ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumber2(cur, end, fragment, year))) - RETURN_ERROR_IF_FAILED(date.setYear(year)) + RETURN_ERROR_IF_FAILED(date.setYear(year + 2000)) return cur; } @@ -1461,8 +1460,7 @@ namespace [[nodiscard]] static PosOrError jodaDayOfWeekText(size_t /*min_represent_digits*/, Pos cur, Pos end, const String & fragment, DateTime & date) { - if (auto result= checkSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3", fragment); !result.has_value()) - return tl::unexpected(result.error()); + RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3", fragment)) String text1(cur, 3); boost::to_lower(text1); @@ -1555,8 +1553,8 @@ namespace Int32 day_of_month; ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength( cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, day_of_month))) - if (auto res = date.setDayOfMonth(day_of_month); !res.has_value()) - return tl::unexpected(res.error()); + RETURN_ERROR_IF_FAILED(date.setDayOfMonth(day_of_month)) + return cur; } diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference index 94d8605fd74..bd9ab3be3fa 100644 --- a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference +++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference @@ -1 +1,2 @@ -2024-06-20 12:00:00 +2024-06-20 00:00:00 +2024-06-20 00:00:00 diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql index 9a996803249..e83738f7214 100644 --- a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql +++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql @@ -1 +1,2 @@ -SELECT parseDateTime(formatDateTime(toDateTime('2024-06-20 12:00:00'), '%F %T', 'Europe/Paris'), '%F %T', 'UTC') AS x +SELECT parseDateTime('2024-06-20', '%F', 'UTC') AS x; +SELECT parseDateTime('06/20/24', '%D', 'UTC') AS x; From ecc9d214c30a984761106e8fafa05c62a8838572 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 27 Jun 2024 16:36:26 +0200 Subject: [PATCH 088/100] CI: Fix for Builds report job in backports and releases --- .github/workflows/backport_branches.yml | 6 ++++++ .github/workflows/release_branches.yml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 48e8fbbba05..64c3d2f8342 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -167,10 +167,16 @@ jobs: steps: - name: Check out repository code uses: ClickHouse/checkout@v1 + - name: Download reports + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds - name: Builds 
report run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_tsan package_debug binary_darwin binary_darwin_aarch64 + - name: Set status + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds ############################################################################################ #################################### INSTALL PACKAGES ###################################### ############################################################################################ diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 3e898c69ade..6bf846d7535 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -184,10 +184,16 @@ jobs: steps: - name: Check out repository code uses: ClickHouse/checkout@v1 + - name: Download reports + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds - name: Builds report run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_msan package_ubsan package_tsan package_debug binary_darwin binary_darwin_aarch64 + - name: Set status + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds MarkReleaseReady: if: ${{ !failure() && !cancelled() }} needs: From 3362b521d23e1031fa01181a49e6f6abb469bb29 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 27 Jun 2024 14:43:44 +0000 Subject: [PATCH 089/100] Automatic style fix --- tests/integration/test_replicated_database/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 089c7f57d05..60a6e099b22 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -340,7 +340,9 @@ def test_alter_attach(started_cluster, attachable_part, engine): # On the other node, data is replicated only if using a Replicated table engine if engine == "ReplicatedMergeTree": - dummy_node.query(f"SYSTEM SYNC REPLICA {database}.alter_attach_test LIGHTWEIGHT") + dummy_node.query( + f"SYSTEM SYNC REPLICA {database}.alter_attach_test LIGHTWEIGHT" + ) assert ( dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") == "123\n" From 2265861f836b7cb346c74dfdee370295695d4849 Mon Sep 17 00:00:00 2001 From: Linh Giang <165205637+linhgiang24@users.noreply.github.com> Date: Thu, 27 Jun 2024 09:38:19 -0600 Subject: [PATCH 090/100] fixed misspelled word originally "quering" changed to "querying" --- docs/en/sql-reference/statements/alter/view.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index fb7a5bd7c03..5f3dae0a9c0 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -134,8 +134,8 @@ PRIMARY KEY (event_type, ts) ORDER BY (event_type, ts, browser) SETTINGS index_granularity = 8192 --- !!! The columns' definition is unchanged but it does not matter, we are not quering --- MATERIALIZED VIEW, we are quering TO (storage) table. +-- !!! 
The columns' definition is unchanged but it does not matter, we are not quering
--- MATERIALIZED VIEW, we are quering TO (storage) table.
+-- !!! The columns' definition is unchanged but it does not matter, we are not querying
+-- MATERIALIZED VIEW, we are querying TO (storage) table.
 -- SELECT section is updated.

 SHOW CREATE TABLE mv FORMAT TSVRaw;

From fbb96d17de9f4e8ce03e03feda0e099edfe12039 Mon Sep 17 00:00:00 2001
From: Max K
Date: Thu, 27 Jun 2024 16:44:00 +0200
Subject: [PATCH 091/100] CI: New create release workflow

---
 .github/workflows/create_release.yml | 29 ++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 .github/workflows/create_release.yml

diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml
new file mode 100644
index 00000000000..3988df3b2b1
--- /dev/null
+++ b/.github/workflows/create_release.yml
@@ -0,0 +1,29 @@
+name: CreateRelease
+
+concurrency:
+  group: release
+
+'on':
+  workflow_dispatch:
+    inputs:
+      sha:
+        description: 'The SHA hash of the commit from which to create the release'
+        required: true
+        type: string
+      type:
+        description: 'The type of release: "new" for a new release or "patch" for a patch release'
+        required: true
+        type: choice
+        options:
+        - new
+        - patch
+
+jobs:
+  Release:
+    runs-on: [self-hosted, style-checker-aarch64]
+    steps:
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+      - name: Create release (dry run)
+        run: |
+          python3 ./tests/ci/release.py --commit ${{ inputs.sha }} --type ${{ inputs.type }} --dry-run

From 9e4d976e5e466c3d7da8da13ddbcdb87ddc87e4a Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 27 Jun 2024 17:24:43 +0000
Subject: [PATCH 092/100] Fix clickhouse-test invocation

---
 docs/en/development/tests.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md
index 8dff6f0ed1d..269995a1a96 100644
--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@@ -28,7 +28,7 @@ run, for example, the test `01428_hash_set_nan_key`, change to the repository
 folder and run the following command:

 ```
-PATH=$PATH: tests/clickhouse-test 01428_hash_set_nan_key
+PATH=:$PATH tests/clickhouse-test 01428_hash_set_nan_key
 ```

 Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which

From 78c659e8ce727dc6329407f54a3edf79e83e213c Mon Sep 17 00:00:00 2001
From: justindeguzman
Date: Thu, 27 Jun 2024 12:15:48 -0700
Subject: [PATCH 093/100] [Docs] Specify that Named Collections are not supported in Cloud

---
 docs/en/operations/named-collections.md | 4 ++++
 docs/en/sql-reference/statements/alter/named-collection.md | 4 ++++
 docs/en/sql-reference/statements/create/named-collection.md | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md
index 91438cfb675..59ee05d1f9e 100644
--- a/docs/en/operations/named-collections.md
+++ b/docs/en/operations/named-collections.md
@@ -5,6 +5,10 @@ sidebar_label: "Named collections"
 title: "Named collections"
 ---

+import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
+
+<CloudNotSupportedBadge />
+
 Named collections provide a way to store collections of key-value pairs to be used to configure integrations with external sources. You can use named collections with dictionaries, tables, table functions, and object storage.
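For orientation, the feature these badges annotate works roughly like the sketch below; the collection name `mymysql`, its keys, and the `mysql()` call are illustrative, not taken from this patch.

```sql
-- Define reusable connection parameters once.
CREATE NAMED COLLECTION mymysql AS
    host = '127.0.0.1',
    port = 3306,
    user = 'default',
    password = '';

-- Reuse them in a table function, overriding individual keys inline.
SELECT * FROM mysql(mymysql, database = 'test', table = 'example');
```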
diff --git a/docs/en/sql-reference/statements/alter/named-collection.md b/docs/en/sql-reference/statements/alter/named-collection.md
index 71d4bfadd9c..ab772fe4dcf 100644
--- a/docs/en/sql-reference/statements/alter/named-collection.md
+++ b/docs/en/sql-reference/statements/alter/named-collection.md
@@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/alter/named-collection
 sidebar_label: NAMED COLLECTION
 ---

+import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
+
+<CloudNotSupportedBadge />
+
 # ALTER NAMED COLLECTION

 This query intends to modify already existing named collections.
diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md
index f69fa2e3678..a4e146c814c 100644
--- a/docs/en/sql-reference/statements/create/named-collection.md
+++ b/docs/en/sql-reference/statements/create/named-collection.md
@@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/create/named-collection
 sidebar_label: NAMED COLLECTION
 ---

+import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
+
+<CloudNotSupportedBadge />
+
 # CREATE NAMED COLLECTION

 Creates a new named collection.

From 2dc571b759b40672047d831e184c1f63e2377868 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Fri, 28 Jun 2024 00:50:50 +0200
Subject: [PATCH 094/100] Forbid join algorithm randomisation for 03094_one_thousand_joins

---
 tests/queries/0_stateless/03094_one_thousand_joins.sql | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/03094_one_thousand_joins.sql b/tests/queries/0_stateless/03094_one_thousand_joins.sql
index ea159f0e4c0..1f6bd99df7f 100644
--- a/tests/queries/0_stateless/03094_one_thousand_joins.sql
+++ b/tests/queries/0_stateless/03094_one_thousand_joins.sql
@@ -1,6 +1,7 @@
 -- Tags: no-fasttest, no-tsan, long
 -- (no-tsan because it has a small maximum stack size and the test would fail with TOO_DEEP_RECURSION)

+SET join_algorithm = 'default'; -- for 'full_sorting_merge' the query is 10x slower
 SET allow_experimental_analyzer = 1; -- old analyzer returns TOO_DEEP_SUBQUERIES

 -- Bug 33446, marked as 'long' because it still runs around 10 sec

From 3bc41192d8db143ccc8049b0acdfb3c8dfded340 Mon Sep 17 00:00:00 2001
From: Michael Kolupaev
Date: Thu, 27 Jun 2024 23:27:14 +0000
Subject: [PATCH 095/100] Fix 02931_rewrite_sum_column_and_constant flakiness

---
 ..._rewrite_sum_column_and_constant.reference | 26 +++++++++----------
 .../02931_rewrite_sum_column_and_constant.sql | 11 ++++----
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference
index 3124698d218..f9b72ba9c6a 100644
--- a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference
+++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference
@@ -245,21 +245,21 @@ EXPLAIN SYNTAX (SELECT 2 * count(uint64) - sum(uint64) From test_table);
 SELECT (2 * count(uint64)) - sum(uint64)
 FROM test_table
 SELECT sum(float64 + 2) From test_table;
-26.5
+26.875
 SELECT sum(2 + float64) From test_table;
-26.5
+26.875
 SELECT sum(float64 - 2) From test_table;
-6.5
+6.875
 SELECT sum(2 - float64) From test_table;
--6.5
+-6.875
 SELECT sum(float64) + 2 * count(float64) From test_table;
-26.5
+26.875
 SELECT 2 * count(float64) + sum(float64) From test_table;
-26.5
+26.875
 SELECT sum(float64) - 2 * count(float64) From test_table;
-6.5
+6.875
 SELECT 2 * count(float64) - sum(float64) From test_table;
--6.5
+-6.875
 EXPLAIN
SYNTAX (SELECT sum(float64 + 2) From test_table); SELECT sum(float64) + (2 * count(float64)) FROM test_table @@ -375,25 +375,25 @@ EXPLAIN SYNTAX (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - SELECT ((2 * count(uint64)) - sum(uint64)) + ((3 * count(uint64)) - sum(uint64)) FROM test_table SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; -58 +58.75 SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; -5 SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; -8 +8.75 SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; 5 SELECT sum(2 - float64) - sum(3 - float64) From test_table; -5 SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; -58 +58.75 SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; -5 SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; -8 +8.75 SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; 5 SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; --8 +-8.75 EXPLAIN SYNTAX (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); SELECT (sum(float64) + (2 * count(float64))) + (sum(float64) + (3 * count(float64))) FROM test_table diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql index c7b0ff82442..94baee6f1ba 100644 --- a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql +++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql @@ -23,11 +23,12 @@ CREATE TABLE test_table decimal32 Decimal32(5), ) ENGINE=MergeTree ORDER BY uint64; -INSERT INTO test_table VALUES (1, 1.1, 1.11); -INSERT INTO test_table VALUES (2, 2.2, 2.22); -INSERT INTO test_table VALUES (3, 3.3, 3.33); -INSERT INTO test_table VALUES (4, 4.4, 4.44); -INSERT INTO test_table VALUES (5, 5.5, 5.55); +-- Use Float64 numbers divisible by 1/16 (or some other small power of two), so that their sum doesn't depend on summation order. +INSERT INTO test_table VALUES (1, 1.125, 1.11); +INSERT INTO test_table VALUES (2, 2.250, 2.22); +INSERT INTO test_table VALUES (3, 3.375, 3.33); +INSERT INTO test_table VALUES (4, 4.500, 4.44); +INSERT INTO test_table VALUES (5, 5.625, 5.55); -- { echoOn } SELECT sum(uint64 + 1 AS i) from test_table where i > 0; From 8dbf159f3faf4da0e0bfa6dbb09518e38816978e Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 28 Jun 2024 01:24:20 +0000 Subject: [PATCH 096/100] fix --- src/Interpreters/ExpressionActions.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 04f29f35c3c..87082497581 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -194,6 +194,10 @@ static void setLazyExecutionInfo( } lazy_execution_info.short_circuit_ancestors_info[parent].insert(indexes.begin(), indexes.end()); + /// After checking arguments_with_disabled_lazy_execution, if there is no relation with parent, + /// disable the current node. + if (indexes.empty()) + lazy_execution_info.can_be_lazy_executed = false; } else /// If lazy execution is disabled for one of parents, we should disable it for current node. @@ -291,9 +295,9 @@ static std::unordered_set processShortCircuitFunctions /// Firstly, find all short-circuit functions and get their settings. 
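    /// The settings object is declared inside the loop below on purpose: isShortCircuit()
    /// fills it per candidate node, and a single instance hoisted out of the loop would
    /// carry one function's settings over into the next iteration.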
std::unordered_map short_circuit_nodes; - IFunctionBase::ShortCircuitSettings short_circuit_settings; for (const auto & node : nodes) { + IFunctionBase::ShortCircuitSettings short_circuit_settings; if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->isShortCircuit(short_circuit_settings, node.children.size()) && !node.children.empty()) short_circuit_nodes[&node] = short_circuit_settings; } From f15755ecc6b14ff7cfb803fefc58d248b9dafb43 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 28 Jun 2024 01:38:06 +0000 Subject: [PATCH 097/100] add test --- .../03071_fix_short_circuit_logic.reference | 1 + .../03071_fix_short_circuit_logic.sql | 62 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 tests/queries/0_stateless/03071_fix_short_circuit_logic.reference create mode 100644 tests/queries/0_stateless/03071_fix_short_circuit_logic.sql diff --git a/tests/queries/0_stateless/03071_fix_short_circuit_logic.reference b/tests/queries/0_stateless/03071_fix_short_circuit_logic.reference new file mode 100644 index 00000000000..48aedfc3958 --- /dev/null +++ b/tests/queries/0_stateless/03071_fix_short_circuit_logic.reference @@ -0,0 +1 @@ +2024-01-02 16:54:59 diff --git a/tests/queries/0_stateless/03071_fix_short_circuit_logic.sql b/tests/queries/0_stateless/03071_fix_short_circuit_logic.sql new file mode 100644 index 00000000000..7745bceca0b --- /dev/null +++ b/tests/queries/0_stateless/03071_fix_short_circuit_logic.sql @@ -0,0 +1,62 @@ + + +CREATE FUNCTION IF NOT EXISTS unhexPrefixed AS value -> unhex(substring(value, 3)); +CREATE FUNCTION IF NOT EXISTS hex2bytes AS address -> CAST(unhexPrefixed(address), 'FixedString(20)'); +CREATE FUNCTION IF NOT EXISTS bytes2hex AS address -> concat('0x', lower(hex(address))); + +CREATE TABLE test +( + `transfer_id` String, + `address` FixedString(20), + `value` UInt256, + `block_timestamp` DateTime('UTC'), + `token_address` FixedString(20) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(block_timestamp) +PRIMARY KEY (address, block_timestamp) +ORDER BY (address, block_timestamp); + +INSERT INTO test SELECT 'token-transfer-0x758f1bbabb160683e1c80ed52dcd24a32b599d40edf1cec91b5f1199c0e392a2-56', hex2bytes('0xd387a6e4e84a6c86bd90c158c6028a58cc8ac459'), 3000000000000000000000, '2024-01-02 16:54:59', 'abc'; + +CREATE TABLE token_data +( + token_address_hex String, + chain String, + is_blacklisted Bool +) +ENGINE = TinyLog; + +INSERT INTO token_data SELECT bytes2hex('abc'), 'zksync', false; + +CREATE DICTIONARY token_data_map +( + token_address_hex String, + chain String, + is_blacklisted Bool +) +PRIMARY KEY token_address_hex, chain +SOURCE(Clickhouse(table token_data)) +LIFETIME(MIN 200 MAX 300) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()); + +SELECT block_timestamp +FROM +( + SELECT + block_timestamp, + bytes2hex(token_address) AS token_address_hex + FROM + ( + SELECT + transfer_id, + address, + value, + block_timestamp, + token_address, + 'zksync' AS chain + FROM test + ) + WHERE (address = hex2bytes('0xd387a6e4e84a6c86bd90c158c6028a58cc8ac459')) AND (transfer_id NOT LIKE 'gas%') AND (value > 0) AND (dictGetOrDefault(token_data_map, 'is_blacklisted', (token_address_hex, 'zksync'), true)) +) +SETTINGS max_threads = 1, short_circuit_function_evaluation = 'enable', allow_experimental_analyzer = 0; \ No newline at end of file From bbfdf1698f0296a254ff63f929822334a0788a3d Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 28 Jun 2024 05:13:56 +0200 Subject: [PATCH 098/100] Fix --- base/base/getFQDNOrHostName.cpp | 4 ++++ 1 file 
changed, 4 insertions(+)

diff --git a/base/base/getFQDNOrHostName.cpp b/base/base/getFQDNOrHostName.cpp
index 2a4ba8e2e11..6b3da9699b9 100644
--- a/base/base/getFQDNOrHostName.cpp
+++ b/base/base/getFQDNOrHostName.cpp
@@ -6,6 +6,9 @@ namespace
 {
     std::string getFQDNOrHostNameImpl()
     {
+#if defined(OS_DARWIN)
+        return Poco::Net::DNS::hostName();
+#else
         try
         {
             return Poco::Net::DNS::thisHost().name();
@@ -14,6 +17,7 @@ namespace
         {
             return Poco::Net::DNS::hostName();
         }
+#endif
     }
 }

From 8e034c499f58685216157d9351c4a2cc3296af81 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Fri, 28 Jun 2024 11:58:10 +0000
Subject: [PATCH 099/100] Propagate join_any_take_last_row to hash join in any query

---
 src/Core/Settings.h | 2 +-
 src/Planner/PlannerJoins.cpp | 11 +++++------
 .../0_stateless/00830_join_overwrite.reference | 2 ++
 tests/queries/0_stateless/00830_join_overwrite.sql | 9 +++++++++
 4 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 41878142bdc..a6a997deddd 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -470,7 +470,7 @@ class IColumn;
     M(UInt64, max_rows_in_join, 0, "Maximum size of the hash table for JOIN (in number of rows).", 0) \
     M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
     M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
-    M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
+    M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key. Can be applied only to hash join and storage join.", IMPORTANT) \
     M(JoinAlgorithm, join_algorithm, JoinAlgorithm::DEFAULT, "Specify join algorithm.", 0) \
     M(UInt64, cross_join_min_rows_to_compress, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \
     M(UInt64, cross_join_min_bytes_to_compress, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold.
This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \ diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 84efdd21336..67fe45cad7e 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -802,13 +802,12 @@ static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, algorithm == JoinAlgorithm::PARALLEL_HASH || algorithm == JoinAlgorithm::DEFAULT) { - if (table_join->allowParallelHashJoin()) - { - auto query_context = planner_context->getQueryContext(); - return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); - } + auto query_context = planner_context->getQueryContext(); - return std::make_shared(table_join, right_table_expression_header); + if (table_join->allowParallelHashJoin()) + return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); + + return std::make_shared(table_join, right_table_expression_header, query_context->getSettingsRef().join_any_take_last_row); } if (algorithm == JoinAlgorithm::FULL_SORTING_MERGE) diff --git a/tests/queries/0_stateless/00830_join_overwrite.reference b/tests/queries/0_stateless/00830_join_overwrite.reference index 4792e70f333..e7d6081b647 100644 --- a/tests/queries/0_stateless/00830_join_overwrite.reference +++ b/tests/queries/0_stateless/00830_join_overwrite.reference @@ -1,2 +1,4 @@ 2 3 +2 +3 diff --git a/tests/queries/0_stateless/00830_join_overwrite.sql b/tests/queries/0_stateless/00830_join_overwrite.sql index cb7e277906b..bc3662528db 100644 --- a/tests/queries/0_stateless/00830_join_overwrite.sql +++ b/tests/queries/0_stateless/00830_join_overwrite.sql @@ -9,5 +9,14 @@ INSERT INTO kv_overwrite VALUES (1, 2); INSERT INTO kv_overwrite VALUES (1, 3); SELECT joinGet('kv_overwrite', 'v', toUInt32(1)); + +CREATE TABLE t2 (k UInt32, v UInt32) ENGINE = Memory; +INSERT INTO t2 VALUES (1, 2), (1, 3); + +SET allow_experimental_analyzer = 1; + +SELECT v FROM (SELECT 1 as k) t1 ANY INNER JOIN t2 USING (k) SETTINGS join_any_take_last_row = 0; +SELECT v FROM (SELECT 1 as k) t1 ANY INNER JOIN t2 USING (k) SETTINGS join_any_take_last_row = 1; + DROP TABLE kv; DROP TABLE kv_overwrite; From 8f9beffc6531e1adc6bab0387bf9df624822be6e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 28 Jun 2024 16:04:45 +0200 Subject: [PATCH 100/100] No jemalloc profiler for non-Linux --- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 6c874221a94..023fdcf103a 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -36,7 +36,7 @@ if (OS_LINUX) # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") else() - set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() # CACHE variable is empty to allow changing defaults without the necessity # to purge cache
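Returning to patch 099 for a moment, a small sketch of the semantics the propagated setting controls; the table mirrors the updated 00830 test, and the expected results follow its reference file.

```sql
SET allow_experimental_analyzer = 1; -- the fix lives in the new planner path

CREATE TABLE kv (k UInt32, v UInt32) ENGINE = Memory;
INSERT INTO kv VALUES (1, 2), (1, 3);

-- By default, ANY JOIN keeps the first matching row per key.
SELECT v FROM (SELECT 1 AS k) t ANY INNER JOIN kv USING (k)
SETTINGS join_any_take_last_row = 0; -- returns 2

-- With the setting enabled (hash join only), the last row wins.
SELECT v FROM (SELECT 1 AS k) t ANY INNER JOIN kv USING (k)
SETTINGS join_any_take_last_row = 1; -- returns 3
```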