From 0ad911c413dd9c37f3db61e1af7e613df8af40fa Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 3 Oct 2022 11:24:33 +0000 Subject: [PATCH 001/113] Add more checks into ThreadStatus ctor. --- src/Common/ThreadStatus.cpp | 4 ++++ src/Processors/Transforms/buildPushingToViewsChain.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index b62a7af6c71..1e60fb96916 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -92,6 +92,8 @@ std::vector ThreadGroupStatus ThreadStatus::ThreadStatus() : thread_id{getThreadId()} { + chassert(!current_thread); + last_rusage = std::make_unique(); memory_tracker.setDescription("(for thread)"); @@ -145,6 +147,8 @@ ThreadStatus::ThreadStatus() ThreadStatus::~ThreadStatus() { + chassert(current_thread == this); + memory_tracker.adjustWithUntrackedMemory(untracked_memory); if (thread_group) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ea088c45471..848345784dd 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -273,7 +273,7 @@ Chain buildPushingToViewsChain( /// and switch back to the original thread_status. auto * original_thread = current_thread; SCOPE_EXIT({ current_thread = original_thread; }); - + current_thread = nullptr; std::unique_ptr view_thread_status_ptr = std::make_unique(); /// Disable query profiler for this ThreadStatus since the running (main query) thread should already have one /// If we didn't disable it, then we could end up with N + 1 (N = number of dependencies) profilers which means From e6d6d41f22102e3e6b3f60996e0baec34e3b0432 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Mon, 22 May 2023 12:23:31 +0800 Subject: [PATCH 002/113] minor improve read buffer of hdfs --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index ee8e0764db0..fe1c2cc2815 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -41,6 +41,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory= file_size) + { + return false; + } ResourceGuard rlock(read_settings.resource_link, num_bytes_to_read); int bytes_read; From 15b847b41058de86ee1e85ef7acb61902c08713b Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Wed, 24 May 2023 09:49:12 +0800 Subject: [PATCH 003/113] init file_size as 0 --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index fe1c2cc2815..904c1c89c16 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -41,7 +41,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory Date: Fri, 30 Jun 2023 13:54:57 +0200 Subject: [PATCH 004/113] Update ReadBufferFromHDFS.cpp --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 904c1c89c16..23218e4c1f4 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -64,7 +64,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public 
BufferWithOwnMemory Date: Fri, 30 Jun 2023 16:33:07 +0000 Subject: [PATCH 005/113] Not sending any result --- src/Functions/array/arrayIntersect.cpp | 99 ++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 15 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index d1bbd169513..d25db9fc10e 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -18,6 +18,7 @@ #include #include #include +#include "Common/Exception.h" #include #include #include @@ -564,29 +565,97 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable null_map.push_back(1); } - for (const auto & pair : map) - { - if (pair.getMapped() == args) - { - ++result_offset; - if constexpr (is_numeric_column) - result_data.insertValue(pair.getKey()); - else if constexpr (std::is_same_v || std::is_same_v) - result_data.insertData(pair.getKey().data, pair.getKey().size); - else - result_data.deserializeAndInsertFromArena(pair.getKey().data); + // for (const auto & pair : map) + // { + // if (pair.getMapped() == args) + // { + // ++result_offset; + // if constexpr (is_numeric_column) + // result_data.insertValue(pair.getKey()); + // else if constexpr (std::is_same_v || std::is_same_v) + // result_data.insertData(pair.getKey().data, pair.getKey().size); + // else + // result_data.deserializeAndInsertFromArena(pair.getKey().data); - if (all_nullable) - null_map.push_back(0); - } - } + // if (all_nullable) + // null_map.push_back(0); + // } + // } result_offsets.getElement(row) = result_offset; } + for (size_t row = 0; row < rows; ++row) + { + for (size_t arg_num = 0; arg_num < 1; ++arg_num) + { + const auto & arg = arrays.args[arg_num]; + size_t off; + // const array has only one row + if (arg.is_const) + off = (*arg.offsets)[0]; + else + off = (*arg.offsets)[row]; + + prev_off[arg_num] = off; + if (arg.is_const) + prev_off[arg_num] = 0; + for (size_t res_num = 0; res_num < result_offset; ++res_num) + { + typename Map::LookupResult pair; + + if constexpr (is_numeric_column) + { + pair = map.find(columns[arg_num]->getElement(res_num)); + } + else if constexpr (std::is_same_v || std::is_same_v) + pair = map.find(columns[arg_num]->getDataAt(res_num)); + else + { + const char * data = nullptr; + pair = map.find(columns[arg_num]->serializeValueIntoArena(res_num, arena, data)); + } + + if (pair->getMapped() == args)//for (const auto & pair : map) + { + if constexpr (is_numeric_column) + { + if (pair->getKey() == columns[arg_num]->getElement(res_num)) + { + ++result_offset; + result_data.insertValue(pair->getKey()); + } + } + else if constexpr (std::is_same_v || std::is_same_v) + { + if (pair->getKey() == columns[arg_num]->getDataAt(res_num)) + { + ++result_offset; + result_data.insertData(pair->getKey().data, pair->getKey().size); + } + } + else + { + const char * data = nullptr; + if (pair->getKey() == columns[arg_num]->serializeValueIntoArena(res_num, arena, data)) + { + ++result_offset; + result_data.deserializeAndInsertFromArena(pair->getKey().data); + } + } + if (all_nullable) + null_map.push_back(0); + } + } + + + } + } ColumnPtr result_column = std::move(result_data_ptr); if (all_nullable) result_column = ColumnNullable::create(result_column, std::move(null_map_column)); return ColumnArray::create(result_column, std::move(result_offsets_ptr)); + + } From 2aa2f39e368ea9ece3c52623261c735d97a0ecca Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 3 Jul 2023 11:14:56 +0000 Subject: [PATCH 006/113] Not 
completed 'problem with offsets if the second array is smaller' --- src/Functions/array/arrayIntersect.cpp | 60 +++++++++++++++++--------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index d25db9fc10e..9d0021782ac 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -565,29 +565,31 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable null_map.push_back(1); } - // for (const auto & pair : map) - // { - // if (pair.getMapped() == args) - // { - // ++result_offset; - // if constexpr (is_numeric_column) - // result_data.insertValue(pair.getKey()); - // else if constexpr (std::is_same_v || std::is_same_v) - // result_data.insertData(pair.getKey().data, pair.getKey().size); - // else - // result_data.deserializeAndInsertFromArena(pair.getKey().data); + for (const auto & pair : map) + { + if (pair.getMapped() == args) + { + ++result_offset; + // if constexpr (is_numeric_column) + // result_data.insertValue(pair.getKey()); + // else if constexpr (std::is_same_v || std::is_same_v) + // result_data.insertData(pair.getKey().data, pair.getKey().size); + // else + // result_data.deserializeAndInsertFromArena(pair.getKey().data); - // if (all_nullable) - // null_map.push_back(0); - // } - // } - result_offsets.getElement(row) = result_offset; + // if (all_nullable) + // null_map.push_back(0); + } + } + // result_offsets.getElement(row) = result_offset; } for (size_t row = 0; row < rows; ++row) { + bool all_has_nullable = all_nullable; for (size_t arg_num = 0; arg_num < 1; ++arg_num) { + bool current_has_nullable = false; const auto & arg = arrays.args[arg_num]; size_t off; // const array has only one row @@ -599,8 +601,12 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable prev_off[arg_num] = off; if (arg.is_const) prev_off[arg_num] = 0; + // throw Exception(ErrorCodes::LOGICAL_ERROR, "{}", result_offset); for (size_t res_num = 0; res_num < result_offset; ++res_num) { + if (arg.null_map && (*arg.null_map)[row]) + current_has_nullable = true; + typename Map::LookupResult pair; if constexpr (is_numeric_column) @@ -615,13 +621,20 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable pair = map.find(columns[arg_num]->serializeValueIntoArena(res_num, arena, data)); } + prev_off[arg_num] = off; + if (arg.is_const) + prev_off[arg_num] = 0; + + if (!current_has_nullable) + all_has_nullable = false; + if (pair->getMapped() == args)//for (const auto & pair : map) { if constexpr (is_numeric_column) { if (pair->getKey() == columns[arg_num]->getElement(res_num)) { - ++result_offset; + // ++result_offset; result_data.insertValue(pair->getKey()); } } @@ -629,7 +642,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable { if (pair->getKey() == columns[arg_num]->getDataAt(res_num)) { - ++result_offset; + // ++result_offset; result_data.insertData(pair->getKey().data, pair->getKey().size); } } @@ -638,14 +651,21 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable const char * data = nullptr; if (pair->getKey() == columns[arg_num]->serializeValueIntoArena(res_num, arena, data)) { - ++result_offset; + // ++result_offset; result_data.deserializeAndInsertFromArena(pair->getKey().data); } } if (all_nullable) null_map.push_back(0); } - } + } + if (all_has_nullable) + { + ++result_offset; + result_data.insertDefault(); + 
null_map.push_back(1); + } + result_offsets.getElement(row) = result_offset; } From 3a4cb8cb67e0feb523e52adf2d9aff389afe2419 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 00:21:53 +0300 Subject: [PATCH 007/113] Update ThreadStatus.cpp --- src/Common/ThreadStatus.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index a4d1004e44a..2e37885646a 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -126,7 +126,6 @@ ThreadStatus::ThreadStatus() ThreadGroupPtr ThreadStatus::getThreadGroup() const { chassert(current_thread == this); - memory_tracker.adjustWithUntrackedMemory(untracked_memory); return thread_group; } From 98aa6b317f6324c58d814025d543d7616a1bf7ee Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 4 Jul 2023 16:50:31 +0000 Subject: [PATCH 008/113] Support reading subcolumns from file/s3/hdfs/url/azureBlobStorage table functions --- src/Core/Settings.h | 8 +- src/Core/SettingsQuirks.cpp | 6 + src/Formats/FormatFactory.cpp | 18 --- src/Formats/FormatFactory.h | 2 - src/Formats/FormatSettings.h | 3 - .../Formats/Impl/ArrowBlockInputFormat.cpp | 2 - .../Formats/Impl/ArrowColumnToCHColumn.cpp | 60 ++++--- .../Formats/Impl/ArrowColumnToCHColumn.h | 2 - .../Formats/Impl/AvroRowInputFormat.cpp | 2 + .../Formats/Impl/ORCBlockInputFormat.cpp | 7 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 2 - .../Impl/ParquetMetadataInputFormat.cpp | 1 - .../Transforms/ExtractColumnsTransform.cpp | 35 +++++ .../Transforms/ExtractColumnsTransform.h | 26 +++ .../DataLakes/DeltaLakeMetadataParser.cpp | 1 - src/Storages/HDFS/StorageHDFS.cpp | 71 +++------ src/Storages/HDFS/StorageHDFS.h | 14 +- src/Storages/HDFS/StorageHDFSCluster.h | 2 + src/Storages/StorageAzureBlob.cpp | 73 ++------- src/Storages/StorageAzureBlob.h | 12 +- src/Storages/StorageFile.cpp | 148 +++++++----------- src/Storages/StorageFile.h | 2 + src/Storages/StorageS3.cpp | 70 ++------- src/Storages/StorageS3.h | 12 +- src/Storages/StorageS3Cluster.h | 2 + src/Storages/StorageURL.cpp | 78 ++++----- src/Storages/StorageURL.h | 14 +- src/Storages/StorageURLCluster.h | 2 + src/Storages/prepareReadingFromFormat.cpp | 69 ++++++++ src/Storages/prepareReadingFromFormat.h | 26 +++ .../test_storage_azure_blob_storage/test.py | 37 +++++ tests/integration/test_storage_hdfs/test.py | 48 ++++++ tests/integration/test_storage_s3/test.py | 63 ++++++++ ...02797_read_subcolumns_from_files.reference | 4 + .../02797_read_subcolumns_from_files.sh | 18 +++ 35 files changed, 541 insertions(+), 399 deletions(-) create mode 100644 src/Processors/Transforms/ExtractColumnsTransform.cpp create mode 100644 src/Processors/Transforms/ExtractColumnsTransform.h create mode 100644 src/Storages/prepareReadingFromFormat.cpp create mode 100644 src/Storages/prepareReadingFromFormat.h create mode 100644 tests/queries/0_stateless/02797_read_subcolumns_from_files.reference create mode 100755 tests/queries/0_stateless/02797_read_subcolumns_from_files.sh diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3d42bd582ed..710a6e4f135 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -825,8 +825,11 @@ class IColumn; MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \ MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \ MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \ + MAKE_OBSOLETE(M, Bool, input_format_arrow_import_nested, false) \ + MAKE_OBSOLETE(M, Bool, input_format_parquet_import_nested, false) \ + MAKE_OBSOLETE(M, Bool, input_format_orc_import_nested, false) \ - /** 
The section above is for obsolete settings. Do not add anything there. */ +/** The section above is for obsolete settings. Do not add anything there. */ #define FORMAT_FACTORY_SETTINGS(M, ALIAS) \ @@ -845,12 +848,9 @@ class IColumn; M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \ M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \ - M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \ - M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \ - M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \ M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index 3326f42adf5..cee9c3d497c 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -71,6 +71,12 @@ void applySettingsQuirks(Settings & settings, Poco::Logger * log) } } +//#if defined(THREAD_SANITIZER) + settings.use_hedged_requests.value = false; + if (log) + LOG_WARNING(log, "use_hedged_requests has been disabled for the build with Thread Sanitizer, because they are using fibers, leading to a failed assertion inside TSan"); +//#endif + if (!queryProfilerWorks()) { if (settings.query_profiler_real_time_period_ns) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index ad991aa0335..6e1e574e687 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -117,7 +117,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.row_group_rows = settings.output_format_parquet_row_group_size; format_settings.parquet.row_group_bytes = settings.output_format_parquet_row_group_size_bytes; format_settings.parquet.output_version = settings.output_format_parquet_version; - format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.preserve_order = settings.input_format_parquet_preserve_order; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; @@ -161,7 +160,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.with_types_use_header = settings.input_format_with_types_use_header; 
format_settings.write_statistics = settings.output_format_write_statistics; format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; - format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; @@ -169,11 +167,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.output_string_as_string = settings.output_format_arrow_string_as_string; format_settings.arrow.output_fixed_string_as_fixed_byte_array = settings.output_format_arrow_fixed_string_as_fixed_byte_array; format_settings.arrow.output_compression_method = settings.output_format_arrow_compression_method; - format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; - format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; @@ -676,14 +672,6 @@ void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name) target = true; } -void FormatFactory::markFormatSupportsSubcolumns(const String & name) -{ - auto & target = dict[name].supports_subcolumns; - if (target) - throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subcolumns", name); - target = true; -} - void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name) { auto & target = dict[name].prefers_large_blocks; @@ -692,12 +680,6 @@ void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name) target = true; } -bool FormatFactory::checkIfFormatSupportsSubcolumns(const String & name) const -{ - const auto & target = getCreators(name); - return target.supports_subcolumns; -} - bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name) const { const auto & target = getCreators(name); diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 489db944ee6..fc4ab6d4893 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -228,10 +228,8 @@ public: void markOutputFormatSupportsParallelFormatting(const String & name); void markOutputFormatPrefersLargeBlocks(const String & name); - void markFormatSupportsSubcolumns(const String & name); void markFormatSupportsSubsetOfColumns(const String & name); - bool checkIfFormatSupportsSubcolumns(const String & name) const; bool checkIfFormatSupportsSubsetOfColumns(const String & name) const; bool checkIfFormatHasSchemaReader(const String & name) const; diff --git 
a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 07d4a7ede4a..6402df33bd9 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -100,7 +100,6 @@ struct FormatSettings { UInt64 row_group_size = 1000000; bool low_cardinality_as_dictionary = false; - bool import_nested = false; bool allow_missing_columns = false; bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; @@ -212,7 +211,6 @@ struct FormatSettings { UInt64 row_group_rows = 1000000; UInt64 row_group_bytes = 512 * 1024 * 1024; - bool import_nested = false; bool allow_missing_columns = false; bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; @@ -317,7 +315,6 @@ struct FormatSettings struct { - bool import_nested = false; bool allow_missing_columns = false; int64_t row_batch_size = 100'000; bool skip_columns_with_unsupported_types_in_schema_inference = false; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 2fadc09e80f..4293407379e 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -143,7 +143,6 @@ void ArrowBlockInputFormat::prepareReader() arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "Arrow", - format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns, format_settings.null_as_default, format_settings.arrow.case_insensitive_column_matching); @@ -190,7 +189,6 @@ void registerInputFormatArrow(FormatFactory & factory) { return std::make_shared(buf, sample, false, format_settings); }); - factory.markFormatSupportsSubcolumns("Arrow"); factory.markFormatSupportsSubsetOfColumns("Arrow"); factory.registerInputFormat( "ArrowStream", diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 5a7306111a5..74d4553a58b 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1032,13 +1032,11 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( ArrowColumnToCHColumn::ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, - bool import_nested_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_) : header(header_) , format_name(format_name_) - , import_nested(import_nested_) , allow_missing_columns(allow_missing_columns_) , null_as_default(null_as_default_) , case_insensitive_matching(case_insensitive_matching_) @@ -1080,42 +1078,40 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & if (!name_to_column_ptr.contains(search_column_name)) { bool read_from_nested = false; - /// Check if it's a column from nested table. - if (import_nested) + /// Check if it's a subcolumn from some struct. 
+ String nested_table_name = Nested::extractTableName(header_column.name); + String search_nested_table_name = nested_table_name; + if (case_insensitive_matching) + boost::to_lower(search_nested_table_name); + if (name_to_column_ptr.contains(search_nested_table_name)) { - String nested_table_name = Nested::extractTableName(header_column.name); - String search_nested_table_name = nested_table_name; - if (case_insensitive_matching) - boost::to_lower(search_nested_table_name); - if (name_to_column_ptr.contains(search_nested_table_name)) + if (!nested_tables.contains(search_nested_table_name)) { - if (!nested_tables.contains(search_nested_table_name)) + NamesAndTypesList nested_columns; + for (const auto & name_and_type : header.getNamesAndTypesList()) { - NamesAndTypesList nested_columns; - for (const auto & name_and_type : header.getNamesAndTypesList()) - { - if (name_and_type.name.starts_with(nested_table_name + ".")) - nested_columns.push_back(name_and_type); - } - auto nested_table_type = Nested::collect(nested_columns).front().type; + if (name_and_type.name.starts_with(nested_table_name + ".")) + nested_columns.push_back(name_and_type); + } + auto nested_table_type = Nested::collect(nested_columns).front().type; - std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; - ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( - arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)}; - BlockPtr block_ptr = std::make_shared(cols); - auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); - nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; - } - auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); - if (nested_column) - { - column = *nested_column; - if (case_insensitive_matching) - column.name = header_column.name; - read_from_nested = true; - } + std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; + ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( + arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)}; + BlockPtr block_ptr = std::make_shared(cols); + auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); + nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; + } + auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); + if (nested_column) + { + column = *nested_column; + if (case_insensitive_matching) + column.name = header_column.name; + read_from_nested = true; } } + if (!read_from_nested) { if (!allow_missing_columns) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 64ff99c70ac..57f33069e0e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -24,7 +24,6 @@ public: ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, - bool import_nested_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_ = false); @@ -53,7 +52,6 @@ public: private: const Block & header; const std::string format_name; - bool import_nested; /// If false, throw exception if some columns in header not exists in arrow table. 
bool allow_missing_columns; bool null_as_default; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 1ec7491658e..74108beb370 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -1223,6 +1223,8 @@ void registerInputFormatAvro(FormatFactory & factory) { return std::make_shared(sample, buf, params, settings); }); + + factory.markFormatSupportsSubsetOfColumns("AvroConfluent"); } void registerAvroSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 016f07731d5..ab4e07376f3 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -125,16 +125,12 @@ void ORCBlockInputFormat::prepareReader() arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "ORC", - format_settings.orc.import_nested, format_settings.orc.allow_missing_columns, format_settings.null_as_default, format_settings.orc.case_insensitive_column_matching); const bool ignore_case = format_settings.orc.case_insensitive_column_matching; - std::unordered_set nested_table_names; - if (format_settings.orc.import_nested) - nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); - + std::unordered_set nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); for (int i = 0; i < schema->num_fields(); ++i) { const auto & name = schema->field(i)->name(); @@ -171,7 +167,6 @@ void registerInputFormatORC(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatSupportsSubcolumns("ORC"); factory.markFormatSupportsSubsetOfColumns("ORC"); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 3dde8ad6a6c..d2df5e5dc8e 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -143,7 +143,6 @@ void ParquetBlockInputFormat::initializeRowGroupReader(size_t row_group_idx) row_group.arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "Parquet", - format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, format_settings.null_as_default, format_settings.parquet.case_insensitive_column_matching); @@ -415,7 +414,6 @@ void registerInputFormatParquet(FormatFactory & factory) max_parsing_threads, min_bytes_for_seek); }); - factory.markFormatSupportsSubcolumns("Parquet"); factory.markFormatSupportsSubsetOfColumns("Parquet"); } diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp index 229a0630328..6f189816eff 100644 --- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp @@ -504,7 +504,6 @@ void registerInputFormatParquetMetadata(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatSupportsSubcolumns("ParquetMetadata"); factory.markFormatSupportsSubsetOfColumns("ParquetMetadata"); } diff --git a/src/Processors/Transforms/ExtractColumnsTransform.cpp b/src/Processors/Transforms/ExtractColumnsTransform.cpp new file mode 100644 index 00000000000..44bf5582290 --- /dev/null +++ b/src/Processors/Transforms/ExtractColumnsTransform.cpp @@ -0,0 +1,35 @@ 
+#include +#include + +namespace DB +{ + +ExtractColumnsTransform::ExtractColumnsTransform(const Block & header_, const NamesAndTypesList & requested_columns_) + : ISimpleTransform(header_, transformHeader(header_, requested_columns_), false), requested_columns(requested_columns_) +{ + +} + +Block ExtractColumnsTransform::transformHeader(Block header, const NamesAndTypesList & requested_columns_) +{ + ColumnsWithTypeAndName columns; + columns.reserve(requested_columns_.size()); + for (const auto & required_column : requested_columns_) + columns.emplace_back(getColumnFromBlock(header, required_column), required_column.type, required_column.name); + + return Block(std::move(columns)); +} + +void ExtractColumnsTransform::transform(Chunk & chunk) +{ + size_t num_rows = chunk.getNumRows(); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + Columns columns; + columns.reserve(requested_columns.size()); + for (const auto & required_column : requested_columns) + columns.emplace_back(getColumnFromBlock(block, required_column)); + + chunk.setColumns(std::move(columns), num_rows); +} + +} diff --git a/src/Processors/Transforms/ExtractColumnsTransform.h b/src/Processors/Transforms/ExtractColumnsTransform.h new file mode 100644 index 00000000000..f8b3d803736 --- /dev/null +++ b/src/Processors/Transforms/ExtractColumnsTransform.h @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace DB +{ + +/// Extracts required columns and subcolumns from the block. +class ExtractColumnsTransform final : public ISimpleTransform +{ +public: + ExtractColumnsTransform( + const Block & header_, + const NamesAndTypesList & requested_columns_); + + String getName() const override { return "ExtractColumnsTransform"; } + + static Block transformHeader(Block header, const NamesAndTypesList & requested_columns_); + +protected: + void transform(Chunk & chunk) override; + +private: + const NamesAndTypesList requested_columns; +}; + +} diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 309aa54909a..1172a40627d 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -281,7 +281,6 @@ struct DeltaLakeMetadataParser::Impl ArrowColumnToCHColumn column_reader( header, "Parquet", - format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, /* null_as_default */true, /* case_insensitive_column_matching */false); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index a41c65cdb2e..c662c21e60f 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -332,30 +333,21 @@ StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() return pimpl->next(); } -Block HDFSSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - HDFSSource::HDFSSource( + const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - const Block & block_for_format_, - const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, - std::shared_ptr file_iterator_, - ColumnsDescription columns_description_) - : ISource(getHeader(block_for_format_, requested_virtual_columns_)) + 
std::shared_ptr file_iterator_) + : ISource(info.source_header) , WithContext(context_) , storage(std::move(storage_)) - , block_for_format(block_for_format_) - , requested_virtual_columns(requested_virtual_columns_) + , block_for_format(info.format_header) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) , max_block_size(max_block_size_) , file_iterator(file_iterator_) - , columns_description(std::move(columns_description_)) + , columns_description(info.columns_description) { initialize(); } @@ -408,6 +400,14 @@ bool HDFSSource::initialize() return std::make_shared(header, columns_description, *input_format, getContext()); }); } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); return true; @@ -640,50 +640,17 @@ Pipe StorageHDFS::read( }); } - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, format_name, getVirtuals()); Pipes pipes; auto this_ptr = std::static_pointer_cast(shared_from_this()); for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( + read_from_format_info, this_ptr, - block_for_format, - requested_virtual_columns, context_, max_block_size, - iterator_wrapper, - columns_description)); + iterator_wrapper)); } return Pipe::unitePipes(std::move(pipes)); } diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 74801b68f73..239da19620d 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -7,6 +7,7 @@ #include #include #include +#include #include namespace DB @@ -74,6 +75,8 @@ public: /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
bool supportsSubsetOfColumns() const override; + bool supportsSubcolumns() const override { return true; } + static ColumnsDescription getTableStructureFromData( const String & format, const String & uri, @@ -140,16 +143,12 @@ public: using IteratorWrapper = std::function; using StorageHDFSPtr = std::shared_ptr; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - HDFSSource( + const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - const Block & block_for_format_, - const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, - std::shared_ptr file_iterator_, - ColumnsDescription columns_description_); + std::shared_ptr file_iterator_); String getName() const override; @@ -158,7 +157,8 @@ public: private: StorageHDFSPtr storage; Block block_for_format; - std::vector requested_virtual_columns; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; UInt64 max_block_size; std::shared_ptr file_iterator; ColumnsDescription columns_description; diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 350051ab089..1dbf2f3c4e2 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -37,6 +37,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + private: void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 133dbb6740f..6d61778a2f1 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -603,7 +604,7 @@ private: Pipe StorageAzureBlob::read( const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, @@ -615,15 +616,6 @@ Pipe StorageAzureBlob::read( Pipes pipes; - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - std::shared_ptr iterator_wrapper; if (configuration.withGlobs()) { @@ -639,39 +631,15 @@ Pipe StorageAzureBlob::read( std::nullopt, query_info.query, virtual_block, local_context, nullptr); } - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = 
storage_snapshot->metadata->getSampleBlock(); - } - + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, configuration.format, getVirtuals()); for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, configuration.format, getName(), - block_for_format, local_context, format_settings, - columns_description, max_block_size, configuration.compression_method, object_storage.get(), @@ -762,11 +730,6 @@ bool StorageAzureBlob::supportsPartitionBy() const return true; } -bool StorageAzureBlob::supportsSubcolumns() const -{ - return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); -} - bool StorageAzureBlob::supportsSubsetOfColumns() const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); @@ -1075,35 +1038,26 @@ Chunk StorageAzureBlobSource::generate() return {}; } -Block StorageAzureBlobSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageAzureBlobSource::StorageAzureBlobSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, String compression_hint_, AzureObjectStorage * object_storage_, const String & container_, std::shared_ptr file_iterator_) - :ISource(getHeader(sample_block_, requested_virtual_columns_)) + :ISource(info.source_header) , WithContext(context_) - , requested_virtual_columns(requested_virtual_columns_) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) , format(format_) , name(std::move(name_)) - , sample_block(sample_block_) + , sample_block(info.format_header) , format_settings(format_settings_) - , columns_desc(columns_) + , columns_desc(info.columns_description) , max_block_size(max_block_size_) , compression_hint(compression_hint_) , object_storage(std::move(object_storage_)) @@ -1159,6 +1113,13 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() { return std::make_shared(header, columns_desc, *input_format, getContext()); }); } + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. 
+ builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 25c791f1700..ce02324ed13 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -93,7 +94,7 @@ public: bool supportsPartitionBy() const override; - bool supportsSubcolumns() const override; + bool supportsSubcolumns() const override { return true; } bool supportsSubsetOfColumns() const override; @@ -185,13 +186,11 @@ public: }; StorageAzureBlobSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, String compression_hint_, AzureObjectStorage * object_storage_, @@ -204,10 +203,9 @@ public: String getName() const override; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - private: - std::vector requested_virtual_columns; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; String format; String name; Block sample_block; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index ff67272e542..2009016d405 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -564,56 +566,46 @@ using StorageFilePtr = std::shared_ptr; class StorageFileSource : public ISource { public: - struct FilesInfo + class FilesIterator { + public: + explicit FilesIterator(const Strings & files_) : files(files_) + { + } + + String next() + { + auto current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= files.size()) + return ""; + + return files[current_index]; + } + + private: std::vector files; - - std::atomic next_file_to_read = 0; - - bool need_path_column = false; - bool need_file_column = false; - - size_t total_bytes_to_read = 0; + std::atomic index = 0; }; - using FilesInfoPtr = std::shared_ptr; - - static Block getBlockForSource(const Block & block_for_format, const FilesInfoPtr & files_info) - { - auto res = block_for_format; - if (files_info->need_path_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_path"}); - } - if (files_info->need_file_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_file"}); - } - return res; - } + using FilesIteratorPtr = std::shared_ptr; StorageFileSource( + const ReadFromFormatInfo & info, std::shared_ptr storage_, const StorageSnapshotPtr & storage_snapshot_, ContextPtr context_, UInt64 max_block_size_, - FilesInfoPtr files_info_, - ColumnsDescription columns_description_, - const Block & block_for_format_, + FilesIteratorPtr files_iterator_, std::unique_ptr read_buf_) - : ISource(getBlockForSource(block_for_format_, files_info_)) + : ISource(info.source_header) , storage(std::move(storage_)) , storage_snapshot(storage_snapshot_) - , 
files_info(std::move(files_info_)) + , files_iterator(std::move(files_iterator_)) , read_buf(std::move(read_buf_)) - , columns_description(std::move(columns_description_)) - , block_for_format(block_for_format_) + , columns_description(info.columns_description) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) + , block_for_format(info.format_header) , context(context_) , max_block_size(max_block_size_) { @@ -699,12 +691,10 @@ public: { if (!storage->use_table_fd) { - auto current_file = files_info->next_file_to_read.fetch_add(1); - if (current_file >= files_info->files.size()) + current_path = files_iterator->next(); + if (current_path.empty()) return {}; - current_path = files_info->files[current_file]; - /// Special case for distributed format. Defaults are not needed here. if (storage->format_name == "Distributed") { @@ -744,6 +734,13 @@ public: }); } + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); @@ -755,19 +752,19 @@ public: UInt64 num_rows = chunk.getNumRows(); /// Enrich with virtual columns. - if (files_info->need_path_column) - { - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - if (files_info->need_file_column) + for (const auto & virtual_column : requested_virtual_columns) { - size_t last_slash_pos = current_path.find_last_of('/'); - auto file_name = current_path.substr(last_slash_pos + 1); - - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); - chunk.addColumn(column->convertToFullColumnIfConst()); + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, current_path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = current_path.find_last_of('/'); + auto file_name = current_path.substr(last_slash_pos + 1); + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_name)->convertToFullColumnIfConst()); + } } if (num_rows && total_files_size) @@ -799,7 +796,7 @@ public: private: std::shared_ptr storage; StorageSnapshotPtr storage_snapshot; - FilesInfoPtr files_info; + FilesIteratorPtr files_iterator; String current_path; Block sample_block; std::unique_ptr read_buf; @@ -808,6 +805,8 @@ private: std::unique_ptr reader; ColumnsDescription columns_description; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; Block block_for_format; ContextPtr context; /// TODO Untangle potential issues with context lifetime. 
@@ -849,18 +848,7 @@ Pipe StorageFile::read( } } - auto files_info = std::make_shared(); - files_info->files = paths; - files_info->total_bytes_to_read = total_bytes_to_read; - - for (const auto & column : column_names) - { - if (column == "_path") - files_info->need_path_column = true; - if (column == "_file") - files_info->need_file_column = true; - } - + auto files_iterator = std::make_shared(paths); auto this_ptr = std::static_pointer_cast(shared_from_this()); size_t num_streams = max_num_streams; @@ -876,33 +864,10 @@ Pipe StorageFile::read( if (progress_callback) progress_callback(FileProgress(0, total_bytes_to_read)); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, format_name, getVirtuals()); + for (size_t i = 0; i < num_streams; ++i) { - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { - return std::any_of( - virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col) { return col == virtual_col.name; }); - }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - } - - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - /// In case of reading from fd we have to check whether we have already created /// the read buffer from it in Storage constructor (for schema inference) or not. /// If yes, then we should use it in StorageFileSource. Atomic bool flag is needed @@ -912,13 +877,12 @@ Pipe StorageFile::read( read_buffer = std::move(peekable_read_buffer_from_fd); pipes.emplace_back(std::make_shared( + read_from_format_info, this_ptr, storage_snapshot, context, max_block_size, - files_info, - columns_description, - block_for_format, + files_iterator, std::move(read_buffer))); } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ed50ae73e51..a7c9beece17 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -75,6 +75,8 @@ public: /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
bool supportsSubsetOfColumns() const override; + bool supportsSubcolumns() const override { return true; } + bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 292ae4813dd..6a50d147c67 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -42,6 +42,7 @@ #include #include +#include #include #include @@ -528,22 +529,12 @@ size_t StorageS3Source::KeysIterator::getTotalSize() const return pimpl->getTotalSize(); } -Block StorageS3Source::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageS3Source::StorageS3Source( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, @@ -552,20 +543,21 @@ StorageS3Source::StorageS3Source( const String & version_id_, std::shared_ptr file_iterator_, const size_t download_thread_num_) - : ISource(getHeader(sample_block_, requested_virtual_columns_)) + : ISource(info.source_header) , WithContext(context_) , name(std::move(name_)) , bucket(bucket_) , version_id(version_id_) , format(format_) - , columns_desc(columns_) + , columns_desc(info.columns_description) + , requested_columns(info.requested_columns) , max_block_size(max_block_size_) , request_settings(request_settings_) , compression_hint(std::move(compression_hint_)) , client(client_) - , sample_block(sample_block_) + , sample_block(info.format_header) , format_settings(format_settings_) - , requested_virtual_columns(requested_virtual_columns_) + , requested_virtual_columns(info.requested_virtual_columns) , file_iterator(file_iterator_) , download_thread_num(download_thread_num_) , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1) @@ -611,6 +603,13 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() { return std::make_shared(header, columns_desc, *input_format, getContext()); }); } + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. 
+ builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); @@ -1026,11 +1025,6 @@ std::shared_ptr StorageS3::createFileIterator( } } -bool StorageS3::supportsSubcolumns() const -{ - return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); -} - bool StorageS3::supportsSubsetOfColumns() const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); @@ -1062,52 +1056,20 @@ Pipe StorageS3::read( Pipes pipes; - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - std::shared_ptr iterator_wrapper = createFileIterator( query_configuration, distributed_processing, local_context, query_info.query, virtual_block); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, query_configuration.format, getVirtuals()); const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, query_configuration.format, getName(), - block_for_format, local_context, format_settings, - columns_description, max_block_size, query_configuration.request_settings, query_configuration.compression_method, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 13053833623..267007501e0 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -21,6 +21,7 @@ #include #include #include +#include namespace Aws::S3 { @@ -119,16 +120,12 @@ public: ReadTaskCallback callback; }; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - StorageS3Source( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format, String name_, - const Block & sample_block, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, @@ -150,6 +147,7 @@ private: String version_id; String format; ColumnsDescription columns_desc; + NamesAndTypesList requested_columns; UInt64 max_block_size; S3Settings::RequestSettings request_settings; String 
compression_hint; @@ -214,7 +212,7 @@ private: ReaderHolder reader; - std::vector requested_virtual_columns; + NamesAndTypesList requested_virtual_columns; std::shared_ptr file_iterator; size_t download_thread_num = 1; @@ -360,7 +358,7 @@ private: const std::optional & format_settings, ContextPtr ctx); - bool supportsSubcolumns() const override; + bool supportsSubcolumns() const override { return true; } bool supportsSubsetOfColumns() const override; diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 5c2229875e5..4b4558f6330 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -36,6 +36,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + protected: void updateConfigurationIfChanged(ContextPtr local_context); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 947881d34c6..5dd5055dc42 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -23,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -208,25 +209,15 @@ void StorageURLSource::setCredentials(Poco::Net::HTTPBasicCredentials & credenti } } -Block StorageURLSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageURLSource::StorageURLSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, std::shared_ptr uri_iterator_, const std::string & http_method, std::function callback, const String & format, const std::optional & format_settings, String name_, - const Block & sample_block, ContextPtr context, - const ColumnsDescription & columns, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, @@ -234,7 +225,13 @@ StorageURLSource::StorageURLSource( const HTTPHeaderEntries & headers_, const URIParams & params, bool glob_url) - : ISource(getHeader(sample_block, requested_virtual_columns_)), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_) + : ISource(info.source_header) + , name(std::move(name_)) + , columns_description(info.columns_description) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) + , block_for_format(info.format_header) + , uri_iterator(uri_iterator_) { auto headers = getHeaders(headers_); @@ -292,7 +289,7 @@ StorageURLSource::StorageURLSource( input_format = FormatFactory::instance().getInput( format, *read_buf, - sample_block, + info.format_header, context, max_block_size, format_settings, @@ -304,8 +301,20 @@ StorageURLSource::StorageURLSource( QueryPipelineBuilder builder; builder.init(Pipe(input_format)); - builder.addSimpleTransform([&](const Block & cur_header) - { return std::make_shared(cur_header, columns, *input_format, context); }); + if (columns_description.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(cur_header, columns_description, *input_format, context); + }); + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. 
+ builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); @@ -712,27 +721,6 @@ Pipe IStorageURLBase::read( { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - columns_description = storage_snapshot->getDescriptionForColumns(column_names); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - size_t max_download_threads = local_context->getSettingsRef().max_download_threads; std::shared_ptr iterator_wrapper{nullptr}; @@ -776,6 +764,8 @@ Pipe IStorageURLBase::read( num_streams = 1; } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, format_name, getVirtuals()); + Pipes pipes; pipes.reserve(num_streams); @@ -783,16 +773,14 @@ Pipe IStorageURLBase::read( for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, iterator_wrapper, getReadMethod(), - getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback(column_names, read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - block_for_format, local_context, - columns_description, max_block_size, getHTTPTimeouts(local_context), compression_method, @@ -838,17 +826,17 @@ Pipe StorageURLWithFailover::read( return uri_options; }); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, format_name, getVirtuals()); + auto pipe = Pipe(std::make_shared( - std::vector{}, + read_from_format_info, iterator_wrapper, getReadMethod(), - getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback(column_names, read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - block_for_format, local_context, - columns_description, max_block_size, getHTTPTimeouts(local_context), compression_method, diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 345f813dd7c..a6fb6840a22 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -46,6 +47,8 @@ public: bool supportsPartitionBy() const override { return true; } + bool supportsSubcolumns() const override { return true; } + NamesAndTypesList getVirtuals() const override; static ColumnsDescription getTableStructureFromData( @@ -158,16 +161,14 @@ public: using IteratorWrapper = std::function; StorageURLSource( - const std::vector & requested_virtual_columns_, + const 
ReadFromFormatInfo & info, std::shared_ptr uri_iterator_, const std::string & http_method, std::function callback, const String & format, const std::optional & format_settings, String name_, - const Block & sample_block, ContextPtr context, - const ColumnsDescription & columns, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, @@ -182,8 +183,6 @@ public: static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri); - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - static std::pair> getFirstAvailableURIAndReadBuffer( std::vector::const_iterator & option, const std::vector::const_iterator & end, @@ -202,7 +201,10 @@ private: InitializeFunc initialize; String name; - std::vector requested_virtual_columns; + ColumnsDescription columns_description; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; + Block block_for_format; std::shared_ptr uri_iterator; Poco::URI curr_uri; diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index 67771416771..bd475d78f65 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -36,6 +36,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + private: void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp new file mode 100644 index 00000000000..aac5e53c569 --- /dev/null +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -0,0 +1,69 @@ +#include +#include + +namespace DB +{ + +ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, const String & format_name, const NamesAndTypesList & virtuals) +{ + ReadFromFormatInfo info; + /// Collect requested virtual columns and remove them from requested columns. + Strings columns_to_read; + for (const auto & column_name : requested_columns) + { + bool is_virtual = false; + for (const auto & virtual_column : virtuals) + { + if (column_name == virtual_column.name) + { + info.requested_virtual_columns.push_back(virtual_column); + is_virtual = true; + break; + } + } + + if (!is_virtual) + columns_to_read.push_back(column_name); + } + + /// Create header for Source that will contain all requested columns including virtual columns at the end + /// (because they will be added to the chunk after reading regular columns). + info.source_header = storage_snapshot->getSampleBlockForColumns(columns_to_read); + for (const auto & requested_virtual_column : info.requested_virtual_columns) + info.source_header.insert({requested_virtual_column.type->createColumn(), requested_virtual_column.type, requested_virtual_column.name}); + + /// Set requested columns that should be read from data. + info.requested_columns = storage_snapshot->getColumnsByNames(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects(), columns_to_read); + + if (format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name)) + { + /// If only virtual columns were requested, just read the smallest column. 
+ if (columns_to_read.empty()) + { + columns_to_read.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + } + /// We need to replace all subcolumns with their nested columns (e.g `a.b`, `a.b.c`, `x.y` -> `a`, `x`), + /// because most formats cannot extract subcolumns on their own. + /// All requested subcolumns will be extracted after reading. + else + { + std::unordered_set columns_to_read_set; + for (const auto & column_to_read : info.requested_columns) + columns_to_read_set.insert(column_to_read.getNameInStorage()); + columns_to_read = Strings(columns_to_read_set.begin(), columns_to_read_set.end()); + } + info.columns_description = storage_snapshot->getDescriptionForColumns(columns_to_read); + } + /// If format doesn't support reading subset of columns, read all columns. + /// Requested columns/subcolumns will be extracted after reading. + else + { + info.columns_description = storage_snapshot->metadata->getColumns(); + } + + /// Create header for InputFormat with columns that will be read from the data. + info.format_header = storage_snapshot->getSampleBlockForColumns(info.columns_description.getNamesOfPhysical()); + return info; +} + +} diff --git a/src/Storages/prepareReadingFromFormat.h b/src/Storages/prepareReadingFromFormat.h new file mode 100644 index 00000000000..ac029c215db --- /dev/null +++ b/src/Storages/prepareReadingFromFormat.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace DB +{ + struct ReadFromFormatInfo + { + /// Header that will return Source from storage. + /// It contains all requested columns including virtual columns; + Block source_header; + /// Header that will be passed to IInputFormat to read data from file. + /// It can contain more columns than were requested if format doesn't support + /// reading subset of columns. + Block format_header; + /// Description of columns for format_header. Used for inserting defaults. + ColumnsDescription columns_description; + /// The list of requested columns without virtual columns. + NamesAndTypesList requested_columns; + /// The list of requested virtual columns. + NamesAndTypesList requested_virtual_columns; + }; + + /// Get all needed information for reading from data in some input format. 
+ ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, const String & format_name, const NamesAndTypesList & virtuals); +} diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index f9d337b6d86..20d58573686 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -592,3 +592,40 @@ def test_partition_by_tf(cluster): assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv") assert "3,2,1\n" == get_azure_file_content("test_tf_1.csv") assert "78,43,45\n" == get_azure_file_content("test_tf_45.csv") + + +def test_read_subcolumns(cluster): + node = cluster.instances["node"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + ) + + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\tcont/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\tcont/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\tcont/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert res == "42\tcont/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 5ac1d3bea6f..82ce901a9e6 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -866,6 +866,54 @@ def test_skip_empty_files(started_cluster): assert 
int(res) == 0 +def test_read_subcolumns(started_cluster): + node = started_cluster.instances["node1"] + + node.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + node.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "2\thdfs://hdfs1:9000/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "2\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + ) + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "0\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + ) + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert ( + res + == "42\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + ) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index a7293337a9e..a6c79b743bb 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1780,3 +1780,66 @@ def test_skip_empty_files(started_cluster): ) assert len(res.strip()) == 0 + + +def test_read_subcolumns(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\troot/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\troot/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == 
"0\troot/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\t/root/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\t/root/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\t/root/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert ( + res == "42\t/root/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + ) diff --git a/tests/queries/0_stateless/02797_read_subcolumns_from_files.reference b/tests/queries/0_stateless/02797_read_subcolumns_from_files.reference new file mode 100644 index 00000000000..45ea5a7a29f --- /dev/null +++ b/tests/queries/0_stateless/02797_read_subcolumns_from_files.reference @@ -0,0 +1,4 @@ +2 (1,2) 3 +2 (1,2) 3 +0 (0,0) 0 +42 (42,42) 42 diff --git a/tests/queries/0_stateless/02797_read_subcolumns_from_files.sh b/tests/queries/0_stateless/02797_read_subcolumns_from_files.sh new file mode 100755 index 00000000000..767acf68553 --- /dev/null +++ b/tests/queries/0_stateless/02797_read_subcolumns_from_files.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME + +$CLICKHOUSE_LOCAL -q "select ((1, 2), 3)::Tuple(b Tuple(c UInt32, d UInt32), e UInt32) as a format TSV" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select a.b.d, a.b, a.e from file('$DATA_FILE', TSV, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + +$CLICKHOUSE_LOCAL -q "select ((1, 2), 3)::Tuple(b Tuple(c UInt32, d UInt32), e UInt32) as a format JSONEachRow" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select a.b.d, a.b, a.e from file('$DATA_FILE', JSONEachRow, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" +$CLICKHOUSE_LOCAL -q "select x.b.d, x.b, x.e from file('$DATA_FILE', JSONEachRow, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" +$CLICKHOUSE_LOCAL -q "select x.b.d, x.b, x.e from file('$DATA_FILE', JSONEachRow, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + +rm $DATA_FILE + From 3dc4ff17608b42fb5aabe8a9e7cf90ec0f71e60e Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 4 Jul 2023 21:21:22 +0000 Subject: [PATCH 009/113] Remove obsolete settings --- docs/en/interfaces/formats.md | 3 -- .../operations/settings/settings-formats.md | 33 ------------------- docs/ru/interfaces/formats.md | 6 ---- docs/ru/operations/settings/settings.md | 33 ------------------- 4 files changed, 75 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 79baf04d75d..22a5fd0db37 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -2056,7 +2056,6 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. - [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. -- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`. - [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. - [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. @@ -2261,7 +2260,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam - [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. 
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. @@ -2327,7 +2325,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. - [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 1b22a6d1223..3c7c66fabdb 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1061,17 +1061,6 @@ Default value: 1. ## Arrow format settings {#arrow-format-settings} -### input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. 
-- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching} Ignore case when matching Arrow column names with ClickHouse column names. @@ -1121,17 +1110,6 @@ Default value: `none`. ## ORC format settings {#orc-format-settings} -### input_format_orc_import_nested {#input_format_orc_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_orc_row_batch_size {#input_format_orc_row_batch_size} Batch size when reading ORC stripes. @@ -1170,17 +1148,6 @@ Default value: `none`. ## Parquet format settings {#parquet-format-settings} -### input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching} Ignore case when matching Parquet column names with ClickHouse column names. diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 48a6132170a..c71b2cfb75e 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -1353,8 +1353,6 @@ ClickHouse поддерживает настраиваемую точность $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). - Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида: ``` bash @@ -1413,8 +1411,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). - ### Вывод данных {#selecting-data-arrow} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида: @@ -1471,8 +1467,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). 
- ### Вывод данных {#selecting-data-2} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида: diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index f83d05ff710..ae56e973ced 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -238,39 +238,6 @@ ClickHouse применяет настройку в тех случаях, ко В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение. -## input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - -## input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - -## input_format_orc_import_nested {#input_format_orc_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - ## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions} Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md). From e204a8fbe90d0ae21ac2085465c88a036f0b0b43 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 4 Jul 2023 21:21:43 +0000 Subject: [PATCH 010/113] Fix indent --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 710a6e4f135..1138b6a94b3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -829,7 +829,7 @@ class IColumn; MAKE_OBSOLETE(M, Bool, input_format_parquet_import_nested, false) \ MAKE_OBSOLETE(M, Bool, input_format_orc_import_nested, false) \ -/** The section above is for obsolete settings. Do not add anything there. */ + /** The section above is for obsolete settings. Do not add anything there. 
*/ #define FORMAT_FACTORY_SETTINGS(M, ALIAS) \ From 00292be8f23f97a1f534a7f41e37f8d25b6feccd Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 4 Jul 2023 21:24:56 +0000 Subject: [PATCH 011/113] Remore unrelated chages --- src/Core/SettingsQuirks.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index cee9c3d497c..3326f42adf5 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -71,12 +71,6 @@ void applySettingsQuirks(Settings & settings, Poco::Logger * log) } } -//#if defined(THREAD_SANITIZER) - settings.use_hedged_requests.value = false; - if (log) - LOG_WARNING(log, "use_hedged_requests has been disabled for the build with Thread Sanitizer, because they are using fibers, leading to a failed assertion inside TSan"); -//#endif - if (!queryProfilerWorks()) { if (settings.query_profiler_real_time_period_ns) From ba5e26aebf242211db3dbc2f7f987b92dd2c76bd Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 5 Jul 2023 10:49:32 +0000 Subject: [PATCH 012/113] need to deal with nulls --- src/Functions/array/arrayIntersect.cpp | 101 +++++++++---------------- 1 file changed, 36 insertions(+), 65 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 9d0021782ac..22f64428fd1 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -19,10 +21,12 @@ #include #include #include "Common/Exception.h" +#include "Common/logger_useful.h" #include #include #include #include +#include namespace DB @@ -511,11 +515,12 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable map.clear(); bool all_has_nullable = all_nullable; + bool current_has_nullable = false; for (size_t arg_num = 0; arg_num < args; ++arg_num) { const auto & arg = arrays.args[arg_num]; - bool current_has_nullable = false; + current_has_nullable = false; size_t off; // const array has only one row @@ -550,46 +555,19 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable } } - prev_off[arg_num] = off; - if (arg.is_const) - prev_off[arg_num] = 0; - - if (!current_has_nullable) + if (arg_num) + { + prev_off[arg_num] = off; + if (arg.is_const) + prev_off[arg_num] = 0; + } + if(!current_has_nullable) all_has_nullable = false; } - if (all_has_nullable) - { - ++result_offset; - result_data.insertDefault(); - null_map.push_back(1); - } - - for (const auto & pair : map) - { - if (pair.getMapped() == args) - { - ++result_offset; - // if constexpr (is_numeric_column) - // result_data.insertValue(pair.getKey()); - // else if constexpr (std::is_same_v || std::is_same_v) - // result_data.insertData(pair.getKey().data, pair.getKey().size); - // else - // result_data.deserializeAndInsertFromArena(pair.getKey().data); - - // if (all_nullable) - // null_map.push_back(0); - } - } - // result_offsets.getElement(row) = result_offset; - } - - for (size_t row = 0; row < rows; ++row) - { - bool all_has_nullable = all_nullable; for (size_t arg_num = 0; arg_num < 1; ++arg_num) { - bool current_has_nullable = false; + all_has_nullable = all_nullable; const auto & arg = arrays.args[arg_num]; size_t off; // const array has only one row @@ -598,29 +576,23 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable else off = (*arg.offsets)[row]; - prev_off[arg_num] = off; - if (arg.is_const) - prev_off[arg_num] = 0; - // throw 
Exception(ErrorCodes::LOGICAL_ERROR, "{}", result_offset); - for (size_t res_num = 0; res_num < result_offset; ++res_num) + for (auto i : collections::range(prev_off[arg_num], off)) { - if (arg.null_map && (*arg.null_map)[row]) + typename Map::LookupResult pair = nullptr; + if (arg.null_map && (*arg.null_map)[i]) current_has_nullable = true; - - typename Map::LookupResult pair; - - if constexpr (is_numeric_column) + else if constexpr (is_numeric_column) { - pair = map.find(columns[arg_num]->getElement(res_num)); + pair = map.find(columns[arg_num]->getElement(i)); } else if constexpr (std::is_same_v || std::is_same_v) - pair = map.find(columns[arg_num]->getDataAt(res_num)); + pair = map.find(columns[arg_num]->getDataAt(i)); else { const char * data = nullptr; - pair = map.find(columns[arg_num]->serializeValueIntoArena(res_num, arena, data)); + pair = map.find(columns[arg_num]->serializeValueIntoArena(i, arena, data)); } - + prev_off[arg_num] = off; if (arg.is_const) prev_off[arg_num] = 0; @@ -628,47 +600,46 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable if (!current_has_nullable) all_has_nullable = false; - if (pair->getMapped() == args)//for (const auto & pair : map) + if (pair && pair->getMapped() == args) { + ++pair->getMapped(); + ++result_offset; if constexpr (is_numeric_column) { - if (pair->getKey() == columns[arg_num]->getElement(res_num)) + if (pair->getKey() == columns[arg_num]->getElement(i)) { - // ++result_offset; result_data.insertValue(pair->getKey()); } } else if constexpr (std::is_same_v || std::is_same_v) { - if (pair->getKey() == columns[arg_num]->getDataAt(res_num)) + if (pair->getKey() == columns[arg_num]->getDataAt(i)) { - // ++result_offset; result_data.insertData(pair->getKey().data, pair->getKey().size); } } else { const char * data = nullptr; - if (pair->getKey() == columns[arg_num]->serializeValueIntoArena(res_num, arena, data)) + if (pair->getKey() == columns[arg_num]->serializeValueIntoArena(i, arena, data)) { - // ++result_offset; result_data.deserializeAndInsertFromArena(pair->getKey().data); } } if (all_nullable) null_map.push_back(0); + // std::cerr << "========== " << current_has_nullable << std::endl; + } + if (all_has_nullable) + { + ++result_offset; + result_data.insertDefault(); + null_map.push_back(1); } - } - if (all_has_nullable) - { - ++result_offset; - result_data.insertDefault(); - null_map.push_back(1); } result_offsets.getElement(row) = result_offset; - - } + } } ColumnPtr result_column = std::move(result_data_ptr); if (all_nullable) From 966f07bd8eb4b2dfdb89e984d35771be56205cc6 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 5 Jul 2023 15:42:49 +0000 Subject: [PATCH 013/113] fixed tests and dealt with nulls --- src/Functions/array/arrayIntersect.cpp | 125 +++++++++--------- .../00556_array_intersect.reference | 2 +- 2 files changed, 65 insertions(+), 62 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 22f64428fd1..46f7af56751 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -565,88 +565,91 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable all_has_nullable = false; } - for (size_t arg_num = 0; arg_num < 1; ++arg_num) + // We have NULL in output only once if it should be there + bool null_added = false; + const auto & arg = arrays.args[0]; + size_t off; + // const array has only one row + if (arg.is_const) + off = (*arg.offsets)[0]; + else + off = 
(*arg.offsets)[row]; + + for (auto i : collections::range(prev_off[0], off)) { all_has_nullable = all_nullable; - const auto & arg = arrays.args[arg_num]; - size_t off; - // const array has only one row - if (arg.is_const) - off = (*arg.offsets)[0]; - else - off = (*arg.offsets)[row]; + current_has_nullable = false; + typename Map::LookupResult pair = nullptr; - for (auto i : collections::range(prev_off[arg_num], off)) + if (arg.null_map && (*arg.null_map)[i]) { - typename Map::LookupResult pair = nullptr; - if (arg.null_map && (*arg.null_map)[i]) - current_has_nullable = true; - else if constexpr (is_numeric_column) + current_has_nullable = true; + if (null_added) + continue; + } + else if constexpr (is_numeric_column) + { + pair = map.find(columns[0]->getElement(i)); + } + else if constexpr (std::is_same_v || std::is_same_v) + pair = map.find(columns[0]->getDataAt(i)); + else + { + const char * data = nullptr; + pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); + } + prev_off[0] = off; + if (arg.is_const) + prev_off[0] = 0; + + if (!current_has_nullable) + all_has_nullable = false; + + if (pair && pair->getMapped() == args) + { + ++pair->getMapped(); + ++result_offset; + if constexpr (is_numeric_column) { - pair = map.find(columns[arg_num]->getElement(i)); + if (pair->getKey() == columns[0]->getElement(i)) + { + result_data.insertValue(pair->getKey()); + } } else if constexpr (std::is_same_v || std::is_same_v) - pair = map.find(columns[arg_num]->getDataAt(i)); + { + if (pair->getKey() == columns[0]->getDataAt(i)) + { + result_data.insertData(pair->getKey().data, pair->getKey().size); + } + } else { const char * data = nullptr; - pair = map.find(columns[arg_num]->serializeValueIntoArena(i, arena, data)); - } - - prev_off[arg_num] = off; - if (arg.is_const) - prev_off[arg_num] = 0; - - if (!current_has_nullable) - all_has_nullable = false; - - if (pair && pair->getMapped() == args) - { - ++pair->getMapped(); - ++result_offset; - if constexpr (is_numeric_column) + if (pair->getKey() == columns[0]->serializeValueIntoArena(i, arena, data)) { - if (pair->getKey() == columns[arg_num]->getElement(i)) - { - result_data.insertValue(pair->getKey()); - } + result_data.deserializeAndInsertFromArena(pair->getKey().data); } - else if constexpr (std::is_same_v || std::is_same_v) - { - if (pair->getKey() == columns[arg_num]->getDataAt(i)) - { - result_data.insertData(pair->getKey().data, pair->getKey().size); - } - } - else - { - const char * data = nullptr; - if (pair->getKey() == columns[arg_num]->serializeValueIntoArena(i, arena, data)) - { - result_data.deserializeAndInsertFromArena(pair->getKey().data); - } - } - if (all_nullable) - null_map.push_back(0); - // std::cerr << "========== " << current_has_nullable << std::endl; - } - if (all_has_nullable) - { - ++result_offset; - result_data.insertDefault(); - null_map.push_back(1); } + if (all_nullable) + null_map.push_back(0); + } + if (all_has_nullable && !null_added) + { + ++result_offset; + result_data.insertDefault(); + null_map.push_back(1); + null_added = true; } - result_offsets.getElement(row) = result_offset; - } + result_offsets.getElement(row) = result_offset; + } ColumnPtr result_column = std::move(result_data_ptr); if (all_nullable) result_column = ColumnNullable::create(result_column, std::move(null_map_column)); return ColumnArray::create(result_column, std::move(result_offsets_ptr)); - } diff --git a/tests/queries/0_stateless/00556_array_intersect.reference b/tests/queries/0_stateless/00556_array_intersect.reference 
index fad9b2f507d..b9841ef42c4 100644 --- a/tests/queries/0_stateless/00556_array_intersect.reference +++ b/tests/queries/0_stateless/00556_array_intersect.reference @@ -5,7 +5,7 @@ [1] [1] [1] -[NULL,1] +[1,NULL] [1] [1] [[1,1]] From 7c84599764d810b5154b8cb9a997058edbbbb8ca Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 5 Jul 2023 15:46:46 +0000 Subject: [PATCH 014/113] removed unused includes --- docs/en/operations/system-tables/parts.md | 2 -- src/Functions/array/arrayIntersect.cpp | 5 ----- 2 files changed, 7 deletions(-) diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index e04c67b1585..9159d1e9284 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -39,8 +39,6 @@ Columns: - `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. -- `primary_key_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values in the primary.idx/cidx file on disk. - - `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks. - `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included. diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 46f7af56751..b2df0263470 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include #include @@ -20,13 +18,10 @@ #include #include #include -#include "Common/Exception.h" -#include "Common/logger_useful.h" #include #include #include #include -#include namespace DB From 118ea83e219c2a7651852541ca9724705bd1947b Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 5 Jul 2023 15:51:16 +0000 Subject: [PATCH 015/113] undo unimportant changes --- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 - src/Storages/System/StorageSystemParts.cpp | 3 --- .../0_stateless/02117_show_create_table_system.reference | 1 - 3 files changed, 5 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 4be803fa05e..fd73d802579 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -341,7 +341,6 @@ public: UInt64 getIndexSizeInBytes() const; UInt64 getIndexSizeInAllocatedBytes() const; UInt64 getMarksCount() const; - UInt64 getIndexSizeFromFile() const; UInt64 getBytesOnDisk() const { return bytes_on_disk; } void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index e1e8ba1aa00..86ecb336b51 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -57,7 +57,6 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"bytes_on_disk", std::make_shared()}, {"data_compressed_bytes", std::make_shared()}, {"data_uncompressed_bytes", std::make_shared()}, - {"primary_key_size", std::make_shared()}, {"marks_bytes", std::make_shared()}, {"secondary_indices_compressed_bytes", std::make_shared()}, {"secondary_indices_uncompressed_bytes", 
std::make_shared()}, @@ -169,8 +168,6 @@ void StorageSystemParts::processNextStorage( columns[res_index++]->insert(columns_size.data_compressed); if (columns_mask[src_index++]) columns[res_index++]->insert(columns_size.data_uncompressed); - if (columns_mask[src_index++]) - columns[res_index++]->insert(part->getIndexSizeFromFile()); if (columns_mask[src_index++]) columns[res_index++]->insert(columns_size.marks); if (columns_mask[src_index++]) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index d71e8d4acab..703972279e7 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -466,7 +466,6 @@ CREATE TABLE system.parts `bytes_on_disk` UInt64, `data_compressed_bytes` UInt64, `data_uncompressed_bytes` UInt64, - `primary_key_size` UInt64, `marks_bytes` UInt64, `secondary_indices_compressed_bytes` UInt64, `secondary_indices_uncompressed_bytes` UInt64, From 4e0fed3d60f94a7be1341077b05e8e149c375340 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 5 Jul 2023 15:54:12 +0000 Subject: [PATCH 016/113] undo unimportant changes --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 148e9b2e4db..cf2d1b19326 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,6 +1,5 @@ #include "IMergeTreeDataPart.h" #include "Storages/MergeTree/IDataPartStorage.h" -#include "base/types.h" #include #include @@ -1800,22 +1799,6 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log); } -UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const -{ - auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - if (parent_part) - metadata_snapshot = metadata_snapshot->projections.get(name).metadata; - const auto & pk = metadata_snapshot->getPrimaryKey(); - if (!pk.column_names.empty()) - { - String file = "primary" + getIndexExtension(false); - if (checksums.files.contains("primary" + getIndexExtension(true))) - file = "primary" + getIndexExtension(true); - return getFileSizeOrZero(file); - } - return 0; -} - void IMergeTreeDataPart::checkConsistencyBase() const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); From 33f32684d42034db67c77bcf867165c31c4d4354 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 5 Jul 2023 16:34:42 +0000 Subject: [PATCH 017/113] fixed style check --- src/Functions/array/arrayIntersect.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index b2df0263470..7a8bde7ab5f 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -556,7 +556,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable if (arg.is_const) prev_off[arg_num] = 0; } - if(!current_has_nullable) + if (!current_has_nullable) all_has_nullable = false; } From d11cd0dc30d998324961cef3f649e203a1d8c957 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 5 Jul 2023 17:56:03 +0000 Subject: [PATCH 018/113] Fix tests --- src/Processors/Formats/IRowInputFormat.cpp | 1 - src/Storages/HDFS/StorageHDFS.cpp | 7 +++-- 
src/Storages/StorageAzureBlob.cpp | 7 +++-- src/Storages/StorageFile.cpp | 7 +++-- src/Storages/StorageS3.cpp | 5 ++-- src/Storages/StorageURL.cpp | 32 +++++++++------------- src/Storages/StorageURL.h | 4 +-- src/Storages/prepareReadingFromFormat.cpp | 6 ++-- src/Storages/prepareReadingFromFormat.h | 2 +- 9 files changed, 34 insertions(+), 37 deletions(-) diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 0728aecf61f..8bcf293033b 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -97,7 +97,6 @@ Chunk IRowInputFormat::generate() size_t num_rows = 0; size_t chunk_start_offset = getDataOffsetMaybeCompressed(getReadBuffer()); - try { RowReadExtension info; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index c662c21e60f..4949874cd74 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -440,7 +440,8 @@ Chunk HDFSSource::generate() size_t chunk_size = input_format->getApproxBytesReadForChunk(); if (!chunk_size) chunk_size = chunk.bytes(); - updateRowsProgressApprox(*this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); + if (chunk_size) + updateRowsProgressApprox(*this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); } for (const auto & virtual_column : requested_virtual_columns) @@ -602,7 +603,7 @@ private: bool StorageHDFS::supportsSubsetOfColumns() const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); } Pipe StorageHDFS::read( @@ -640,7 +641,7 @@ Pipe StorageHDFS::read( }); } - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, format_name, getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); Pipes pipes; auto this_ptr = std::static_pointer_cast(shared_from_this()); for (size_t i = 0; i < num_streams; ++i) diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index e74e141bb16..1512cccf891 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -632,7 +632,7 @@ Pipe StorageAzureBlob::read( query_info.query, virtual_block, local_context, nullptr); } - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, configuration.format, getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( @@ -994,8 +994,9 @@ Chunk StorageAzureBlobSource::generate() size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk(); if (!chunk_size) chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); + if (chunk_size) + updateRowsProgressApprox( + *this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); } for (const auto & virtual_column : requested_virtual_columns) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 
8e6745c14f7..eb5d8fd46b5 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -773,8 +773,9 @@ public: size_t chunk_size = input_format->getApproxBytesReadForChunk(); if (!chunk_size) chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); + if (chunk_size) + updateRowsProgressApprox( + *this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); } return chunk; } @@ -865,7 +866,7 @@ Pipe StorageFile::read( if (progress_callback) progress_callback(FileProgress(0, total_bytes_to_read)); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, format_name, getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); for (size_t i = 0; i < num_streams; ++i) { diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 6a50d147c67..3eca7a3ef08 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -720,7 +720,8 @@ Chunk StorageS3Source::generate() size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk(); if (!chunk_size) chunk_size = chunk.bytes(); - updateRowsProgressApprox(*this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); + if (chunk_size) + updateRowsProgressApprox(*this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); } for (const auto & virtual_column : requested_virtual_columns) @@ -1059,7 +1060,7 @@ Pipe StorageS3::read( std::shared_ptr iterator_wrapper = createFileIterator( query_configuration, distributed_processing, local_context, query_info.query, virtual_block); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, query_configuration.format, getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; for (size_t i = 0; i < num_streams; ++i) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 61efff72a49..f0f15e29129 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -290,7 +290,7 @@ StorageURLSource::StorageURLSource( input_format = FormatFactory::instance().getInput( format, *read_buf, - info.format_header, + block_for_format, context, max_block_size, format_settings, @@ -346,8 +346,9 @@ Chunk StorageURLSource::generate() size_t chunk_size = input_format->getApproxBytesReadForChunk(); if (!chunk_size) chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); + if (chunk_size) + updateRowsProgressApprox( + *this, num_rows, chunk_size, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); } const String & path{curr_uri.getPath()}; @@ -765,7 +766,7 @@ Pipe IStorageURLBase::read( num_streams = 1; } - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, format_name, getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); 
Pipes pipes; pipes.reserve(num_streams); @@ -777,7 +778,13 @@ Pipe IStorageURLBase::read( read_from_format_info, iterator_wrapper, getReadMethod(), - getReadPOSTDataCallback(column_names, read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback( + read_from_format_info.columns_description.getNamesOfPhysical(), + read_from_format_info.columns_description, + query_info, + local_context, + processed_stage, + max_block_size), format_name, format_settings, getName(), @@ -804,19 +811,6 @@ Pipe StorageURLWithFailover::read( size_t max_block_size, size_t /*num_streams*/) { - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - columns_description = storage_snapshot->getDescriptionForColumns(column_names); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); auto iterator_wrapper = std::make_shared([&, done = false]() mutable @@ -827,7 +821,7 @@ Pipe StorageURLWithFailover::read( return uri_options; }); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, format_name, getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); auto pipe = Pipe(std::make_shared( read_from_format_info, diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index a6fb6840a22..04da5d177ec 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -47,8 +47,6 @@ public: bool supportsPartitionBy() const override { return true; } - bool supportsSubcolumns() const override { return true; } - NamesAndTypesList getVirtuals() const override; static ColumnsDescription getTableStructureFromData( @@ -110,6 +108,8 @@ protected: bool supportsSubsetOfColumns() const override; + bool supportsSubcolumns() const override { return true; } + bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp index aac5e53c569..7cc31066e31 100644 --- a/src/Storages/prepareReadingFromFormat.cpp +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -4,7 +4,7 @@ namespace DB { -ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, const String & format_name, const NamesAndTypesList & virtuals) +ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals) { ReadFromFormatInfo info; /// Collect requested virtual columns and remove them from requested columns. @@ -33,9 +33,9 @@ ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, c info.source_header.insert({requested_virtual_column.type->createColumn(), requested_virtual_column.type, requested_virtual_column.name}); /// Set requested columns that should be read from data. 
- info.requested_columns = storage_snapshot->getColumnsByNames(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects(), columns_to_read); + info.requested_columns = storage_snapshot->getColumnsByNames(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns(), columns_to_read); - if (format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name)) + if (supports_subset_of_columns) { /// If only virtual columns were requested, just read the smallest column. if (columns_to_read.empty()) diff --git a/src/Storages/prepareReadingFromFormat.h b/src/Storages/prepareReadingFromFormat.h index ac029c215db..c5f3959a550 100644 --- a/src/Storages/prepareReadingFromFormat.h +++ b/src/Storages/prepareReadingFromFormat.h @@ -22,5 +22,5 @@ namespace DB }; /// Get all needed information for reading from data in some input format. - ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, const String & format_name, const NamesAndTypesList & virtuals); + ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals); } From 8d634c992bda74095befdf6b47012cbe17acceae Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 6 Jul 2023 17:47:01 +0000 Subject: [PATCH 019/113] Fix tests --- src/Storages/StorageURL.cpp | 2 +- src/Storages/StorageURL.h | 4 ++-- src/Storages/prepareReadingFromFormat.cpp | 13 +++++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index f0f15e29129..bb103c0d947 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -827,7 +827,7 @@ Pipe StorageURLWithFailover::read( read_from_format_info, iterator_wrapper, getReadMethod(), - getReadPOSTDataCallback(column_names, read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback(read_from_format_info.columns_description.getNamesOfPhysical(), read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 04da5d177ec..c77eb37601c 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -108,8 +108,6 @@ protected: bool supportsSubsetOfColumns() const override; - bool supportsSubcolumns() const override { return true; } - bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; @@ -278,6 +276,8 @@ public: return storage_snapshot->metadata->getSampleBlock(); } + bool supportsSubcolumns() const override { return true; } + static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); struct Configuration : public StatelessTableEngineConfiguration diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp index 7cc31066e31..6be4213ec6b 100644 --- a/src/Storages/prepareReadingFromFormat.cpp +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -48,9 +48,18 @@ ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, c else { std::unordered_set columns_to_read_set; + /// Save original order of columns. 
+ std::vector new_columns_to_read; for (const auto & column_to_read : info.requested_columns) - columns_to_read_set.insert(column_to_read.getNameInStorage()); - columns_to_read = Strings(columns_to_read_set.begin(), columns_to_read_set.end()); + { + auto name = column_to_read.getNameInStorage(); + if (!columns_to_read_set.contains(name)) + { + columns_to_read_set.insert(name); + new_columns_to_read.push_back(name); + } + } + columns_to_read = std::move(new_columns_to_read); } info.columns_description = storage_snapshot->getDescriptionForColumns(columns_to_read); } From 72e8303ee7dd73ff0038cb5cbf28b63c98d1a183 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jul 2023 22:49:06 +0200 Subject: [PATCH 020/113] Remove try/catch from DatabaseFilesystem --- .../Serializations/SerializationWrapper.h | 1 - src/Databases/DatabaseFilesystem.cpp | 23 ++++--------------- src/Databases/DatabaseFilesystem.h | 2 +- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index bf922888af9..31900f93148 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -77,7 +77,6 @@ public: void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; - void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; }; diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 7eaf474eea0..60ae55fff11 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -128,17 +128,17 @@ bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_) if (tryGetTableFromCache(name)) return true; - return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */false); + return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */ false); } -StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_) const +StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_, bool throw_on_error) const { /// Check if table exists in loaded tables map. if (auto table = tryGetTableFromCache(name)) return table; auto table_path = getTablePath(name); - checkTableFilePath(table_path, context_, /* throw_on_error */true); + checkTableFilePath(table_path, context_, throw_on_error); /// If the file exists, create a new table using TableFunctionFile and return it. auto args = makeASTFunction("file", std::make_shared(table_path)); @@ -158,7 +158,7 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_) const { /// getTableImpl can throw exceptions, do not catch them to show correct error to user. 
- if (auto storage = getTableImpl(name, context_)) + if (auto storage = getTableImpl(name, context_, true)) return storage; throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", @@ -167,20 +167,7 @@ StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_ StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const { - try - { - return getTableImpl(name, context_); - } - catch (const Exception & e) - { - /// Ignore exceptions thrown by TableFunctionFile, which indicate that there is no table - /// see tests/02722_database_filesystem.sh for more details. - if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) - { - return nullptr; - } - throw; - } + return getTableImpl(name, context_, false); } bool DatabaseFilesystem::empty() const diff --git a/src/Databases/DatabaseFilesystem.h b/src/Databases/DatabaseFilesystem.h index 7fe620401dc..b72891b9a5c 100644 --- a/src/Databases/DatabaseFilesystem.h +++ b/src/Databases/DatabaseFilesystem.h @@ -48,7 +48,7 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; protected: - StoragePtr getTableImpl(const String & name, ContextPtr context) const; + StoragePtr getTableImpl(const String & name, ContextPtr context, bool throw_on_error) const; StoragePtr tryGetTableFromCache(const std::string & name) const; From 1050fab8decfc01533527280b6f78f757c053bc8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jul 2023 23:12:08 +0200 Subject: [PATCH 021/113] Remove another exception --- src/Databases/DatabaseFilesystem.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 60ae55fff11..a6365b0348c 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -11,9 +11,11 @@ #include #include #include +#include #include + namespace fs = std::filesystem; namespace DB @@ -138,10 +140,15 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont return table; auto table_path = getTablePath(name); - checkTableFilePath(table_path, context_, throw_on_error); + if (!checkTableFilePath(table_path, context_, throw_on_error)) + return {}; + + String format = FormatFactory::instance().getFormatFromFileName(table_path, throw_on_error); + if (format.empty()) + return {}; /// If the file exists, create a new table using TableFunctionFile and return it. 
- auto args = makeASTFunction("file", std::make_shared(table_path)); + auto args = makeASTFunction("file", std::make_shared(table_path), std::make_shared(format)); auto table_function = TableFunctionFactory::instance().get(args, context_); if (!table_function) From db4735a10508e02b50565bba8a1e71161df90f82 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 17 Jul 2023 12:12:25 +0000 Subject: [PATCH 022/113] Implemented requested changes --- src/Functions/array/arrayIntersect.cpp | 47 +++++++++++--------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 7a8bde7ab5f..83b26b56071 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -510,7 +510,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable map.clear(); bool all_has_nullable = all_nullable; - bool current_has_nullable = false; + bool current_has_nullable; for (size_t arg_num = 0; arg_num < args; ++arg_num) { @@ -549,7 +549,8 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable ++(*value); } } - + // We update offsets for all the arrays except the first one. Offsets for the first array would be updated later. + // It is needed to iterate the first array again so that the elements in the result would have fixed order. if (arg_num) { prev_off[arg_num] = off; @@ -570,15 +571,21 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable else off = (*arg.offsets)[row]; + bool is_map_serialized = false; for (auto i : collections::range(prev_off[0], off)) { - all_has_nullable = all_nullable; - current_has_nullable = false; typename Map::LookupResult pair = nullptr; if (arg.null_map && (*arg.null_map)[i]) { current_has_nullable = true; + if (all_has_nullable && !null_added) + { + ++result_offset; + result_data.insertDefault(); + null_map.push_back(1); + null_added = true; + } if (null_added) continue; } @@ -591,51 +598,37 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable else { const char * data = nullptr; - pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); + if (!is_map_serialized) + { + pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); + is_map_serialized = true; + } } prev_off[0] = off; if (arg.is_const) prev_off[0] = 0; - if (!current_has_nullable) - all_has_nullable = false; - if (pair && pair->getMapped() == args) { + // We increase pair->getMapped() here to not skip duplicate values from the first array. 
++pair->getMapped(); ++result_offset; if constexpr (is_numeric_column) { - if (pair->getKey() == columns[0]->getElement(i)) - { - result_data.insertValue(pair->getKey()); - } + result_data.insertValue(pair->getKey()); } else if constexpr (std::is_same_v || std::is_same_v) { - if (pair->getKey() == columns[0]->getDataAt(i)) - { - result_data.insertData(pair->getKey().data, pair->getKey().size); - } + result_data.insertData(pair->getKey().data, pair->getKey().size); } else { const char * data = nullptr; - if (pair->getKey() == columns[0]->serializeValueIntoArena(i, arena, data)) - { - result_data.deserializeAndInsertFromArena(pair->getKey().data); - } + result_data.deserializeAndInsertFromArena(pair->getKey().data); } if (all_nullable) null_map.push_back(0); } - if (all_has_nullable && !null_added) - { - ++result_offset; - result_data.insertDefault(); - null_map.push_back(1); - null_added = true; - } } result_offsets.getElement(row) = result_offset; From adb46fa17498e02b1f92c71735f5ab175d4ff4bf Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 26 Jul 2023 15:20:54 +0000 Subject: [PATCH 023/113] Revert "Implemented requested changes" This reverts commit db4735a10508e02b50565bba8a1e71161df90f82. --- src/Functions/array/arrayIntersect.cpp | 47 +++++++++++++++----------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 83b26b56071..7a8bde7ab5f 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -510,7 +510,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable map.clear(); bool all_has_nullable = all_nullable; - bool current_has_nullable; + bool current_has_nullable = false; for (size_t arg_num = 0; arg_num < args; ++arg_num) { @@ -549,8 +549,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable ++(*value); } } - // We update offsets for all the arrays except the first one. Offsets for the first array would be updated later. - // It is needed to iterate the first array again so that the elements in the result would have fixed order. + if (arg_num) { prev_off[arg_num] = off; @@ -571,21 +570,15 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable else off = (*arg.offsets)[row]; - bool is_map_serialized = false; for (auto i : collections::range(prev_off[0], off)) { + all_has_nullable = all_nullable; + current_has_nullable = false; typename Map::LookupResult pair = nullptr; if (arg.null_map && (*arg.null_map)[i]) { current_has_nullable = true; - if (all_has_nullable && !null_added) - { - ++result_offset; - result_data.insertDefault(); - null_map.push_back(1); - null_added = true; - } if (null_added) continue; } @@ -598,37 +591,51 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable else { const char * data = nullptr; - if (!is_map_serialized) - { - pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); - is_map_serialized = true; - } + pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); } prev_off[0] = off; if (arg.is_const) prev_off[0] = 0; + if (!current_has_nullable) + all_has_nullable = false; + if (pair && pair->getMapped() == args) { - // We increase pair->getMapped() here to not skip duplicate values from the first array. 
++pair->getMapped(); ++result_offset; if constexpr (is_numeric_column) { - result_data.insertValue(pair->getKey()); + if (pair->getKey() == columns[0]->getElement(i)) + { + result_data.insertValue(pair->getKey()); + } } else if constexpr (std::is_same_v || std::is_same_v) { - result_data.insertData(pair->getKey().data, pair->getKey().size); + if (pair->getKey() == columns[0]->getDataAt(i)) + { + result_data.insertData(pair->getKey().data, pair->getKey().size); + } } else { const char * data = nullptr; - result_data.deserializeAndInsertFromArena(pair->getKey().data); + if (pair->getKey() == columns[0]->serializeValueIntoArena(i, arena, data)) + { + result_data.deserializeAndInsertFromArena(pair->getKey().data); + } } if (all_nullable) null_map.push_back(0); } + if (all_has_nullable && !null_added) + { + ++result_offset; + result_data.insertDefault(); + null_map.push_back(1); + null_added = true; + } } result_offsets.getElement(row) = result_offset; From dc5cc0a5418a9aab424939055e8b67200e1f2996 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 26 Jul 2023 16:23:17 +0000 Subject: [PATCH 024/113] fixed check --- src/Functions/array/arrayIntersect.cpp | 27 +++++++++++++------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 7a8bde7ab5f..6abfb8ec5c6 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -550,6 +550,8 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable } } + // We update offsets for all the arrays except the first one. Offsets for the first array would be updated later. + // It is needed to iterate the first array again so that the elements in the result would have fixed order. if (arg_num) { prev_off[arg_num] = off; @@ -573,12 +575,18 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable for (auto i : collections::range(prev_off[0], off)) { all_has_nullable = all_nullable; - current_has_nullable = false; typename Map::LookupResult pair = nullptr; if (arg.null_map && (*arg.null_map)[i]) { current_has_nullable = true; + if (all_has_nullable && !null_added) + { + ++result_offset; + result_data.insertDefault(); + null_map.push_back(1); + null_added = true; + } if (null_added) continue; } @@ -602,29 +610,20 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable if (pair && pair->getMapped() == args) { + // We increase pair->getMapped() here to not skip duplicate values from the first array. 
++pair->getMapped(); ++result_offset; if constexpr (is_numeric_column) { - if (pair->getKey() == columns[0]->getElement(i)) - { - result_data.insertValue(pair->getKey()); - } + result_data.insertValue(pair->getKey()); } else if constexpr (std::is_same_v || std::is_same_v) { - if (pair->getKey() == columns[0]->getDataAt(i)) - { - result_data.insertData(pair->getKey().data, pair->getKey().size); - } + result_data.insertData(pair->getKey().data, pair->getKey().size); } else { - const char * data = nullptr; - if (pair->getKey() == columns[0]->serializeValueIntoArena(i, arena, data)) - { - result_data.deserializeAndInsertFromArena(pair->getKey().data); - } + result_data.deserializeAndInsertFromArena(pair->getKey().data); } if (all_nullable) null_map.push_back(0); From eae86f603c24600df39f38f255a700c4acc7432f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 06:34:09 +0200 Subject: [PATCH 025/113] Fix the test --- src/Databases/DatabaseFilesystem.cpp | 6 ++---- src/Interpreters/DatabaseCatalog.cpp | 21 ++++++++++++++++--- .../0_stateless/02722_database_filesystem.sh | 2 +- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index a6365b0348c..59f9ee67d7b 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -77,10 +77,8 @@ bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, Cont /// Check access for file before checking its existence. if (check_path && !fileOrSymlinkPathStartsWith(table_path, user_files_path)) { - if (throw_on_error) - throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path); - else - return false; + /// Access denied is thrown regardless of 'throw_on_error' + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path); } /// Check if the corresponding file exists. diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 13cac5afb1b..f8481e3f1d8 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -336,7 +336,6 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( return db_and_table; } - if (table_id.database_name == TEMPORARY_DATABASE) { /// For temporary tables UUIDs are set in Context::resolveStorageID(...). 
@@ -369,8 +368,24 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( database = it->second; } - auto table = database->tryGetTable(table_id.table_name, context_); - if (!table && exception) + StoragePtr table; + if (exception) + { + try + { + table = database->getTable(table_id.table_name, context_); + } + catch (const Exception & e) + { + exception->emplace(e); + } + } + else + { + table = database->tryGetTable(table_id.table_name, context_); + } + + if (!table && exception && !exception->has_value()) exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs())); if (!table) diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 3b7a41bb39e..7b2e1bf1a66 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -61,7 +61,7 @@ CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); """ 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: # FILE_DOESNT_EXIST: unknown file -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 60" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 107" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: Cannot determine the file format by it's extension ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: From 7c1832fcfc7b0b97d6e9d672938c0f26f04e4738 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Thu, 27 Jul 2023 09:41:47 +0300 Subject: [PATCH 026/113] Fix named collections related statements #51609 --- .../NamedCollections/NamedCollectionUtils.cpp | 56 +++++++++++-------- .../NamedCollections/NamedCollectionUtils.h | 4 +- .../InterpreterCreateNamedCollectionQuery.cpp | 1 - .../InterpreterDropNamedCollectionQuery.cpp | 6 +- src/Parsers/ASTCreateNamedCollectionQuery.h | 1 + .../ParserAlterNamedCollectionQuery.cpp | 8 ++- src/Parsers/ParserCreateQuery.cpp | 17 ++++-- src/Parsers/ParserCreateQuery.h | 1 + .../ParserDropNamedCollectionQuery.cpp | 3 +- 9 files changed, 57 insertions(+), 40 deletions(-) diff --git a/src/Common/NamedCollections/NamedCollectionUtils.cpp b/src/Common/NamedCollections/NamedCollectionUtils.cpp index 6ec09fb8a77..12bf100feae 100644 --- a/src/Common/NamedCollections/NamedCollectionUtils.cpp +++ b/src/Common/NamedCollections/NamedCollectionUtils.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -225,24 +226,15 @@ public: void remove(const std::string & collection_name) { - if (!removeIfExists(collection_name)) + auto collection_path = getMetadataPath(collection_name); + if (!fs::exists(collection_path)) { throw Exception( ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, "Cannot remove collection `{}`, because it doesn't exist", collection_name); } - } - - bool removeIfExists(const std::string & collection_name) - { - auto collection_path = getMetadataPath(collection_name); - if (fs::exists(collection_path)) - { - fs::remove(collection_path); - return true; - } - return false; + fs::remove(collection_path); } private: @@ -393,27 +385,43 @@ void loadIfNot() return loadIfNotUnlocked(lock); } -void removeFromSQL(const std::string & collection_name, ContextPtr context) +void 
removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context) { auto lock = lockNamedCollectionsTransaction(); loadIfNotUnlocked(lock); - LoadFromSQL(context).remove(collection_name); - NamedCollectionFactory::instance().remove(collection_name); -} - -void removeIfExistsFromSQL(const std::string & collection_name, ContextPtr context) -{ - auto lock = lockNamedCollectionsTransaction(); - loadIfNotUnlocked(lock); - LoadFromSQL(context).removeIfExists(collection_name); - NamedCollectionFactory::instance().removeIfExists(collection_name); + auto & instance = NamedCollectionFactory::instance(); + if (!instance.exists(query.collection_name)) + { + if (!query.if_exists) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, + "Cannot remove collection `{}`, because it doesn't exist", + query.collection_name); + } + return; + } + LoadFromSQL(context).remove(query.collection_name); + instance.remove(query.collection_name); } void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context) { auto lock = lockNamedCollectionsTransaction(); loadIfNotUnlocked(lock); - NamedCollectionFactory::instance().add(query.collection_name, LoadFromSQL(context).create(query)); + auto & instance = NamedCollectionFactory::instance(); + if (instance.exists(query.collection_name)) + { + if (!query.if_not_exists) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "A named collection `{}` already exists", + query.collection_name); + } + return; + } + instance.add(query.collection_name, LoadFromSQL(context).create(query)); } void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context) diff --git a/src/Common/NamedCollections/NamedCollectionUtils.h b/src/Common/NamedCollections/NamedCollectionUtils.h index c929abb5d74..6cbe9101550 100644 --- a/src/Common/NamedCollections/NamedCollectionUtils.h +++ b/src/Common/NamedCollections/NamedCollectionUtils.h @@ -8,6 +8,7 @@ namespace DB class ASTCreateNamedCollectionQuery; class ASTAlterNamedCollectionQuery; +class ASTDropNamedCollectionQuery; namespace NamedCollectionUtils { @@ -26,8 +27,7 @@ void reloadFromConfig(const Poco::Util::AbstractConfiguration & config); void loadFromSQL(ContextPtr context); /// Remove collection as well as its metadata from `context->getPath() / named_collections /`. -void removeFromSQL(const std::string & collection_name, ContextPtr context); -void removeIfExistsFromSQL(const std::string & collection_name, ContextPtr context); +void removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context); /// Create a new collection from AST and put it to `context->getPath() / named_collections /`. 
void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context); diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index bac59998062..3b0fba5fd9f 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -1,5 +1,4 @@ #include - #include #include #include diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index cc3444bb4df..fe49b1cfd7c 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -22,11 +22,7 @@ BlockIO InterpreterDropNamedCollectionQuery::execute() return executeDDLQueryOnCluster(query_ptr, current_context, params); } - if (query.if_exists) - NamedCollectionUtils::removeIfExistsFromSQL(query.collection_name, current_context); - else - NamedCollectionUtils::removeFromSQL(query.collection_name, current_context); - + NamedCollectionUtils::removeFromSQL(query, current_context); return {}; } diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.h b/src/Parsers/ASTCreateNamedCollectionQuery.h index 303d1901931..785aeb781b4 100644 --- a/src/Parsers/ASTCreateNamedCollectionQuery.h +++ b/src/Parsers/ASTCreateNamedCollectionQuery.h @@ -13,6 +13,7 @@ class ASTCreateNamedCollectionQuery : public IAST, public ASTQueryWithOnCluster public: std::string collection_name; SettingsChanges changes; + bool if_not_exists = false; String getID(char) const override { return "CreateNamedCollectionQuery"; } diff --git a/src/Parsers/ParserAlterNamedCollectionQuery.cpp b/src/Parsers/ParserAlterNamedCollectionQuery.cpp index 9108747ad82..8fb84f86246 100644 --- a/src/Parsers/ParserAlterNamedCollectionQuery.cpp +++ b/src/Parsers/ParserAlterNamedCollectionQuery.cpp @@ -13,8 +13,9 @@ bool ParserAlterNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & nod { ParserKeyword s_alter("ALTER"); ParserKeyword s_collection("NAMED COLLECTION"); + ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_on("ON"); ParserKeyword s_delete("DELETE"); - ParserIdentifier name_p; ParserSetQuery set_p; ParserToken s_comma(TokenType::Comma); @@ -32,10 +33,13 @@ bool ParserAlterNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & nod if (!s_collection.ignore(pos, expected)) return false; + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 415d3321eb5..82674ab1a35 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1421,15 +1421,17 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_create("CREATE"); - ParserKeyword s_attach("ATTACH"); ParserKeyword s_named_collection("NAMED COLLECTION"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); + ParserKeyword s_on("ON"); ParserKeyword s_as("AS"); - - ParserToken s_comma(TokenType::Comma); ParserIdentifier name_p; + ParserToken s_comma(TokenType::Comma); + + String cluster_str; + bool if_not_exists = false; ASTPtr collection_name; - 
String cluster_str; if (!s_create.ignore(pos, expected)) return false; @@ -1437,10 +1439,13 @@ bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (!s_named_collection.ignore(pos, expected)) return false; + if (s_if_not_exists.ignore(pos, expected)) + if_not_exists = true; + if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; @@ -1465,7 +1470,9 @@ bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expec auto query = std::make_shared(); tryGetIdentifierNameInto(collection_name, query->collection_name); + query->if_not_exists = if_not_exists; query->changes = changes; + query->cluster = std::move(cluster_str); node = query; return true; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 09935e2b608..0a98923436c 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -548,6 +548,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/// CREATE NAMED COLLECTION name [ON CLUSTER cluster] class ParserCreateNamedCollectionQuery : public IParserBase { protected: diff --git a/src/Parsers/ParserDropNamedCollectionQuery.cpp b/src/Parsers/ParserDropNamedCollectionQuery.cpp index 1ea8aa6d75d..b0b010b5ef6 100644 --- a/src/Parsers/ParserDropNamedCollectionQuery.cpp +++ b/src/Parsers/ParserDropNamedCollectionQuery.cpp @@ -12,6 +12,7 @@ bool ParserDropNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node ParserKeyword s_drop("DROP"); ParserKeyword s_collection("NAMED COLLECTION"); ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_on("ON"); ParserIdentifier name_p; String cluster_str; @@ -31,7 +32,7 @@ bool ParserDropNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; From e511072df1b26d466d96c9cf81ca6031a44956ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Thu, 27 Jul 2023 09:42:38 +0300 Subject: [PATCH 027/113] Update named collections related docs --- docs/README.md | 4 +-- .../table-engines/integrations/deltalake.md | 2 +- .../table-engines/integrations/hudi.md | 2 +- .../sql-reference/statements/alter/index.md | 2 ++ .../statements/alter/named-collection.md | 30 ++++++++++++++++ .../sql-reference/statements/create/index.md | 21 ++++++------ .../statements/create/named-collection.md | 34 +++++++++++++++++++ docs/en/sql-reference/statements/drop.md | 17 ++++++++++ .../sql-reference/table-functions/iceberg.md | 2 +- 9 files changed, 99 insertions(+), 15 deletions(-) create mode 100644 docs/en/sql-reference/statements/alter/named-collection.md create mode 100644 docs/en/sql-reference/statements/create/named-collection.md diff --git a/docs/README.md b/docs/README.md index 0cd35a4e3ec..d1260312166 100644 --- a/docs/README.md +++ b/docs/README.md @@ -200,8 +200,8 @@ Templates: - [Server Setting](_description_templates/template-server-setting.md) - [Database or Table engine](_description_templates/template-engine.md) - [System table](_description_templates/template-system-table.md) -- [Data type](_description_templates/data-type.md) -- 
[Statement](_description_templates/statement.md) +- [Data type](_description_templates/template-data-type.md) +- [Statement](_description_templates/template-statement.md) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index b562e9d7fe6..964c952f31a 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -22,7 +22,7 @@ CREATE TABLE deltalake - `url` — Bucket url with path to the existing Delta Lake table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. -Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). **Example** diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index c60618af289..b2f599e5c92 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -22,7 +22,7 @@ CREATE TABLE hudi_table - `url` — Bucket url with the path to an existing Hudi table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. -Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). **Example** diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 7dadc2be5b2..dca34d16f25 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -36,6 +36,8 @@ These `ALTER` statements modify entities related to role-based access control: [ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes comments to the table, regardless if it was set before or not. +[ALTER NAMED COLLECTION](/docs/en/sql-reference/statements/alter/named-collection.md) statement modifies [Named Collections](/docs/en/operations/named-collections.md). + ## Mutations `ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts. 
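The documentation files added below describe the `CREATE`, `ALTER` and `DROP` forms of named collection statements together with the `IF [NOT] EXISTS` clauses introduced by the parser changes earlier in this series. As a quick orientation, a minimal sketch of how the three statements compose is shown here; the collection and key names are illustrative only and do not come from the patches themselves:

```sql
-- Create a collection of key/value pairs; IF NOT EXISTS makes the statement a no-op
-- when a collection with this name already exists.
CREATE NAMED COLLECTION IF NOT EXISTS foobar AS a = '1', b = '2';

-- SET overwrites or adds keys, DELETE removes them; IF EXISTS suppresses the error
-- for a missing collection.
ALTER NAMED COLLECTION IF EXISTS foobar SET a = '2', c = '3';
ALTER NAMED COLLECTION IF EXISTS foobar DELETE b;

-- Remove the collection itself.
DROP NAMED COLLECTION IF EXISTS foobar;
```

All three statements also accept `ON CLUSTER cluster`, which the parsers above handle uniformly.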
diff --git a/docs/en/sql-reference/statements/alter/named-collection.md b/docs/en/sql-reference/statements/alter/named-collection.md new file mode 100644 index 00000000000..ac6752127c1 --- /dev/null +++ b/docs/en/sql-reference/statements/alter/named-collection.md @@ -0,0 +1,30 @@ +--- +slug: /en/sql-reference/statements/alter/named-collection +sidebar_label: NAMED COLLECTION +--- + +# ALTER NAMED COLLECTION + +This query intends to modify already existing named collections. + +**Syntax** + +```sql +ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster] +[ SET +key_name1 = 'some value', +key_name2 = 'some value', +key_name3 = 'some value', +... ] | +[ DELETE key_name4, key_name5, ... ] +``` + +**Example** + +```sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; + +ALTER NAMED COLLECTION foobar SET a = '2', c = '3'; + +ALTER NAMED COLLECTION foobar DELETE b; +``` diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index 14e29d051d7..fa39526a53e 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -8,13 +8,14 @@ sidebar_label: CREATE Create queries make a new entity of one of the following kinds: -- [DATABASE](../../../sql-reference/statements/create/database.md) -- [TABLE](../../../sql-reference/statements/create/table.md) -- [VIEW](../../../sql-reference/statements/create/view.md) -- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md) -- [FUNCTION](../../../sql-reference/statements/create/function.md) -- [USER](../../../sql-reference/statements/create/user.md) -- [ROLE](../../../sql-reference/statements/create/role.md) -- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md) -- [QUOTA](../../../sql-reference/statements/create/quota.md) -- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md) +- [DATABASE](/docs/en/sql-reference/statements/create/database.md) +- [TABLE](/docs/en/sql-reference/statements/create/table.md) +- [VIEW](/docs/en/sql-reference/statements/create/view.md) +- [DICTIONARY](/docs/en/sql-reference/statements/create/dictionary.md) +- [FUNCTION](/docs/en/sql-reference/statements/create/function.md) +- [USER](/docs/en/sql-reference/statements/create/user.md) +- [ROLE](/docs/en/sql-reference/statements/create/role.md) +- [ROW POLICY](/docs/en/sql-reference/statements/create/row-policy.md) +- [QUOTA](/docs/en/sql-reference/statements/create/quota.md) +- [SETTINGS PROFILE](/docs/en/sql-reference/statements/create/settings-profile.md) +- [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md) diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md new file mode 100644 index 00000000000..e30a8cd6df2 --- /dev/null +++ b/docs/en/sql-reference/statements/create/named-collection.md @@ -0,0 +1,34 @@ +--- +slug: /en/sql-reference/statements/create/named-collection +sidebar_label: NAMED COLLECTION +--- + +# CREATE NAMED COLLECTION + +Creates a new named collection. + +**Syntax** + +```sql +CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS +key_name1 = 'some value', +key_name2 = 'some value', +key_name3 = 'some value', +... 
+``` + +**Example** + +```sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; +``` + +**Related satements** + +- [CREATE NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/alter/named-collection) +- [DROP NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/drop#drop-function) + + +**See Also** + +- [Named collections guide](/docs/en/operations/named-collections.md) diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index b6208c2fd52..c91457993c4 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -119,3 +119,20 @@ DROP FUNCTION [IF EXISTS] function_name [on CLUSTER cluster] CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b; DROP FUNCTION linear_equation; ``` + +## DROP NAMED COLLECTION + +Deletes a named collection. + +**Syntax** + +``` sql +DROP NAMED COLLECTION [IF EXISTS] name [on CLUSTER cluster] +``` + +**Example** + +``` sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; +DROP NAMED COLLECTION foobar; +``` diff --git a/docs/en/sql-reference/table-functions/iceberg.md b/docs/en/sql-reference/table-functions/iceberg.md index 30db0ef00aa..fa86b436a5e 100644 --- a/docs/en/sql-reference/table-functions/iceberg.md +++ b/docs/en/sql-reference/table-functions/iceberg.md @@ -21,7 +21,7 @@ iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure]) - `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -Engine parameters can be specified using [Named Collections](../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). 
**Returned value** From d6afa3de0150dfeaf2c346bf6f5080c095d73091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Thu, 27 Jul 2023 10:29:36 +0300 Subject: [PATCH 028/113] Fix missing 'typename' prior to dependent type name --- src/Common/SystemLogBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index ed5ffd78a7b..3d68fe63227 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -185,7 +185,7 @@ void SystemLogQueue::confirm(uint64_t to_flush_end) } template -SystemLogQueue::Index SystemLogQueue::pop(std::vector& output, bool& should_prepare_tables_anyway, bool& exit_this_thread) +typename SystemLogQueue::Index SystemLogQueue::pop(std::vector& output, bool& should_prepare_tables_anyway, bool& exit_this_thread) { std::unique_lock lock(mutex); flush_event.wait_for(lock, From 5395d34fd719e5ff4b94a522ee42278dcc2acece Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Thu, 27 Jul 2023 10:32:10 +0300 Subject: [PATCH 029/113] Fix missing constructor call --- src/Storages/HDFS/StorageHDFS.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index f176ac6f037..fa6cfd824e7 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -114,9 +114,9 @@ namespace { if (next_slash_after_glob_pos == std::string::npos) { - result.emplace_back( + result.emplace_back(StorageHDFS::PathWithInfo{ String(ls.file_info[i].mName), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}); + StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); } else { From bcc41fbc2f2f32a7187110583d8739b72a311856 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Thu, 27 Jul 2023 10:46:35 +0300 Subject: [PATCH 030/113] Fix reference to a local binding declared in an enclosing function --- src/Coordination/KeeperSnapshotManagerS3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 0b135442265..cbb5c110eda 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -218,7 +218,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh } catch (...) 
{ - LOG_INFO(log, "Failed to delete lock file for {} from S3", snapshot_path); + LOG_INFO(log, "Failed to delete lock file for {} from S3", snapshot_file_info.path); tryLogCurrentException(__PRETTY_FUNCTION__); } }); From e749938e92667ed3195c773726037d44c865993b Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 27 Jul 2023 12:07:34 +0000 Subject: [PATCH 031/113] fixed test --- src/Functions/array/arrayIntersect.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 6abfb8ec5c6..ee84e3138e8 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -628,13 +628,6 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable if (all_nullable) null_map.push_back(0); } - if (all_has_nullable && !null_added) - { - ++result_offset; - result_data.insertDefault(); - null_map.push_back(1); - null_added = true; - } } result_offsets.getElement(row) = result_offset; From da53bca6e4650b6d67737e6efd1b95232bf4ffc7 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 27 Jul 2023 18:29:50 +0000 Subject: [PATCH 032/113] added tests --- .../System/attachInformationSchemaTables.cpp | 8 ++++++++ .../test_from_system_tables.py | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 61a91685324..6f378671104 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -41,6 +41,14 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d database.getTableDataPath(ast_create), context, true).second; database.createTable(context, ast_create.getTable(), view, ast); + if (ast_create.getTable() == "tables") + { + database.createTable(context, Poco::toUpper(ast_create.getTable()), view, ast); + } + else if (ast_create.getTable() == "TABLES") + { + database.createTable(context, Poco::toLower(ast_create.getTable()), view, ast); + } } catch (...) 
{ diff --git a/tests/integration/test_select_access_rights/test_from_system_tables.py b/tests/integration/test_select_access_rights/test_from_system_tables.py index ac938a9694a..ccdd09c67ef 100644 --- a/tests/integration/test_select_access_rights/test_from_system_tables.py +++ b/tests/integration/test_select_access_rights/test_from_system_tables.py @@ -190,3 +190,15 @@ def test_information_schema(): ) == "1\n" ) + assert ( + node.query( + "SELECT count() FROM information_schema.TABLES WHERE table_name='TABLES'" + ) + == "3\n" + ) + assert ( + node.query( + "SELECT count() FROM INFORMATION_SCHEMA.tables WHERE table_name='tables'" + ) + == "3\n" + ) From d1f3849d991e3115c3ded4d97ea841cc724a0e22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Thu, 27 Jul 2023 22:08:36 +0300 Subject: [PATCH 033/113] Fix errata at named collections sql-reference --- docs/en/sql-reference/statements/create/named-collection.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md index e30a8cd6df2..1fc7b11c554 100644 --- a/docs/en/sql-reference/statements/create/named-collection.md +++ b/docs/en/sql-reference/statements/create/named-collection.md @@ -23,7 +23,7 @@ key_name3 = 'some value', CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; ``` -**Related satements** +**Related statements** - [CREATE NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/alter/named-collection) - [DROP NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/drop#drop-function) From ae4406cd807f4a94a8072d2872366d60f40a2056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Thu, 27 Jul 2023 22:11:07 +0300 Subject: [PATCH 034/113] Fix named collections AST formatters (if [not] exists) --- src/Parsers/ASTAlterNamedCollectionQuery.cpp | 2 ++ src/Parsers/ASTCreateNamedCollectionQuery.cpp | 2 ++ src/Parsers/ASTDropNamedCollectionQuery.cpp | 2 ++ 3 files changed, 6 insertions(+) diff --git a/src/Parsers/ASTAlterNamedCollectionQuery.cpp b/src/Parsers/ASTAlterNamedCollectionQuery.cpp index 7e95147ad75..6363a7306bd 100644 --- a/src/Parsers/ASTAlterNamedCollectionQuery.cpp +++ b/src/Parsers/ASTAlterNamedCollectionQuery.cpp @@ -15,6 +15,8 @@ ASTPtr ASTAlterNamedCollectionQuery::clone() const void ASTAlterNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "Alter NAMED COLLECTION "; + if (if_exists) + settings.ostr << "IF EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); formatOnCluster(settings); if (!changes.empty()) diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.cpp b/src/Parsers/ASTCreateNamedCollectionQuery.cpp index 97e83541f05..45ef8565148 100644 --- a/src/Parsers/ASTCreateNamedCollectionQuery.cpp +++ b/src/Parsers/ASTCreateNamedCollectionQuery.cpp @@ -18,6 +18,8 @@ ASTPtr ASTCreateNamedCollectionQuery::clone() const void ASTCreateNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? 
hilite_keyword : "") << "CREATE NAMED COLLECTION "; + if (if_not_exists) + settings.ostr << "IF NOT EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); formatOnCluster(settings); diff --git a/src/Parsers/ASTDropNamedCollectionQuery.cpp b/src/Parsers/ASTDropNamedCollectionQuery.cpp index 3b8568cfd70..e317681d33d 100644 --- a/src/Parsers/ASTDropNamedCollectionQuery.cpp +++ b/src/Parsers/ASTDropNamedCollectionQuery.cpp @@ -13,6 +13,8 @@ ASTPtr ASTDropNamedCollectionQuery::clone() const void ASTDropNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP NAMED COLLECTION "; + if (if_exists) + settings.ostr << "IF EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); formatOnCluster(settings); } From 686ed14e714771f92186e31ede4ae78ab96e2565 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Thu, 27 Jul 2023 23:30:21 +0300 Subject: [PATCH 035/113] Fix alter named collection if exists --- .../NamedCollections/NamedCollectionUtils.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Common/NamedCollections/NamedCollectionUtils.cpp b/src/Common/NamedCollections/NamedCollectionUtils.cpp index 12bf100feae..cab844d6213 100644 --- a/src/Common/NamedCollections/NamedCollectionUtils.cpp +++ b/src/Common/NamedCollections/NamedCollectionUtils.cpp @@ -428,9 +428,21 @@ void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr contex { auto lock = lockNamedCollectionsTransaction(); loadIfNotUnlocked(lock); + auto & instance = NamedCollectionFactory::instance(); + if (!instance.exists(query.collection_name)) + { + if (!query.if_exists) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, + "Cannot remove collection `{}`, because it doesn't exist", + query.collection_name); + } + return; + } LoadFromSQL(context).update(query); - auto collection = NamedCollectionFactory::instance().getMutable(query.collection_name); + auto collection = instance.getMutable(query.collection_name); auto collection_lock = collection->lock(); for (const auto & [name, value] : query.changes) From 96df21bbf58efbf5c1a914632702472f74f67490 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 28 Jul 2023 13:35:13 +0000 Subject: [PATCH 036/113] implemented for all tables, fixed style and fast test --- src/Storages/System/attachInformationSchemaTables.cpp | 6 +++--- .../0_stateless/01161_information_schema.reference | 8 ++++++++ .../02206_information_schema_show_database.reference | 2 ++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 6f378671104..efa24daeffe 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -41,13 +41,13 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d database.getTableDataPath(ast_create), context, true).second; database.createTable(context, ast_create.getTable(), view, ast); - if (ast_create.getTable() == "tables") + if (database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA) { database.createTable(context, 
Poco::toUpper(ast_create.getTable()), view, ast); } - else if (ast_create.getTable() == "TABLES") + else { - database.createTable(context, Poco::toLower(ast_create.getTable()), view, ast); + database.createTable(context, Poco::toLower(ast_create.getTable()), view, ast); } } catch (...) diff --git a/tests/queries/0_stateless/01161_information_schema.reference b/tests/queries/0_stateless/01161_information_schema.reference index 5331e30b899..4ec33a70be0 100644 --- a/tests/queries/0_stateless/01161_information_schema.reference +++ b/tests/queries/0_stateless/01161_information_schema.reference @@ -1,3 +1,7 @@ +COLUMNS +SCHEMATA +TABLES +VIEWS columns schemata tables @@ -6,6 +10,10 @@ COLUMNS SCHEMATA TABLES VIEWS +columns +schemata +tables +views INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N \N \N information_schema information_schema default \N \N \N \N default default mv VIEW diff --git a/tests/queries/0_stateless/02206_information_schema_show_database.reference b/tests/queries/0_stateless/02206_information_schema_show_database.reference index 821fddbb933..f5fd221a689 100644 --- a/tests/queries/0_stateless/02206_information_schema_show_database.reference +++ b/tests/queries/0_stateless/02206_information_schema_show_database.reference @@ -1,4 +1,6 @@ CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n `table_catalog` String,\n `table_schema` String,\n `table_name` String,\n `TABLE_SCHEMA` String,\n `TABLE_NAME` String,\n `column_name` String,\n `ordinal_position` UInt64,\n `column_default` String,\n `is_nullable` String,\n `data_type` String,\n `character_maximum_length` Nullable(UInt64),\n `character_octet_length` Nullable(UInt64),\n `numeric_precision` Nullable(UInt64),\n `numeric_precision_radix` Nullable(UInt64),\n `numeric_scale` Nullable(UInt64),\n `datetime_precision` Nullable(UInt64),\n `character_set_catalog` Nullable(String),\n `character_set_schema` Nullable(String),\n `character_set_name` Nullable(String),\n `collation_catalog` Nullable(String),\n `collation_schema` Nullable(String),\n `collation_name` Nullable(String),\n `domain_catalog` Nullable(String),\n `domain_schema` Nullable(String),\n `domain_name` Nullable(String),\n `column_comment` String,\n `column_type` String,\n `TABLE_CATALOG` String ALIAS table_catalog,\n `COLUMN_NAME` String ALIAS column_name,\n `ORDINAL_POSITION` UInt64 ALIAS ordinal_position,\n `COLUMN_DEFAULT` String ALIAS column_default,\n `IS_NULLABLE` String ALIAS is_nullable,\n `DATA_TYPE` String ALIAS data_type,\n `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64) ALIAS character_maximum_length,\n `CHARACTER_OCTET_LENGTH` Nullable(UInt64) ALIAS character_octet_length,\n `NUMERIC_PRECISION` Nullable(UInt64) ALIAS numeric_precision,\n `NUMERIC_PRECISION_RADIX` Nullable(UInt64) ALIAS numeric_precision_radix,\n `NUMERIC_SCALE` Nullable(UInt64) ALIAS numeric_scale,\n `DATETIME_PRECISION` Nullable(UInt64) ALIAS datetime_precision,\n `CHARACTER_SET_CATALOG` Nullable(String) ALIAS character_set_catalog,\n `CHARACTER_SET_SCHEMA` Nullable(String) ALIAS character_set_schema,\n `CHARACTER_SET_NAME` Nullable(String) ALIAS character_set_name,\n `COLLATION_CATALOG` Nullable(String) ALIAS collation_catalog,\n `COLLATION_SCHEMA` Nullable(String) ALIAS collation_schema,\n `COLLATION_NAME` Nullable(String) ALIAS collation_name,\n `DOMAIN_CATALOG` Nullable(String) ALIAS domain_catalog,\n `DOMAIN_SCHEMA` Nullable(String) ALIAS domain_schema,\n `DOMAIN_NAME` Nullable(String) ALIAS domain_name,\n `COLUMN_COMMENT` String ALIAS 
column_comment,\n `COLUMN_TYPE` String ALIAS column_type\n) AS\nSELECT\n database AS table_catalog,\n database AS table_schema,\n database AS TABLE_SCHEMA,\n table AS table_name,\n table AS TABLE_NAME,\n name AS column_name,\n position AS ordinal_position,\n default_expression AS column_default,\n type LIKE \'Nullable(%)\' AS is_nullable,\n type AS data_type,\n character_octet_length AS character_maximum_length,\n character_octet_length,\n numeric_precision,\n numeric_precision_radix,\n numeric_scale,\n datetime_precision,\n NULL AS character_set_catalog,\n NULL AS character_set_schema,\n NULL AS character_set_name,\n NULL AS collation_catalog,\n NULL AS collation_schema,\n NULL AS collation_name,\n NULL AS domain_catalog,\n NULL AS domain_schema,\n NULL AS domain_name,\n comment AS column_comment,\n type AS column_type\nFROM system.columns CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables +CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables +CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, 
name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables From 918571a5eecd08362265fb0bd9543a5920fcd96a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 29 Jul 2023 22:04:21 +0000 Subject: [PATCH 037/113] Fix: check correctly frame bounds for RANGE --- src/Interpreters/WindowDescription.cpp | 48 +++++++++---------- src/Processors/Transforms/WindowTransform.cpp | 2 +- .../02833_window_func_range_offset.reference | 0 .../02833_window_func_range_offset.sql | 6 +++ 4 files changed, 29 insertions(+), 27 deletions(-) create mode 100644 tests/queries/0_stateless/02833_window_func_range_offset.reference create mode 100644 tests/queries/0_stateless/02833_window_func_range_offset.sql diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 7ed7788cf1d..8a7a5024d69 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -91,34 +91,30 @@ void WindowFrame::toString(WriteBuffer & buf) const void WindowFrame::checkValid() const { // Check the validity of offsets. - if (type == WindowFrame::FrameType::ROWS - || type == WindowFrame::FrameType::GROUPS) + if (begin_type == BoundaryType::Offset + && !((begin_offset.getType() == Field::Types::UInt64 + || begin_offset.getType() == Field::Types::Int64) + && begin_offset.get() >= 0 + && begin_offset.get() < INT_MAX)) { - if (begin_type == BoundaryType::Offset - && !((begin_offset.getType() == Field::Types::UInt64 - || begin_offset.getType() == Field::Types::Int64) - && begin_offset.get() >= 0 - && begin_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), begin_offset), - begin_offset.getType()); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), begin_offset), + begin_offset.getType()); + } - if (end_type == BoundaryType::Offset - && !((end_offset.getType() == Field::Types::UInt64 - || end_offset.getType() == Field::Types::Int64) - && end_offset.get() >= 0 - && end_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), end_offset), - end_offset.getType()); - } + if (end_type == BoundaryType::Offset + && !((end_offset.getType() == Field::Types::UInt64 + || end_offset.getType() == Field::Types::Int64) + && end_offset.get() >= 0 + && end_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), end_offset), + end_offset.getType()); } // Check relative positioning of offsets. 
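For context — a minimal, illustrative query (a hedged sketch, not taken from this patch set) of the integer-offset RANGE frame that the broadened offset validation above is meant to keep working; only nonnegative 32-bit integer offsets pass the check:

SELECT count() OVER (ORDER BY number RANGE BETWEEN 5 PRECEDING AND CURRENT ROW) FROM numbers(10);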
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index a785d52bf65..be76971ddcd 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -148,7 +148,7 @@ static int compareValuesWithOffsetFloat(const IColumn * _compared_column, const auto * reference_column = assert_cast( _reference_column); const auto offset = _offset.get(); - assert(offset >= 0); + chassert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); assert(compared_value_data.size == sizeof(typename ColumnType::ValueType)); diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.reference b/tests/queries/0_stateless/02833_window_func_range_offset.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.sql b/tests/queries/0_stateless/02833_window_func_range_offset.sql new file mode 100644 index 00000000000..f1d26c5cbaf --- /dev/null +++ b/tests/queries/0_stateless/02833_window_func_range_offset.sql @@ -0,0 +1,6 @@ +-- invalid start offset with RANGE +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN 0.0 PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN nan PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } +-- invalid end offset with RANGE +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND 0.0 FOLLOWING); -- { serverError BAD_ARGUMENTS } +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND nan FOLLOWING); -- { serverError BAD_ARGUMENTS } From c921748d6199c763d794b44f3ec3c26eae03fbba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Sun, 30 Jul 2023 08:58:16 +0300 Subject: [PATCH 038/113] Add named collections ddl on cluster integration test --- .../__init__.py | 0 .../configs/config.d/cluster.xml | 22 +++ .../configs/users.d/default.xml | 12 ++ .../test.py | 135 ++++++++++++++++++ 4 files changed, 169 insertions(+) create mode 100644 tests/integration/test_named_collections_if_exists_on_cluster/__init__.py create mode 100644 tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml create mode 100644 tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml create mode 100644 tests/integration/test_named_collections_if_exists_on_cluster/test.py diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/__init__.py b/tests/integration/test_named_collections_if_exists_on_cluster/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml b/tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml new file mode 100644 index 00000000000..95336350c1e --- /dev/null +++ b/tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml @@ -0,0 +1,22 @@ + + + + + true + + clickhouse1 + 9000 + + + clickhouse2 + 9000 + + + clickhouse3 + 9000 + + + true + + + diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml b/tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml new file mode 100644 index 
00000000000..15da914f666 --- /dev/null +++ b/tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml @@ -0,0 +1,12 @@ + + + + + default + default + 1 + 1 + 1 + + + diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/test.py b/tests/integration/test_named_collections_if_exists_on_cluster/test.py new file mode 100644 index 00000000000..9b8159a1071 --- /dev/null +++ b/tests/integration/test_named_collections_if_exists_on_cluster/test.py @@ -0,0 +1,135 @@ +""" +Test cases: + +--- execute on the first node +create named collection foobar as a=1, b=2; +create named collection if not exists foobar on cluster '{cluster}' as a=1, b=2, c=3; +create named collection collection_present_on_first_node as a=1, b=2, s='string', x=0, y=-1; + +--- execute on any other node +alter named collection foobar on cluster '{cluster}' set a=2, c=3; +alter named collection foobar on cluster '{cluster}' delete b; +alter named collection foobar on cluster '{cluster}' set a=3 delete c; +alter named collection if exists collection_absent_ewerywhere on cluster '{cluster}' delete b; +alter named collection if exists collection_present_on_first_node on cluster '{cluster}' delete b; + +--- execute on every node +select * from system.named_collections; + +--- execute on any node +drop named collection foobar on cluster '{cluster}'; +drop named collection if exists collection_absent_ewerywhere on cluster '{cluster}'; +drop named collection if exists collection_present_on_first_node on cluster '{cluster}'; + +--- execute on every node +select * from system.named_collections; +""" + +import logging +from json import dumps, loads +from functools import partial +import pytest +from helpers.cluster import ClickHouseCluster + +dumps = partial(dumps, ensure_ascii=False) + +NODE01, NODE02, NODE03 = "clickhouse1", "clickhouse2", "clickhouse3" + +CHECK_STRING_VALUE = "Some ~`$tr!ng-_+=123@#%^&&()|?[]{}<🤡>.,\t\n:;" + +STMT_CREATE = "CREATE NAMED COLLECTION" +STMT_ALTER = "ALTER NAMED COLLECTION" +STMT_DROP = "DROP NAMED COLLECTION" + +SYSTEM_TABLE = "system.named_collections" + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + common_kwargs = dict( + main_configs=[ + "configs/config.d/cluster.xml", + ], + user_configs=[ + "configs/users.d/default.xml", + ], + with_zookeeper=True, + stay_alive=True + ) + for name in [NODE01, NODE02, NODE03]: + cluster.add_instance(name, **common_kwargs) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_create_alter_drop_on_cluster(cluster): + """ + Executes the set of queries and checks the final named collections state. 
+ """ + q_count_collections = f"select count() from {SYSTEM_TABLE}" + + def check_collections_empty(): + for name, node in list(cluster.instances.items()): + assert "0" == node.query(q_count_collections).strip(), f"{SYSTEM_TABLE} is not empty on {name}" + + foobar_final_state = { + "name": "foobar", + "collection": {"a": "3"} + } + collection_present_on_first_node_final_state = { + "name": "collection_present_on_first_node", + "collection": {"a": "1", "s": CHECK_STRING_VALUE, "x": "0", "y": "-1"} + } + expected_state = { + NODE01: [foobar_final_state, collection_present_on_first_node_final_state], + NODE02: [foobar_final_state], + NODE03: [foobar_final_state] + } + + q_get_collections = f"select * from {SYSTEM_TABLE} order by name desc format JSON" + + def check_state(): + for name, node in list(cluster.instances.items()): + result = loads(node.query(q_get_collections))["data"] + logging.debug('%s ?= %s', dumps(result), dumps(expected_state[name])) + assert expected_state[name] == result, f"invalid {SYSTEM_TABLE} content on {name}: {result}" + + check_collections_empty() + + # create executed on the first node + node = cluster.instances[NODE01] + node.query(f"{STMT_CREATE} foobar AS a=1, b=2") + node.query(f"{STMT_CREATE} IF NOT EXISTS foobar ON CLUSTER 'cluster' AS a=1, b=2, c=3") + node.query(f"{STMT_CREATE} collection_present_on_first_node AS a=1, b=2, s='{CHECK_STRING_VALUE}', x=0, y=-1") + + # alter executed on the second node + node = cluster.instances[NODE02] + node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' SET a=2, c=3") + node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' DELETE b") + node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' SET a=3 DELETE c") + node.query(f"{STMT_ALTER} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster' DELETE b") + node.query(f"{STMT_ALTER} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster' DELETE b") + + check_state() + for node in list(cluster.instances.values()): + node.restart_clickhouse() + check_state() + + # drop executed on the third node + node = cluster.instances[NODE03] + node.query(f"{STMT_DROP} foobar ON CLUSTER 'cluster'") + node.query(f"{STMT_DROP} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster'") + node.query(f"{STMT_DROP} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster'") + + check_collections_empty() + for node in list(cluster.instances.values()): + node.restart_clickhouse() + check_collections_empty() From b3834a3a5b9382e1e25dded831719e2af0da0d30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Sun, 30 Jul 2023 08:59:44 +0300 Subject: [PATCH 039/113] Add bugfix validate check description --- docs/en/development/continuous-integration.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 738c5458cc3..c76ab738004 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -141,6 +141,10 @@ Runs [stateful functional tests](tests.md#functional-tests). Treat them in the s Runs [integration tests](tests.md#integration-tests). +## Bugfix validate check +Checks that either a new test (functional or integration) or there some changed tests that fail with the binary built on master branch. This check is triggered when pull request has "pr-bugfix" label. 
+ + ## Stress Test Runs stateless functional tests concurrently from several clients to detect concurrency-related errors. If it fails: From 9c5b96e26cee0fd2685ef0b8312ebf3935e34f25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Sun, 30 Jul 2023 09:37:19 +0300 Subject: [PATCH 040/113] Add "bugfix" to aspell-ignore --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index a314815e2c4..81415436c45 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -98,6 +98,7 @@ BlockWriteOps BlockWriteTime Bool BrokenDistributedFilesToInsert +Bugfix BuildID BuilderBinAarch BuilderBinAmd @@ -1158,6 +1159,7 @@ brotli bson bsoneachrow buffersize +bugfix buildId buildable builtins From 7725c534081d28a009bfe3b23d563b1bb2dc1471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=9A=D0=BE=D1=80=D0=B3=D1=83?= =?UTF-8?q?=D0=BD?= Date: Sun, 30 Jul 2023 09:42:42 +0300 Subject: [PATCH 041/113] Reformat the previously added test with black --- .../test.py | 45 ++++++++++++------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/test.py b/tests/integration/test_named_collections_if_exists_on_cluster/test.py index 9b8159a1071..5f5be9156b9 100644 --- a/tests/integration/test_named_collections_if_exists_on_cluster/test.py +++ b/tests/integration/test_named_collections_if_exists_on_cluster/test.py @@ -56,7 +56,7 @@ def cluster(): "configs/users.d/default.xml", ], with_zookeeper=True, - stay_alive=True + stay_alive=True, ) for name in [NODE01, NODE02, NODE03]: cluster.add_instance(name, **common_kwargs) @@ -78,20 +78,19 @@ def test_create_alter_drop_on_cluster(cluster): def check_collections_empty(): for name, node in list(cluster.instances.items()): - assert "0" == node.query(q_count_collections).strip(), f"{SYSTEM_TABLE} is not empty on {name}" + assert ( + "0" == node.query(q_count_collections).strip() + ), f"{SYSTEM_TABLE} is not empty on {name}" - foobar_final_state = { - "name": "foobar", - "collection": {"a": "3"} - } + foobar_final_state = {"name": "foobar", "collection": {"a": "3"}} collection_present_on_first_node_final_state = { "name": "collection_present_on_first_node", - "collection": {"a": "1", "s": CHECK_STRING_VALUE, "x": "0", "y": "-1"} + "collection": {"a": "1", "s": CHECK_STRING_VALUE, "x": "0", "y": "-1"}, } expected_state = { NODE01: [foobar_final_state, collection_present_on_first_node_final_state], NODE02: [foobar_final_state], - NODE03: [foobar_final_state] + NODE03: [foobar_final_state], } q_get_collections = f"select * from {SYSTEM_TABLE} order by name desc format JSON" @@ -99,24 +98,34 @@ def test_create_alter_drop_on_cluster(cluster): def check_state(): for name, node in list(cluster.instances.items()): result = loads(node.query(q_get_collections))["data"] - logging.debug('%s ?= %s', dumps(result), dumps(expected_state[name])) - assert expected_state[name] == result, f"invalid {SYSTEM_TABLE} content on {name}: {result}" + logging.debug("%s ?= %s", dumps(result), dumps(expected_state[name])) + assert ( + expected_state[name] == result + ), f"invalid {SYSTEM_TABLE} content on {name}: {result}" check_collections_empty() # create executed on the first node node = cluster.instances[NODE01] 
node.query(f"{STMT_CREATE} foobar AS a=1, b=2") - node.query(f"{STMT_CREATE} IF NOT EXISTS foobar ON CLUSTER 'cluster' AS a=1, b=2, c=3") - node.query(f"{STMT_CREATE} collection_present_on_first_node AS a=1, b=2, s='{CHECK_STRING_VALUE}', x=0, y=-1") + node.query( + f"{STMT_CREATE} IF NOT EXISTS foobar ON CLUSTER 'cluster' AS a=1, b=2, c=3" + ) + node.query( + f"{STMT_CREATE} collection_present_on_first_node AS a=1, b=2, s='{CHECK_STRING_VALUE}', x=0, y=-1" + ) # alter executed on the second node node = cluster.instances[NODE02] node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' SET a=2, c=3") node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' DELETE b") node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' SET a=3 DELETE c") - node.query(f"{STMT_ALTER} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster' DELETE b") - node.query(f"{STMT_ALTER} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster' DELETE b") + node.query( + f"{STMT_ALTER} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster' DELETE b" + ) + node.query( + f"{STMT_ALTER} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster' DELETE b" + ) check_state() for node in list(cluster.instances.values()): @@ -126,8 +135,12 @@ def test_create_alter_drop_on_cluster(cluster): # drop executed on the third node node = cluster.instances[NODE03] node.query(f"{STMT_DROP} foobar ON CLUSTER 'cluster'") - node.query(f"{STMT_DROP} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster'") - node.query(f"{STMT_DROP} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster'") + node.query( + f"{STMT_DROP} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster'" + ) + node.query( + f"{STMT_DROP} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster'" + ) check_collections_empty() for node in list(cluster.instances.values()): From dd85c3c114c846b3a4398841e32f1d96981a9a42 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 30 Jul 2023 22:40:11 +0200 Subject: [PATCH 042/113] Update ReadBufferFromHDFS.cpp --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 94780c2e290..3476defd5b1 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -58,7 +58,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(file_info->mSize); + if (file_size_.has_value()) + { + file_size = file_size_.value(); + } + else + { + auto * file_info = hdfsGetPathInfo(fs.get(), hdfs_file_path.c_str()); + if (!file_info) + throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", hdfs_file_path); + file_size = static_cast(file_info->mSize); + } } ~ReadBufferFromHDFSImpl() override From 6d971bc3a8985e8ddbaf243a01d1a68b872d5322 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 30 Jul 2023 22:35:19 +0000 Subject: [PATCH 043/113] Specific check for NaN in window functions with RANGE --- src/Interpreters/WindowDescription.cpp | 58 ++++++++++++------- .../02833_window_func_range_offset.reference | 9 +++ .../02833_window_func_range_offset.sql | 5 +- 3 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 8a7a5024d69..702a042e74e 100644 --- a/src/Interpreters/WindowDescription.cpp +++ 
b/src/Interpreters/WindowDescription.cpp @@ -91,30 +91,46 @@ void WindowFrame::toString(WriteBuffer & buf) const void WindowFrame::checkValid() const { // Check the validity of offsets. - if (begin_type == BoundaryType::Offset - && !((begin_offset.getType() == Field::Types::UInt64 - || begin_offset.getType() == Field::Types::Int64) - && begin_offset.get() >= 0 - && begin_offset.get() < INT_MAX)) + if (type == WindowFrame::FrameType::ROWS + || type == WindowFrame::FrameType::GROUPS) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), begin_offset), - begin_offset.getType()); - } + if (begin_type == BoundaryType::Offset + && !((begin_offset.getType() == Field::Types::UInt64 + || begin_offset.getType() == Field::Types::Int64) + && begin_offset.get() >= 0 + && begin_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), begin_offset), + begin_offset.getType()); + } - if (end_type == BoundaryType::Offset - && !((end_offset.getType() == Field::Types::UInt64 - || end_offset.getType() == Field::Types::Int64) - && end_offset.get() >= 0 - && end_offset.get() < INT_MAX)) + if (end_type == BoundaryType::Offset + && !((end_offset.getType() == Field::Types::UInt64 + || end_offset.getType() == Field::Types::Int64) + && end_offset.get() >= 0 + && end_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), end_offset), + end_offset.getType()); + } + } + else if (type == WindowFrame::FrameType::RANGE) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), end_offset), - end_offset.getType()); + if (begin_type == BoundaryType::Offset && begin_offset.getType() == Field::Types::Float64 && isNaN(begin_offset.get())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame start offset for '{}' frame cannot be NaN for Floats", type); + } + + if (end_type == BoundaryType::Offset && end_offset.getType() == Field::Types::Float64 && isNaN(end_offset.get())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame end offset for '{}' frame cannot be NaN for Floats", type); + } } // Check relative positioning of offsets. 
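The motivation for rejecting NaN offsets can be seen from ordinary Float64 comparison semantics (a hedged illustration, not part of the patch itself): every comparison against NaN is false, so a NaN offset can never define a usable RANGE frame boundary.

SELECT nan = nan, nan < 1, 1 < nan; -- all three comparisons return 0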
diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.reference b/tests/queries/0_stateless/02833_window_func_range_offset.reference index e69de29bb2d..cf254aa2024 100644 --- a/tests/queries/0_stateless/02833_window_func_range_offset.reference +++ b/tests/queries/0_stateless/02833_window_func_range_offset.reference @@ -0,0 +1,9 @@ +-- { echoOn } +-- invalid start offset with RANGE +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN 0.0 PRECEDING AND UNBOUNDED FOLLOWING); +1 +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN nan PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } +-- invalid end offset with RANGE +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND 0.0 FOLLOWING); +1 +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND nan FOLLOWING); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.sql b/tests/queries/0_stateless/02833_window_func_range_offset.sql index f1d26c5cbaf..1c75543b3f1 100644 --- a/tests/queries/0_stateless/02833_window_func_range_offset.sql +++ b/tests/queries/0_stateless/02833_window_func_range_offset.sql @@ -1,6 +1,7 @@ +-- { echoOn } -- invalid start offset with RANGE -SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN 0.0 PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN 0.0 PRECEDING AND UNBOUNDED FOLLOWING); SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN nan PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } -- invalid end offset with RANGE -SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND 0.0 FOLLOWING); -- { serverError BAD_ARGUMENTS } +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND 0.0 FOLLOWING); SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND nan FOLLOWING); -- { serverError BAD_ARGUMENTS } From 3d7257cc7e2de2457ec7a2391bb2751c415ed125 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 29 Jul 2023 22:04:21 +0000 Subject: [PATCH 044/113] Allow Floats as boundaries for RANGE is nonsense --- src/Interpreters/WindowDescription.cpp | 58 +++++++------------ .../0_stateless/01591_window_functions.sql | 8 +-- .../02833_window_func_range_offset.reference | 9 --- .../02833_window_func_range_offset.sql | 5 +- 4 files changed, 27 insertions(+), 53 deletions(-) diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 702a042e74e..8a7a5024d69 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -91,46 +91,30 @@ void WindowFrame::toString(WriteBuffer & buf) const void WindowFrame::checkValid() const { // Check the validity of offsets. 
- if (type == WindowFrame::FrameType::ROWS - || type == WindowFrame::FrameType::GROUPS) + if (begin_type == BoundaryType::Offset + && !((begin_offset.getType() == Field::Types::UInt64 + || begin_offset.getType() == Field::Types::Int64) + && begin_offset.get() >= 0 + && begin_offset.get() < INT_MAX)) { - if (begin_type == BoundaryType::Offset - && !((begin_offset.getType() == Field::Types::UInt64 - || begin_offset.getType() == Field::Types::Int64) - && begin_offset.get() >= 0 - && begin_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), begin_offset), - begin_offset.getType()); - } - - if (end_type == BoundaryType::Offset - && !((end_offset.getType() == Field::Types::UInt64 - || end_offset.getType() == Field::Types::Int64) - && end_offset.get() >= 0 - && end_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), end_offset), - end_offset.getType()); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), begin_offset), + begin_offset.getType()); } - else if (type == WindowFrame::FrameType::RANGE) - { - if (begin_type == BoundaryType::Offset && begin_offset.getType() == Field::Types::Float64 && isNaN(begin_offset.get())) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame start offset for '{}' frame cannot be NaN for Floats", type); - } - if (end_type == BoundaryType::Offset && end_offset.getType() == Field::Types::Float64 && isNaN(end_offset.get())) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame end offset for '{}' frame cannot be NaN for Floats", type); - } + if (end_type == BoundaryType::Offset + && !((end_offset.getType() == Field::Types::UInt64 + || end_offset.getType() == Field::Types::Int64) + && end_offset.get() >= 0 + && end_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), end_offset), + end_offset.getType()); } // Check relative positioning of offsets. diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 3c9c1f9cea7..07e323b3c40 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -474,10 +474,10 @@ select count() over () from numbers(4) where number < 2; -- floating point RANGE frame select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. following), - count(*) over (order by toFloat64(number) range between current row and 5. 
following) + count(*) over (order by toFloat32(number) range 5 preceding), + count(*) over (order by toFloat64(number) range 5 preceding), + count(*) over (order by toFloat32(number) range between current row and 5 following), + count(*) over (order by toFloat64(number) range between current row and 5 following) from numbers(7) ; diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.reference b/tests/queries/0_stateless/02833_window_func_range_offset.reference index cf254aa2024..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02833_window_func_range_offset.reference +++ b/tests/queries/0_stateless/02833_window_func_range_offset.reference @@ -1,9 +0,0 @@ --- { echoOn } --- invalid start offset with RANGE -SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN 0.0 PRECEDING AND UNBOUNDED FOLLOWING); -1 -SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN nan PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } --- invalid end offset with RANGE -SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND 0.0 FOLLOWING); -1 -SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND nan FOLLOWING); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.sql b/tests/queries/0_stateless/02833_window_func_range_offset.sql index 1c75543b3f1..f1d26c5cbaf 100644 --- a/tests/queries/0_stateless/02833_window_func_range_offset.sql +++ b/tests/queries/0_stateless/02833_window_func_range_offset.sql @@ -1,7 +1,6 @@ --- { echoOn } -- invalid start offset with RANGE -SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN 0.0 PRECEDING AND UNBOUNDED FOLLOWING); +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN 0.0 PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN nan PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } -- invalid end offset with RANGE -SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND 0.0 FOLLOWING); +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND 0.0 FOLLOWING); -- { serverError BAD_ARGUMENTS } SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND nan FOLLOWING); -- { serverError BAD_ARGUMENTS } From 64f11d4853ebadc50e198612071d00aedcf6f79c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 31 Jul 2023 08:30:55 +0000 Subject: [PATCH 045/113] Update reference file --- .../queries/0_stateless/01591_window_functions.reference | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 8939ea1111d..ce9c6f4589e 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -1193,10 +1193,10 @@ select count() over () from numbers(4) where number < 2; 2 -- floating point RANGE frame select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. following), - count(*) over (order by toFloat64(number) range between current row and 5. 
following) + count(*) over (order by toFloat32(number) range 5 preceding), + count(*) over (order by toFloat64(number) range 5 preceding), + count(*) over (order by toFloat32(number) range between current row and 5 following), + count(*) over (order by toFloat64(number) range between current row and 5 following) from numbers(7) ; 1 1 6 6 From 964e4311c3950aa516b6cf435afc75f3e3245fb7 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 31 Jul 2023 13:03:56 +0000 Subject: [PATCH 046/113] reviewed changes --- .../System/attachInformationSchemaTables.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index efa24daeffe..544ab75ac29 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -40,15 +40,9 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d StoragePtr view = createTableFromAST(ast_create, database.getDatabaseName(), database.getTableDataPath(ast_create), context, true).second; - database.createTable(context, ast_create.getTable(), view, ast); - if (database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA) - { - database.createTable(context, Poco::toUpper(ast_create.getTable()), view, ast); - } - else - { - database.createTable(context, Poco::toLower(ast_create.getTable()), view, ast); - } + database.createTable(context, Poco::toUpper(ast_create.getTable()), view, ast); + database.createTable(context, Poco::toLower(ast_create.getTable()), view, ast); + } catch (...) { From 74c0574d519378feb36a657988c22d0b93c31638 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 31 Jul 2023 15:22:16 +0200 Subject: [PATCH 047/113] Update ReadBufferFromHDFS.cpp --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 3476defd5b1..122ed335265 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -75,7 +75,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(file_info->mSize); } } From 1fcbf5f96d1fb13c118c497fa0a2732629e99c75 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 1 Aug 2023 07:33:14 +0200 Subject: [PATCH 048/113] Update tests --- tests/queries/0_stateless/02724_database_s3.sh | 4 ++-- tests/queries/0_stateless/02725_database_hdfs.sh | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index bb8f1f5f7ee..ca6264ac681 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,12 +46,12 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" -""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" +""" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; SELECT * FROM \"abacaba\" -""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" +""" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK" # Cleanup ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh 
b/tests/queries/0_stateless/02725_database_hdfs.sh index 89ff7421a6f..aed5a6a0a39 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -43,12 +43,12 @@ DROP DATABASE IF EXISTS test4; CREATE DATABASE test4 ENGINE = HDFS; USE test4; SELECT * FROM \"abacaba/file.tsv\" -""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK1" +""" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK1" -${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK2" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK3" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK4" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK5" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK2" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK3" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK4" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK5" # Cleanup From 6e39065bd0054aa1d9f12352f9a962b3f1f9c1b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 1 Aug 2023 10:14:49 +0200 Subject: [PATCH 049/113] Better tests --- tests/queries/0_stateless/02724_database_s3.sh | 4 ++-- tests/queries/0_stateless/02725_database_hdfs.sh | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index ca6264ac681..ae3b048938e 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,12 +46,12 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" -""" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK" +""" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; SELECT * FROM \"abacaba\" -""" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK" +""" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" # Cleanup ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index aed5a6a0a39..6b69051de3b 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -45,10 +45,10 @@ USE test4; SELECT * FROM \"abacaba/file.tsv\" """ 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK1" -${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK2" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK3" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 
2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK4" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK5" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" # Cleanup From 5880a0a3721d8d3d3f39ce7f1196eac4be433bf8 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 1 Aug 2023 11:50:31 +0200 Subject: [PATCH 050/113] Update ReadBufferFromHDFS.cpp --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 122ed335265..150c40a8151 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -88,7 +88,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory Date: Tue, 1 Aug 2023 11:55:08 +0200 Subject: [PATCH 051/113] Update ReadBufferFromHDFS.cpp --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 150c40a8151..9455afe7833 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -80,6 +80,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(file_info->mSize); + hdfsFreeFileInfo(file_info, 1); } } From ce4f273be1f6cf7168347a5d1507580a06ee8255 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 1 Aug 2023 12:39:36 +0200 Subject: [PATCH 052/113] Updated list of retryable errors and updated both tests to use same azure_query function --- .../test.py | 23 +++---------------- .../test_storage_azure_blob_storage/test.py | 2 ++ 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 86b70f8db70..dcca85fca7a 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -7,6 +7,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.utility import generate_values, replace_config, SafeThread from azure.storage.blob import BlobServiceClient +from test_storage_azure_blob_storage.test import azure_query SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -38,28 +39,10 @@ def cluster(): cluster.shutdown() -# Note: use this for selects and inserts and create table queries. +# Note: use azure_query for selects and inserts and create table queries. # For inserts there is no guarantee that retries will not result in duplicates. -# But it is better to retry anyway because 'Connection was closed by the server' error +# But it is better to retry anyway because connection related errors # happens in fact only for inserts because reads already have build-in retries in code. 
-def azure_query(node, query, try_num=3, settings={}): - for i in range(try_num): - try: - return node.query(query, settings=settings) - except Exception as ex: - retriable_errors = [ - "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response" - ] - retry = False - for error in retriable_errors: - if error in str(ex): - retry = True - logging.info(f"Try num: {i}. Having retriable error: {ex}") - break - if not retry or i == try_num - 1: - raise Exception(ex) - continue - def create_table(node, table_name, **additional_settings): settings = { diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 21f57a67495..6e103bdf96d 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -43,6 +43,8 @@ def azure_query(node, query, try_num=10, settings={}): retriable_errors = [ "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", ] retry = False for error in retriable_errors: From 4561378c2085eca3bc4ff3dc8e3b512ee6407398 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 1 Aug 2023 10:57:46 +0000 Subject: [PATCH 053/113] Automatic style fix --- tests/integration/test_merge_tree_azure_blob_storage/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index dcca85fca7a..121b9b294f0 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -44,6 +44,7 @@ def cluster(): # But it is better to retry anyway because connection related errors # happens in fact only for inserts because reads already have build-in retries in code. + def create_table(node, table_name, **additional_settings): settings = { "storage_policy": "blob_storage_policy", From 2227945529fb62261e204dfd5a5bc874f097d917 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 1 Aug 2023 13:34:19 +0000 Subject: [PATCH 054/113] fixed tests --- tests/integration/test_backup_restore_new/test.py | 1 + tests/integration/test_mysql_protocol/golang.reference | 1 + tests/integration/test_mysql_protocol/test.py | 8 ++++---- .../test_select_access_rights/test_from_system_tables.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 53f1599a0d6..92965d910f4 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1227,6 +1227,7 @@ def test_backup_all(exclude_system_log_tables): "asynchronous_insert_log", ] exclude_from_backup += ["system." 
+ table_name for table_name in log_tables] + exclude_from_backup += ["information_schema.COLUMNS", "information_schema.SCHEMATA", "information_schema.TABLES", "information_schema.VIEWS"] backup_command = f"BACKUP ALL {'EXCEPT TABLES ' + ','.join(exclude_from_backup) if exclude_from_backup else ''} TO {backup_name}" diff --git a/tests/integration/test_mysql_protocol/golang.reference b/tests/integration/test_mysql_protocol/golang.reference index 4069b2a086a..7680c8c74dc 100644 --- a/tests/integration/test_mysql_protocol/golang.reference +++ b/tests/integration/test_mysql_protocol/golang.reference @@ -14,6 +14,7 @@ a UNSIGNED TINYINT Result: tables 1 tables 1 +tables 1 Columns: a b diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 5e03c4883b4..494af3c219d 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -679,7 +679,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" code, (stdout, stderr) = php_container.exec_run( "php -f test_ssl.php {host} {port} default 123".format( @@ -688,7 +688,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" code, (stdout, stderr) = php_container.exec_run( "php -f test.php {host} {port} user_with_double_sha1 abacaba".format( @@ -697,7 +697,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" code, (stdout, stderr) = php_container.exec_run( "php -f test_ssl.php {host} {port} user_with_double_sha1 abacaba".format( @@ -706,7 +706,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" def test_mysqljs_client(started_cluster, nodejs_container): diff --git a/tests/integration/test_select_access_rights/test_from_system_tables.py b/tests/integration/test_select_access_rights/test_from_system_tables.py index ccdd09c67ef..cd29183561a 100644 --- a/tests/integration/test_select_access_rights/test_from_system_tables.py +++ b/tests/integration/test_select_access_rights/test_from_system_tables.py @@ -194,7 +194,7 @@ def test_information_schema(): node.query( "SELECT count() FROM information_schema.TABLES WHERE table_name='TABLES'" ) - == "3\n" + == "2\n" ) assert ( node.query( From ad2489be8578c24eaa3b2dfb092dde648061e09c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 1 Aug 2023 16:45:10 +0200 Subject: [PATCH 055/113] Debug tests --- tests/queries/0_stateless/02724_database_s3.sh | 4 ++-- tests/queries/0_stateless/02725_database_hdfs.sh | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index ae3b048938e..d7eb5b101d2 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,12 +46,12 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" -""" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" +""" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" 
${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; SELECT * FROM \"abacaba\" -""" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" +""" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" # Cleanup ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index 6b69051de3b..aa1fb80ddca 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -45,10 +45,10 @@ USE test4; SELECT * FROM \"abacaba/file.tsv\" """ 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK1" -${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF "FILE_DOESNT_EXIST" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" # Cleanup From 6db7eaf4d8bbb57ab4911748cfbb0dbcab499d0d Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 1 Aug 2023 19:14:11 +0000 Subject: [PATCH 056/113] fixed ast --- .../System/attachInformationSchemaTables.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 544ab75ac29..9a909110889 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -18,7 +18,7 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE); if (database.getEngineName() != "Memory") return; - bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; + // bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; String metadata_resource_name = view_name + ".sql"; auto attach_query = getResource(metadata_resource_name); @@ -34,14 +34,20 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d assert(view_name == ast_create.getTable()); ast_create.attach = false; ast_create.setDatabase(database.getDatabaseName()); - if (is_uppercase) - ast_create.setTable(Poco::toUpper(view_name)); + // if (is_uppercase) + // ast_create.setTable(Poco::toUpper(view_name)); StoragePtr view = createTableFromAST(ast_create, database.getDatabaseName(), database.getTableDataPath(ast_create), context, true).second; - - database.createTable(context, Poco::toUpper(ast_create.getTable()), view, ast); - database.createTable(context, Poco::toLower(ast_create.getTable()), view, ast); + 
database.createTable(context, ast_create.getTable(), view, ast); + ASTPtr ast_upper = ast_create.clone(); + auto & ast_create_upper = ast_upper->as(); + ast_create.setTable(Poco::toUpper(view_name)); + StoragePtr view_upper = createTableFromAST(ast_create_upper, database.getDatabaseName(), + database.getTableDataPath(ast_create_upper), context, true).second; + + database.createTable(context, ast_create_upper.getTable(), view_upper, ast_upper); + } catch (...) From ddf55b5575f109f3049ab4c9cdf725df74b32656 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 1 Aug 2023 23:03:47 +0200 Subject: [PATCH 057/113] Fix tests --- .../queries/0_stateless/02724_database_s3.reference | 4 ++-- tests/queries/0_stateless/02724_database_s3.sh | 4 ++-- .../0_stateless/02725_database_hdfs.reference | 12 ++++++------ tests/queries/0_stateless/02725_database_hdfs.sh | 12 ++++++------ 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/queries/0_stateless/02724_database_s3.reference b/tests/queries/0_stateless/02724_database_s3.reference index 425cca6a077..bf8ea70f1d1 100644 --- a/tests/queries/0_stateless/02724_database_s3.reference +++ b/tests/queries/0_stateless/02724_database_s3.reference @@ -17,5 +17,5 @@ test1 16 17 18 0 0 0 Test 2: check exceptions -OK -OK +BAD_ARGUMENTS +BAD_ARGUMENTS diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index d7eb5b101d2..15e93ff117f 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,12 +46,12 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" -""" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" +""" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; SELECT * FROM \"abacaba\" -""" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" +""" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" # Cleanup ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ diff --git a/tests/queries/0_stateless/02725_database_hdfs.reference b/tests/queries/0_stateless/02725_database_hdfs.reference index ef8adae2bbc..9f63e757b27 100644 --- a/tests/queries/0_stateless/02725_database_hdfs.reference +++ b/tests/queries/0_stateless/02725_database_hdfs.reference @@ -4,9 +4,9 @@ test1 1 2 3 test2 Test 2: check exceptions -OK0 -OK1 -OK2 -OK3 -OK4 -OK5 +BAD_ARGUMENTS +CANNOT_EXTRACT_TABLE_STRUCTURE +BAD_ARGUMENTS +BAD_ARGUMENTS +CANNOT_EXTRACT_TABLE_STRUCTURE +BAD_ARGUMENTS diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index aa1fb80ddca..c0bd9ab5e57 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -36,19 +36,19 @@ echo "Test 2: check exceptions" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = HDFS('abacaba'); -""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK0" +""" | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test4; CREATE DATABASE test4 ENGINE = HDFS; USE test4; SELECT * FROM \"abacaba/file.tsv\" -""" 2>&1| grep -F "FILE_DOESNT_EXIST" > /dev/null && echo "OK1" +""" | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" -${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' #| grep -oF 
"FILE_DOESNT_EXIST" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' #| grep -oF "FILE_DOESNT_EXIST" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" # Cleanup From 0174244fe12666f3792586a641c46dab1f597ebf Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 2 Aug 2023 11:21:47 +0000 Subject: [PATCH 058/113] fixed tests --- src/Storages/System/attachInformationSchemaTables.cpp | 2 +- .../02206_information_schema_show_database.reference | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 9a909110889..3eb7d74b9d2 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -42,7 +42,7 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d database.createTable(context, ast_create.getTable(), view, ast); ASTPtr ast_upper = ast_create.clone(); auto & ast_create_upper = ast_upper->as(); - ast_create.setTable(Poco::toUpper(view_name)); + ast_create_upper.setTable(Poco::toUpper(view_name)); StoragePtr view_upper = createTableFromAST(ast_create_upper, database.getDatabaseName(), database.getTableDataPath(ast_create_upper), context, true).second; diff --git a/tests/queries/0_stateless/02206_information_schema_show_database.reference b/tests/queries/0_stateless/02206_information_schema_show_database.reference index f5fd221a689..0cf7913e28e 100644 --- a/tests/queries/0_stateless/02206_information_schema_show_database.reference +++ b/tests/queries/0_stateless/02206_information_schema_show_database.reference @@ -1,6 +1,6 @@ CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n `table_catalog` String,\n `table_schema` String,\n `table_name` String,\n `TABLE_SCHEMA` String,\n `TABLE_NAME` String,\n `column_name` String,\n `ordinal_position` UInt64,\n `column_default` String,\n `is_nullable` String,\n `data_type` String,\n `character_maximum_length` Nullable(UInt64),\n `character_octet_length` Nullable(UInt64),\n `numeric_precision` Nullable(UInt64),\n `numeric_precision_radix` Nullable(UInt64),\n `numeric_scale` Nullable(UInt64),\n `datetime_precision` Nullable(UInt64),\n `character_set_catalog` Nullable(String),\n `character_set_schema` Nullable(String),\n `character_set_name` Nullable(String),\n `collation_catalog` Nullable(String),\n `collation_schema` Nullable(String),\n `collation_name` Nullable(String),\n `domain_catalog` Nullable(String),\n `domain_schema` Nullable(String),\n `domain_name` Nullable(String),\n `column_comment` String,\n `column_type` String,\n `TABLE_CATALOG` String ALIAS table_catalog,\n 
`COLUMN_NAME` String ALIAS column_name,\n `ORDINAL_POSITION` UInt64 ALIAS ordinal_position,\n `COLUMN_DEFAULT` String ALIAS column_default,\n `IS_NULLABLE` String ALIAS is_nullable,\n `DATA_TYPE` String ALIAS data_type,\n `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64) ALIAS character_maximum_length,\n `CHARACTER_OCTET_LENGTH` Nullable(UInt64) ALIAS character_octet_length,\n `NUMERIC_PRECISION` Nullable(UInt64) ALIAS numeric_precision,\n `NUMERIC_PRECISION_RADIX` Nullable(UInt64) ALIAS numeric_precision_radix,\n `NUMERIC_SCALE` Nullable(UInt64) ALIAS numeric_scale,\n `DATETIME_PRECISION` Nullable(UInt64) ALIAS datetime_precision,\n `CHARACTER_SET_CATALOG` Nullable(String) ALIAS character_set_catalog,\n `CHARACTER_SET_SCHEMA` Nullable(String) ALIAS character_set_schema,\n `CHARACTER_SET_NAME` Nullable(String) ALIAS character_set_name,\n `COLLATION_CATALOG` Nullable(String) ALIAS collation_catalog,\n `COLLATION_SCHEMA` Nullable(String) ALIAS collation_schema,\n `COLLATION_NAME` Nullable(String) ALIAS collation_name,\n `DOMAIN_CATALOG` Nullable(String) ALIAS domain_catalog,\n `DOMAIN_SCHEMA` Nullable(String) ALIAS domain_schema,\n `DOMAIN_NAME` Nullable(String) ALIAS domain_name,\n `COLUMN_COMMENT` String ALIAS column_comment,\n `COLUMN_TYPE` String ALIAS column_type\n) AS\nSELECT\n database AS table_catalog,\n database AS table_schema,\n database AS TABLE_SCHEMA,\n table AS table_name,\n table AS TABLE_NAME,\n name AS column_name,\n position AS ordinal_position,\n default_expression AS column_default,\n type LIKE \'Nullable(%)\' AS is_nullable,\n type AS data_type,\n character_octet_length AS character_maximum_length,\n character_octet_length,\n numeric_precision,\n numeric_precision_radix,\n numeric_scale,\n datetime_precision,\n NULL AS character_set_catalog,\n NULL AS character_set_schema,\n NULL AS character_set_name,\n NULL AS collation_catalog,\n NULL AS collation_schema,\n NULL AS collation_name,\n NULL AS domain_catalog,\n NULL AS domain_schema,\n NULL AS domain_name,\n comment AS column_comment,\n type AS column_type\nFROM system.columns CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables -CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables -CREATE VIEW information_schema.tables 
(`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables +CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables +CREATE VIEW information_schema.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables From dab77b044be007f0c2854777d4057f2f07721dde Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 2 Aug 2023 13:23:18 +0200 Subject: [PATCH 059/113] Update attachInformationSchemaTables.cpp --- src/Storages/System/attachInformationSchemaTables.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 3eb7d74b9d2..07e9ab380d4 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -18,7 +18,6 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d 
database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE); if (database.getEngineName() != "Memory") return; - // bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; String metadata_resource_name = view_name + ".sql"; auto attach_query = getResource(metadata_resource_name); @@ -34,8 +33,6 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d assert(view_name == ast_create.getTable()); ast_create.attach = false; ast_create.setDatabase(database.getDatabaseName()); - // if (is_uppercase) - // ast_create.setTable(Poco::toUpper(view_name)); StoragePtr view = createTableFromAST(ast_create, database.getDatabaseName(), database.getTableDataPath(ast_create), context, true).second; From ffdc174c9a9984042b947ddb5c20d2258554d34e Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 2 Aug 2023 13:24:05 +0200 Subject: [PATCH 060/113] Update test.py --- tests/integration/test_backup_restore_new/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 92965d910f4..53f1599a0d6 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1227,7 +1227,6 @@ def test_backup_all(exclude_system_log_tables): "asynchronous_insert_log", ] exclude_from_backup += ["system." + table_name for table_name in log_tables] - exclude_from_backup += ["information_schema.COLUMNS", "information_schema.SCHEMATA", "information_schema.TABLES", "information_schema.VIEWS"] backup_command = f"BACKUP ALL {'EXCEPT TABLES ' + ','.join(exclude_from_backup) if exclude_from_backup else ''} TO {backup_name}" From e811d5c4e9d5c7eaa204410417f8e53e772950c9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:06:20 +0200 Subject: [PATCH 061/113] Update attachInformationSchemaTables.cpp --- src/Storages/System/attachInformationSchemaTables.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 07e9ab380d4..43a3840cdf0 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -42,9 +42,8 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d ast_create_upper.setTable(Poco::toUpper(view_name)); StoragePtr view_upper = createTableFromAST(ast_create_upper, database.getDatabaseName(), database.getTableDataPath(ast_create_upper), context, true).second; - + database.createTable(context, ast_create_upper.getTable(), view_upper, ast_upper); - } catch (...) 
From e369f0da7cdb4a2db51bb1f428830103011749f2 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 2 Aug 2023 17:31:35 +0200 Subject: [PATCH 062/113] Resolve expressions from WITH clause on usage --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 34432d054e1..0b3d19f1861 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6887,13 +6887,12 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier scope.scope_node->formatASTForErrorMessage()); } - std::erase_if(with_nodes, [](const QueryTreeNodePtr & node) - { - auto * subquery_node = node->as(); - auto * union_node = node->as(); - - return (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); - }); + /** WITH section can be safely removed, because WITH section only can provide aliases to query expressions + * and CTE for other sections to use. + * + * Example: WITH 1 AS constant, (x -> x + 1) AS lambda, a AS (SELECT * FROM test_table); + */ + query_node_typed.getWith().getNodes().clear(); for (auto & window_node : query_node_typed.getWindow().getNodes()) { @@ -6952,9 +6951,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier scope.scope_node->formatASTForErrorMessage()); } - if (query_node_typed.hasWith()) - resolveExpressionNodeList(query_node_typed.getWithNode(), scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/); - if (query_node_typed.getPrewhere()) resolveExpressionNode(query_node_typed.getPrewhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); @@ -7123,13 +7119,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier scope.scope_node->formatASTForErrorMessage()); } - /** WITH section can be safely removed, because WITH section only can provide aliases to query expressions - * and CTE for other sections to use. - * - * Example: WITH 1 AS constant, (x -> x + 1) AS lambda, a AS (SELECT * FROM test_table); - */ - query_node_typed.getWith().getNodes().clear(); - /** WINDOW section can be safely removed, because WINDOW section can only provide window definition to window functions. 
* * Example: SELECT count(*) OVER w FROM test_table WINDOW w AS (PARTITION BY id); From 4c5c3a338d9ce83b2788a24878507ce978f24e69 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 2 Aug 2023 17:48:10 +0200 Subject: [PATCH 063/113] Maybe fix tests --- tests/queries/0_stateless/02724_database_s3.reference | 2 ++ tests/queries/0_stateless/02725_database_hdfs.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02724_database_s3.reference b/tests/queries/0_stateless/02724_database_s3.reference index bf8ea70f1d1..437549a973a 100644 --- a/tests/queries/0_stateless/02724_database_s3.reference +++ b/tests/queries/0_stateless/02724_database_s3.reference @@ -19,3 +19,5 @@ test1 Test 2: check exceptions BAD_ARGUMENTS BAD_ARGUMENTS +BAD_ARGUMENTS +BAD_ARGUMENTS diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index c0bd9ab5e57..dab2d37605c 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash # Tags: no-fasttest, use-hdfs, no-parallel +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh From 85ba27a2d4b5aa2fc80c90b7393ab327da8e79dc Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 2 Aug 2023 18:22:48 +0200 Subject: [PATCH 064/113] Add a test --- .../02841_with_clause_resolve.reference | 15 ++ .../0_stateless/02841_with_clause_resolve.sql | 141 ++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 tests/queries/0_stateless/02841_with_clause_resolve.reference create mode 100644 tests/queries/0_stateless/02841_with_clause_resolve.sql diff --git a/tests/queries/0_stateless/02841_with_clause_resolve.reference b/tests/queries/0_stateless/02841_with_clause_resolve.reference new file mode 100644 index 00000000000..e2dfc4d85a9 --- /dev/null +++ b/tests/queries/0_stateless/02841_with_clause_resolve.reference @@ -0,0 +1,15 @@ +2.5 +2.5 +2.5 +2.5 +2.5 +(9399,2858) +(7159,6972) +(7456,3524) +(12685,10191) +(12598,4979) +(9824,2699) +(5655,7793) +(14410,10296) +(16211,7662) +(9349,9053) diff --git a/tests/queries/0_stateless/02841_with_clause_resolve.sql b/tests/queries/0_stateless/02841_with_clause_resolve.sql new file mode 100644 index 00000000000..b416446461b --- /dev/null +++ b/tests/queries/0_stateless/02841_with_clause_resolve.sql @@ -0,0 +1,141 @@ +set allow_experimental_analyzer = 1; + +WITH + -- Input + 44100 AS sample_frequency + , number AS tick + , tick / sample_frequency AS time + + -- Delay + , (time, wave, delay_, decay, count) -> arraySum(n1 -> wave(time - delay_ * n1), range(count)) AS delay + + , delay(time, (time -> 0.5), 0.2, 0.5, 5) AS kick + +SELECT + + kick + +FROM system.numbers +LIMIT 5; + +WITH + -- Input + 44100 AS sample_frequency + , number AS tick + , tick / sample_frequency AS time + + -- Output control + , 1 AS master_volume + , level -> least(1.0, greatest(-1.0, level)) AS clamp + , level -> (clamp(level) * 0x7FFF * master_volume)::Int16 AS output + , x -> (x, x) AS mono + + -- Basic waves + , time -> sin(time * 2 * pi()) AS sine_wave + , time -> time::UInt64 % 2 * 2 - 1 AS square_wave + , time -> (time - floor(time)) * 2 - 1 AS sawtooth_wave + , time -> abs(sawtooth_wave(time)) * 2 - 1 AS triangle_wave + + -- Helpers + , (from, to, wave, time) -> from + ((wave(time) + 1) / 2) * (to - from) AS lfo + , (from, to, steps, time) -> from + floor((time - 
floor(time)) * steps) / steps * (to - from) AS step_lfo + , (from, to, steps, time) -> exp(step_lfo(log(from), log(to), steps, time)) AS exp_step_lfo + + -- Noise + , time -> cityHash64(time) / 0xFFFFFFFFFFFFFFFF AS uniform_noise + , time -> erf(uniform_noise(time)) AS white_noise + , time -> cityHash64(time) % 2 ? 1 : -1 AS bernoulli_noise + + -- Distortion + , (x, amount) -> clamp(x * amount) AS clipping + , (x, amount) -> clamp(x > 0 ? pow(x, amount) : -pow(-x, amount)) AS power_distortion + , (x, amount) -> round(x * exp2(amount)) / exp2(amount) AS bitcrush + , (time, sample_frequency) -> round(time * sample_frequency) / sample_frequency AS desample + , (time, wave, amount) -> (time - floor(time) < (1 - amount)) ? wave(time * (1 - amount)) : 0 AS thin + , (time, wave, amount) -> wave(floor(time) + pow(time - floor(time), amount)) AS skew + + -- Combining + , (a, b, weight) -> a * (1 - weight) + b * weight AS combine + + -- Envelopes + , (time, offset, attack, hold, release) -> + time < offset ? 0 + : (time < offset + attack ? ((time - offset) / attack) + : (time < offset + attack + hold ? 1 + : (time < offset + attack + hold + release ? (offset + attack + hold + release - time) / release + : 0))) AS envelope + + , (bpm, time, offset, attack, hold, release) -> + envelope( + time * (bpm / 60) - floor(time * (bpm / 60)), + offset, + attack, + hold, + release) AS running_envelope + + -- Sequencers + , (sequence, time) -> sequence[1 + time::UInt64 % length(sequence)] AS sequencer + + -- Delay + , (time, wave, delay, decay, count) -> arraySum(n -> wave(time - delay * n) * pow(decay, n), range(count)) AS delay + + + , delay(time, (time -> power_distortion(sine_wave(time * 80 + sine_wave(time * 2)), lfo(0.5, 1, sine_wave, time / 16)) + * running_envelope(60, time, 0, 0.0, 0.01, 0.1)), + 0.2, 0.5, 5) AS kick + +SELECT + + (output( + kick + + delay(time, (time -> + power_distortion( + sine_wave(time * 50 + 1 * sine_wave(time * 100 + 1/4)) + * running_envelope(60, time, 0, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, time / 7) + + + delay(time, (time -> + power_distortion( + sine_wave(time * sequencer([50, 100, 200, 400], time / 2) + 1 * sine_wave(time * sequencer([50, 100, 200], time / 4) + 1/4)) + * running_envelope(60, time, 0.5, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 16 + time / 11) + + + delay(time, (time -> + white_noise(time) * running_envelope(60, time, 0.75, 0.01, 0.01, 0.1)), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 24 + time / 13) + + + sine_wave(time * 100 + 1 * sine_wave(time * 10 + 1/4)) + * running_envelope(120, time, 0, 0.01, 0.01, 0.1) + ), + + output( + kick + + delay(time + 0.01, (time -> + power_distortion( + sine_wave(time * 50 + 1 * sine_wave(time * 100 + 1/4)) + * running_envelope(60, time, 0, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, time / 7) + + + delay(time - 0.01, (time -> + power_distortion( + sine_wave(time * sequencer([50, 100, 200, 400], time / 2) + 1 * sine_wave(time * sequencer([50, 100, 200], time / 4) + 1/4)) + * running_envelope(60, time, 0.5, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 16 + time / 11) + + + delay(time + 0.005, (time -> + white_noise(time) * running_envelope(60, time, 0.75, 0.01, 0.01, 0.1)), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 24 + time / 13) + )) + +FROM 
system.numbers +LIMIT 10; From 67d8f1c1c512c1de148784c3772d60f2305e48f4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 2 Aug 2023 18:42:38 +0200 Subject: [PATCH 065/113] fix data race --- programs/server/Server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 796ab583fe4..e6d5837dd0e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1650,6 +1650,7 @@ try database_catalog.initializeAndLoadTemporaryDatabase(); loadMetadataSystem(global_context); maybeConvertSystemDatabase(global_context); + startupSystemTables(); /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); @@ -1668,7 +1669,6 @@ try /// Then, load remaining databases loadMetadata(global_context, default_database); convertDatabasesEnginesIfNeed(global_context); - startupSystemTables(); database_catalog.startupBackgroundCleanup(); /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); From 6dcdc4cd380c773a9dd135c66cdb79995ba0550a Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 2 Aug 2023 15:35:47 +0200 Subject: [PATCH 066/113] Remove mark_failed_report_pending, it's already in builds_report_config --- tests/ci/build_check.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index eae9efdd548..b75650d6d60 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -9,12 +9,6 @@ import time from typing import List, Tuple from ci_config import CI_CONFIG, BuildConfig -from commit_status_helper import ( - NotSet, - get_commit_filtered_statuses, - get_commit, - post_commit_status, -) from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_JOB, @@ -24,8 +18,6 @@ from env_helper import ( S3_DOWNLOAD, TEMP_PATH, ) -from get_robot_token import get_best_robot_token -from github_helper import GitHub from pr_info import PRInfo from s3_helper import S3Helper from tee_popen import TeePopen @@ -242,34 +234,6 @@ def upload_master_static_binaries( print(f"::notice ::Binary static URL: {url}") -def mark_failed_reports_pending(build_name: str, pr_info: PRInfo) -> None: - try: - gh = GitHub(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) - statuses = get_commit_filtered_statuses(commit) - report_status = [ - name - for name, builds in CI_CONFIG["builds_report_config"].items() - if build_name in builds - ][0] - for status in statuses: - if status.context == report_status and status.state in ["failure", "error"]: - logging.info( - "Commit already have failed status for '%s', setting it to 'pending'", - report_status, - ) - post_commit_status( - commit, - "pending", - status.target_url or NotSet, - "Set to pending on rerun", - report_status, - pr_info, - ) - except: # we do not care about any exception here - logging.info("Failed to get or mark the reports status as pending, continue") - - def main(): logging.basicConfig(level=logging.INFO) @@ -300,9 +264,6 @@ def main(): # put them as github actions artifact (result) check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config) - # If it's a latter running, we need to mark possible failed status - mark_failed_reports_pending(build_name, pr_info) - docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME) image_version = 
docker_image.version From 89bb3f3410bbab491eead2adae2b25ffbfe82c70 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 2 Aug 2023 19:25:33 +0200 Subject: [PATCH 067/113] fix style-check --- tests/integration/test_storage_azure_blob_storage/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 31a49808edf..ae105aba4c8 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -649,7 +649,7 @@ def test_read_subcolumns(cluster): assert res == "42\tcont/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" - + def test_read_from_not_existing_container(cluster): node = cluster.instances["node"] query = f"select * from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont_not_exists', 'test_table.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" From 471296c098f42b7a6fa3ed59289ac5c3659c3ecb Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 2 Aug 2023 22:20:03 +0200 Subject: [PATCH 068/113] Update 02174_cte_scalar_cache.sql --- .../queries/0_stateless/02174_cte_scalar_cache.sql | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache.sql b/tests/queries/0_stateless/02174_cte_scalar_cache.sql index 50a10834e64..86cfff21446 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache.sql +++ b/tests/queries/0_stateless/02174_cte_scalar_cache.sql @@ -6,7 +6,7 @@ WITH ( SELECT sleep(0.0001) FROM system.one ) as a3, ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_GLOBAL_ON', a5 FROM system.numbers LIMIT 100 +SELECT '02177_CTE_GLOBAL_ON', a1, a2, a3, a4, a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS enable_global_with_statement = 1; @@ -16,17 +16,17 @@ WITH ( SELECT sleep(0.0001) FROM system.one ) as a3, ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_GLOBAL_OFF', a5 FROM system.numbers LIMIT 100 +SELECT '02177_CTE_GLOBAL_OFF', a1, a2, a3, a4, a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS enable_global_with_statement = 0; WITH - ( SELECT sleep(0.0001) FROM system.one ), - ( SELECT sleep(0.0001) FROM system.one ), - ( SELECT sleep(0.0001) FROM system.one ), - ( SELECT sleep(0.0001) FROM system.one ), + ( SELECT sleep(0.0001) FROM system.one ) as a1, + ( SELECT sleep(0.0001) FROM system.one ) as a2, + ( SELECT sleep(0.0001) FROM system.one ) as a3, + ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_NEW_ANALYZER', a5 FROM system.numbers LIMIT 100 +SELECT '02177_CTE_NEW_ANALYZER', a1, a2, a3, a4, a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS allow_experimental_analyzer = 1; From dbf14b5e2dd96bd8f76f6db9671353afa52c20be Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 2 Aug 2023 23:16:06 +0200 Subject: [PATCH 069/113] Fix wrong error code "BAD_GET" --- src/Common/ErrorCodes.cpp | 3 ++- src/Interpreters/Context.cpp | 6 +++--- src/TableFunctions/ITableFunctionCluster.h | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 393486f805c..a3277821111 100644 --- a/src/Common/ErrorCodes.cpp +++ 
b/src/Common/ErrorCodes.cpp @@ -582,7 +582,8 @@ M(697, CANNOT_RESTORE_TO_NONENCRYPTED_DISK) \ M(698, INVALID_REDIS_STORAGE_TYPE) \ M(699, INVALID_REDIS_TABLE_STRUCTURE) \ - M(700, USER_SESSION_LIMIT_EXCEEDED) \ + M(700, USER_SESSION_LIMIT_EXCEEDED) \ + M(701, CLUSTER_DOESNT_EXIST) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a5ff7643294..8eba4a9c3a5 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -166,7 +166,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int BAD_GET; extern const int UNKNOWN_DATABASE; extern const int UNKNOWN_TABLE; extern const int TABLE_ALREADY_EXISTS; @@ -181,6 +180,7 @@ namespace ErrorCodes extern const int UNKNOWN_FUNCTION; extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int CLUSTER_DOESNT_EXIST; } #define SHUTDOWN(log, desc, ptr, method) do \ @@ -3073,7 +3073,7 @@ UInt16 Context::getServerPort(const String & port_name) const { auto it = shared->server_ports.find(port_name); if (it == shared->server_ports.end()) - throw Exception(ErrorCodes::BAD_GET, "There is no port named {}", port_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "There is no port named {}", port_name); else return it->second; } @@ -3082,7 +3082,7 @@ std::shared_ptr Context::getCluster(const std::string & cluster_name) c { if (auto res = tryGetCluster(cluster_name)) return res; - throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name); } diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index a8329684ee6..7e81d6d21b7 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -17,7 +17,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_GET; + extern const int CLUSTER_DOESNT_EXIST; extern const int LOGICAL_ERROR; } @@ -59,7 +59,7 @@ protected: cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); if (!context->tryGetCluster(cluster_name)) - throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name); /// Just cut the first arg (cluster_name) and try to parse other table function arguments as is args.erase(args.begin()); From aaf3eb4335c2c92d463e719e40ae2b1228a82486 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 2 Aug 2023 23:18:31 +0200 Subject: [PATCH 070/113] Fix tests --- tests/queries/0_stateless/02725_database_hdfs.reference | 1 - tests/queries/0_stateless/02725_database_hdfs.sh | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02725_database_hdfs.reference b/tests/queries/0_stateless/02725_database_hdfs.reference index 9f63e757b27..932a0786921 100644 --- a/tests/queries/0_stateless/02725_database_hdfs.reference +++ b/tests/queries/0_stateless/02725_database_hdfs.reference @@ -5,7 +5,6 @@ test1 test2 Test 2: check exceptions BAD_ARGUMENTS -CANNOT_EXTRACT_TABLE_STRUCTURE BAD_ARGUMENTS BAD_ARGUMENTS CANNOT_EXTRACT_TABLE_STRUCTURE diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index dab2d37605c..c258042a917 100755 --- 
a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -38,14 +38,14 @@ echo "Test 2: check exceptions" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = HDFS('abacaba'); -""" | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" +""" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test4; CREATE DATABASE test4 ENGINE = HDFS; USE test4; SELECT * FROM \"abacaba/file.tsv\" -""" | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" +""" 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" ${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" From 48d736f8fdd7bfac6581de7702527445fd7b05bd Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 2 Aug 2023 22:34:18 +0200 Subject: [PATCH 071/113] select only required columns from system.databases --- src/Storages/SelectQueryInfo.h | 3 ++ src/Storages/System/IStorageSystemOneBlock.h | 13 +++++++- src/Storages/System/StorageSystemColumns.cpp | 17 ++-------- .../StorageSystemDataSkippingIndices.cpp | 15 ++------- .../System/StorageSystemDatabases.cpp | 24 ++++++++++---- src/Storages/System/StorageSystemDatabases.h | 2 ++ .../System/StorageSystemDetachedParts.cpp | 33 +++++-------------- .../System/StorageSystemPartsBase.cpp | 16 ++------- src/Storages/System/StorageSystemTables.cpp | 17 ++-------- .../System/getQueriedColumnsMaskAndHeader.cpp | 24 ++++++++++++++ .../System/getQueriedColumnsMaskAndHeader.h | 11 +++++++ 11 files changed, 86 insertions(+), 89 deletions(-) create mode 100644 src/Storages/System/getQueriedColumnsMaskAndHeader.cpp create mode 100644 src/Storages/System/getQueriedColumnsMaskAndHeader.h diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 0f75562e0c1..8a3451e0662 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -262,6 +262,9 @@ struct SelectQueryInfo // If limit is not 0, that means it's a trivial limit query. UInt64 limit = 0; + /// For IStorageSystemOneBlock + std::vector columns_mask; + InputOrderInfoPtr getInputOrderInfo() const { return input_order_info ? input_order_info : (projection ? 
projection->input_order_info : nullptr); diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 63b9a443f95..e09b27adf32 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -30,6 +32,8 @@ class IStorageSystemOneBlock : public IStorage protected: virtual void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const = 0; + virtual bool supportsColumnsMask() const { return false; } + public: explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) { @@ -48,8 +52,15 @@ public: size_t /*num_streams*/) override { storage_snapshot->check(column_names); - Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + + if (supportsColumnsMask()) + { + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + query_info.columns_mask = std::move(columns_mask); + sample_block = std::move(header); + } + MutableColumns res_columns = sample_block.cloneEmptyColumns(); fillData(res_columns, context, query_info); diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index e4ca6a15138..b76ad07abeb 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -315,23 +316,9 @@ Pipe StorageSystemColumns::read( const size_t /*num_streams*/) { storage_snapshot->check(column_names); - - /// Create a mask of what columns are needed in the result. - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); Block block_to_filter; Storages storages; diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 2649cf71182..0c4eb197efd 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -185,21 +186,9 @@ Pipe StorageSystemDataSkippingIndices::read( size_t /* num_streams */) { storage_snapshot->check(column_names); - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); MutableColumnPtr column = ColumnString::create(); diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 2fcc91e49bb..1fa94fab7bf 100644 --- 
a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -117,13 +117,23 @@ void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr c const auto & database = databases.at(database_name); - res_columns[0]->insert(database_name); - res_columns[1]->insert(database->getEngineName()); - res_columns[2]->insert(context->getPath() + database->getDataPath()); - res_columns[3]->insert(database->getMetadataPath()); - res_columns[4]->insert(database->getUUID()); - res_columns[5]->insert(getEngineFull(context, database)); - res_columns[6]->insert(database->getDatabaseComment()); + size_t src_index = 0; + size_t res_index = 0; + const auto & columns_mask = query_info.columns_mask; + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database_name); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getEngineName()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(context->getPath() + database->getDataPath()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getMetadataPath()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getUUID()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(getEngineFull(context, database)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getDatabaseComment()); } } diff --git a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index 37c5f97d497..29dd786ca0a 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -26,6 +26,8 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; + bool supportsColumnsMask() const override { return true; } + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; }; diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 97af4094e42..c5d2ba94e09 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -81,13 +82,11 @@ struct WorkerState class DetachedPartsSource : public ISource { public: - DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_, - bool has_bytes_on_disk_column_) + DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_) : ISource(std::move(header_)) , state(state_) , columns_mask(std::move(columns_mask_)) , block_size(block_size_) - , has_bytes_on_disk_column(has_bytes_on_disk_column_) {} String getName() const override { return "DataPartsSource"; } @@ -127,7 +126,6 @@ private: std::shared_ptr state; const std::vector columns_mask; const UInt64 block_size; - const bool has_bytes_on_disk_column; const size_t support_threads = 35; StoragesInfo current_info; @@ -149,9 +147,6 @@ private: void calculatePartSizeOnDisk(size_t begin, std::vector> & parts_sizes) { - if (!has_bytes_on_disk_column) - return; - WorkerState worker_state; for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) @@ -211,7 +206,9 @@ private: auto begin = detached_parts.size() - rows; std::vector> parts_sizes(rows); - calculatePartSizeOnDisk(begin, parts_sizes); + constexpr size_t bytes_on_disk_col_idx = 4; + if (columns_mask[bytes_on_disk_col_idx]) 
+ calculatePartSizeOnDisk(begin, parts_sizes); for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) { @@ -229,7 +226,7 @@ private: new_columns[res_index++]->insert(p.dir_name); if (columns_mask[src_index++]) { - chassert(has_bytes_on_disk_column); + chassert(src_index - 1 == bytes_on_disk_col_idx); size_t bytes_on_disk = parts_sizes.at(p_id - begin).load(); new_columns[res_index++]->insert(bytes_on_disk); } @@ -285,21 +282,7 @@ Pipe StorageSystemDetachedParts::read( storage_snapshot->check(column_names); Block sample_block = storage_snapshot->metadata->getSampleBlock(); - NameSet names_set(column_names.begin(), column_names.end()); - - Block header; - std::vector columns_mask(sample_block.columns()); - - for (size_t i = 0; i < columns_mask.size(); ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } - - bool has_bytes_on_disk_column = names_set.contains("bytes_on_disk"); + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); auto state = std::make_shared(StoragesInfoStream(query_info, context)); @@ -307,7 +290,7 @@ Pipe StorageSystemDetachedParts::read( for (size_t i = 0; i < num_streams; ++i) { - auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size, has_bytes_on_disk_column); + auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size); pipe.addSource(std::move(source)); } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 0979b9d9371..513af6cfc46 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -254,21 +255,10 @@ Pipe StorageSystemPartsBase::read( StoragesInfoStream stream(query_info, context); /// Create the result. - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample.columns()); - for (size_t i = 0; i < sample.columns(); ++i) - { - if (names_set.contains(sample.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample, column_names); + MutableColumns res_columns = header.cloneEmptyColumns(); if (has_state_column) res_columns.push_back(ColumnString::create()); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index e00d2d95568..60dfc3a75e8 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -587,23 +588,9 @@ Pipe StorageSystemTables::read( const size_t /*num_streams*/) { storage_snapshot->check(column_names); - - /// Create a mask of what columns are needed in the result. 
- - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block res_block; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - res_block.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); ColumnPtr filtered_databases_column = getFilteredDatabases(query_info, context); ColumnPtr filtered_tables_column = getFilteredTables(query_info.query, filtered_databases_column, context); diff --git a/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp new file mode 100644 index 00000000000..c29ccb590ed --- /dev/null +++ b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp @@ -0,0 +1,24 @@ +#include + +namespace DB +{ + +std::pair, Block> getQueriedColumnsMaskAndHeader(const Block & sample_block, const Names & column_names) +{ + std::vector columns_mask(sample_block.columns()); + Block header; + + NameSet names_set(column_names.begin(), column_names.end()); + for (size_t i = 0; i < columns_mask.size(); ++i) + { + if (names_set.contains(sample_block.getByPosition(i).name)) + { + columns_mask[i] = 1; + header.insert(sample_block.getByPosition(i)); + } + } + + return std::make_pair(columns_mask, header); +} + +} diff --git a/src/Storages/System/getQueriedColumnsMaskAndHeader.h b/src/Storages/System/getQueriedColumnsMaskAndHeader.h new file mode 100644 index 00000000000..0781a92fa60 --- /dev/null +++ b/src/Storages/System/getQueriedColumnsMaskAndHeader.h @@ -0,0 +1,11 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +std::pair, Block> getQueriedColumnsMaskAndHeader(const Block & sample_block, const Names & column_names); + +} From c4fb0a0cd520faeb61ac0cdb9b834062a558b1b4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 2 Aug 2023 23:36:54 +0200 Subject: [PATCH 072/113] fix two minor issue --- src/Databases/DatabaseFactory.cpp | 3 --- src/Interpreters/loadMetadata.cpp | 3 +++ tests/queries/0_stateless/01114_database_atomic.reference | 1 + tests/queries/0_stateless/01114_database_atomic.sh | 2 ++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 9d90c61bb41..53d5245770e 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -107,9 +107,6 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m { cckMetadataPathForOrdinary(create, metadata_path); - /// Creates store/xxx/ for Atomic - fs::create_directories(fs::path(metadata_path).parent_path()); - DatabasePtr impl = getImpl(create, metadata_path, context); if (impl && context->hasQueryContext() && context->getSettingsRef().log_queries) diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 83af2684322..105603e1f04 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -250,6 +250,9 @@ static void loadSystemDatabaseImpl(ContextMutablePtr context, const String & dat { String path = context->getPath() + "metadata/" + database_name; String metadata_file = path + ".sql"; + if (fs::exists(metadata_file + ".tmp")) + fs::remove(metadata_file + ".tmp"); + if (fs::exists(fs::path(metadata_file))) { /// 'has_force_restore_data_flag' is true, 
to not fail on loading query_log table, if it is corrupted. diff --git a/tests/queries/0_stateless/01114_database_atomic.reference b/tests/queries/0_stateless/01114_database_atomic.reference index 10a39087c57..93e89e3a2ec 100644 --- a/tests/queries/0_stateless/01114_database_atomic.reference +++ b/tests/queries/0_stateless/01114_database_atomic.reference @@ -1,3 +1,4 @@ +2 CREATE DATABASE test_01114_1\nENGINE = Atomic CREATE DATABASE test_01114_2\nENGINE = Atomic CREATE DATABASE test_01114_3\nENGINE = Ordinary diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index decbe136fc4..1b295e5f36f 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -13,6 +13,8 @@ DROP DATABASE IF EXISTS test_01114_2; DROP DATABASE IF EXISTS test_01114_3; " +$CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_1 ENGINE=Ordinary" 2>&1| grep -Fac "UNKNOWN_DATABASE_ENGINE" + $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_1 ENGINE=Atomic" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_2" $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "CREATE DATABASE test_01114_3 ENGINE=Ordinary" From bde87997155abcb3014b67d78bfde0a29905a5ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 3 Aug 2023 00:16:04 +0200 Subject: [PATCH 073/113] Fix the test --- tests/queries/0_stateless/02012_get_server_port.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02012_get_server_port.sql b/tests/queries/0_stateless/02012_get_server_port.sql index cc7fecb0bf0..2cf2014cfcc 100644 --- a/tests/queries/0_stateless/02012_get_server_port.sql +++ b/tests/queries/0_stateless/02012_get_server_port.sql @@ -1,3 +1,3 @@ select getServerPort('tcp_port'); -select getServerPort('unknown'); -- { serverError 170 } +select getServerPort('unknown'); -- { serverError CLUSTER_DOESNT_EXIST } From dbaa2038b534c4d4760eb3659f7ec6f144cc3cb0 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 3 Aug 2023 02:40:06 +0200 Subject: [PATCH 074/113] Fix HTTP Header Filtering --- src/Common/HTTPHeaderFilter.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Common/HTTPHeaderFilter.cpp b/src/Common/HTTPHeaderFilter.cpp index d2dc9452bff..c75050fae8f 100644 --- a/src/Common/HTTPHeaderFilter.cpp +++ b/src/Common/HTTPHeaderFilter.cpp @@ -33,6 +33,9 @@ void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfigurati { std::lock_guard guard(mutex); + forbidden_headers.clear(); + forbidden_headers_regexp.clear(); + if (config.has("http_forbid_headers")) { std::vector keys; @@ -46,11 +49,6 @@ void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfigurati forbidden_headers.insert(config.getString("http_forbid_headers." 
+ key)); } } - else - { - forbidden_headers.clear(); - forbidden_headers_regexp.clear(); - } } } From b9596f0e208ffec5fbeb325f99307c2b039b096c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 3 Aug 2023 03:01:33 +0200 Subject: [PATCH 075/113] Fix style --- src/Common/HTTPHeaderFilter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/HTTPHeaderFilter.cpp b/src/Common/HTTPHeaderFilter.cpp index c75050fae8f..622bc475d33 100644 --- a/src/Common/HTTPHeaderFilter.cpp +++ b/src/Common/HTTPHeaderFilter.cpp @@ -35,7 +35,7 @@ void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfigurati forbidden_headers.clear(); forbidden_headers_regexp.clear(); - + if (config.has("http_forbid_headers")) { std::vector keys; From b338e2fcecfddd9f690176e40482fa2e483556f6 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Thu, 3 Aug 2023 02:45:37 +0000 Subject: [PATCH 076/113] Added peak_memory_usage to final progress --- src/Common/ProgressIndication.cpp | 4 ++++ tests/queries/0_stateless/01921_test_progress_bar.py | 1 + 2 files changed, 5 insertions(+) diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 5a1929d4ec2..e9e923c50d7 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -101,6 +101,10 @@ void ProgressIndication::writeFinalProgress() << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)"; else std::cout << ". "; + + auto peak_memory_usage = getMemoryUsage().peak; + if (peak_memory_usage >= 0) + std::cout << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "."; } void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) diff --git a/tests/queries/0_stateless/01921_test_progress_bar.py b/tests/queries/0_stateless/01921_test_progress_bar.py index 89eecbc3987..54c7ae59894 100755 --- a/tests/queries/0_stateless/01921_test_progress_bar.py +++ b/tests/queries/0_stateless/01921_test_progress_bar.py @@ -17,3 +17,4 @@ with client(name="client1>", log=log) as client1: client1.send("SELECT number FROM numbers(1000) FORMAT Null") client1.expect("Progress: 1\.00 thousand rows, 8\.00 KB .*" + end_of_block) client1.expect("0 rows in set. 
Elapsed: [\\w]{1}\.[\\w]{3} sec.") + client1.expect("Peak memory usage: .*B" + end_of_block) From 06229c1ba6816978d92f6cddf892e283c8704a0a Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Thu, 3 Aug 2023 03:50:41 +0000 Subject: [PATCH 077/113] fix password leak in show create mysql table --- src/Databases/MySQL/DatabaseMySQL.cpp | 1 + .../test_mysql_database_engine/test.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 94e5ba1773e..a4e5c4d92ea 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -144,6 +144,7 @@ ASTPtr DatabaseMySQL::getCreateTableQueryImpl(const String & table_name, Context auto table_storage_define = database_engine_define->clone(); { ASTStorage * ast_storage = table_storage_define->as(); + ast_storage->engine->kind = ASTFunction::Kind::TABLE_ENGINE; ASTs storage_children = ast_storage->children; auto storage_engine_arguments = ast_storage->engine->arguments; diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 18dde5307fd..1d713823dc8 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -1021,3 +1021,21 @@ def test_memory_leak(started_cluster): clickhouse_node.query("DROP DATABASE test_database") clickhouse_node.restart_clickhouse() + +def test_password_leak(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" + ) + assert "clickhouse" not in clickhouse_node.query("SHOW CREATE test_database.test_table") From 9bcb980511119c8668aca29d408c1ed9525a12d3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 3 Aug 2023 06:13:18 +0200 Subject: [PATCH 078/113] tests: fix 01293_client_interactive_vertical_multiline flakiness (increase timeout) All other expect tests has timeout of 60. And yes, sometimes I believe that on CI client spawning can took > 10 seconds for debug build. 
Signed-off-by: Azat Khuzhin --- .../01293_client_interactive_vertical_multiline.expect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect index 3bfd454bb1f..25933777f9f 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect @@ -6,7 +6,7 @@ exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 set history_file $env(CLICKHOUSE_TMP)/$basename.history log_user 0 -set timeout 10 +set timeout 60 match_max 100000 expect_after { From c7fcc611b2b8446babbab41e7b9b6449c87a992a Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Thu, 3 Aug 2023 08:27:58 +0000 Subject: [PATCH 079/113] added TSAN option report_atomic_races=0 --- .../integration/test_profile_max_sessions_for_user/test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py index 72addd79ec5..2930262f63e 100755 --- a/tests/integration/test_profile_max_sessions_for_user/test.py +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ -51,7 +51,12 @@ instance = cluster.add_instance( "configs/server.key", ], user_configs=["configs/users.xml"], - env_variables={"UBSAN_OPTIONS": "print_stacktrace=1"}, + env_variables={ + "UBSAN_OPTIONS": "print_stacktrace=1", + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + "TSAN_OPTIONS": "report_atomic_races=0 " + + os.getenv("TSAN_OPTIONS", default=""), + }, ) From cc7f771093e98de0fc4f01a067f6d68c184549b0 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Thu, 3 Aug 2023 09:15:45 +0000 Subject: [PATCH 080/113] Fix for postgres as well --- .../PostgreSQL/DatabasePostgreSQL.cpp | 1 + src/Databases/SQLite/DatabaseSQLite.cpp | 1 + .../test_mysql_database_engine/test.py | 5 +++- .../test_postgresql_database_engine/test.py | 30 +++++++++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 812a0d8717e..1116c972f94 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -390,6 +390,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co auto create_table_query = std::make_shared(); auto table_storage_define = database_engine_define->clone(); + table_storage_define->as()->engine->kind = ASTFunction::Kind::TABLE_ENGINE; create_table_query->set(create_table_query->storage, table_storage_define); auto columns_declare_list = std::make_shared(); diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index 1cba9d1dc26..d031fd8e420 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -187,6 +187,7 @@ ASTPtr DatabaseSQLite::getCreateTableQueryImpl(const String & table_name, Contex } auto table_storage_define = database_engine_define->clone(); ASTStorage * ast_storage = table_storage_define->as(); + ast_storage->engine->kind = ASTFunction::Kind::TABLE_ENGINE; auto storage_engine_arguments = ast_storage->engine->arguments; auto table_id = storage->getStorageID(); /// Add table_name to engine arguments diff --git 
a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 1d713823dc8..00b5eb9e8aa 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -1022,6 +1022,7 @@ def test_memory_leak(started_cluster): clickhouse_node.query("DROP DATABASE test_database") clickhouse_node.restart_clickhouse() + def test_password_leak(started_cluster): with contextlib.closing( MySQLNodeInstance( @@ -1038,4 +1039,6 @@ def test_password_leak(started_cluster): clickhouse_node.query( "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" ) - assert "clickhouse" not in clickhouse_node.query("SHOW CREATE test_database.test_table") + assert "clickhouse" not in clickhouse_node.query( + "SHOW CREATE test_database.test_table" + ) diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index 59a464f9020..b14d234f7eb 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -400,6 +400,36 @@ def test_datetime(started_cluster): assert "DateTime64(6)" in node1.query("show create table pg.test") +def test_postgresql_password_leak(started_cluster): + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=True + ) + cursor = conn.cursor() + + cursor.execute("CREATE SCHEMA test_schema") + cursor.execute("CREATE TABLE test_schema.table1 (a integer)") + cursor.execute("CREATE TABLE table2 (a integer)") + + node1.query( + "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', 'test_schema')" + ) + + node1.query( + "CREATE DATABASE postgres_database2 ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')" + ) + + assert "mysecretpassword" not in node1.query("SHOW CREATE postgres_database.table1") + assert "mysecretpassword" not in node1.query( + "SHOW CREATE postgres_database2.table2" + ) + + node1.query("DROP DATABASE postgres_database") + node1.query("DROP DATABASE postgres_database2") + + cursor.execute("DROP SCHEMA test_schema CASCADE") + cursor.execute("DROP TABLE table2") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From 7da85cc0de3e3a2b63ccae5639231ddb31f0fe31 Mon Sep 17 00:00:00 2001 From: Val Doroshchuk Date: Thu, 3 Aug 2023 11:04:07 +0200 Subject: [PATCH 081/113] MaterializedMySQL: Add tests for unquoted utf8 column names in DML --- .../materialized_with_ddl.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 389d430622d..604f7a62428 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -1671,22 +1671,24 @@ def utf8mb4_name_test(clickhouse_node, mysql_node, service_name): f"CREATE TABLE `{db}`.`{table}` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" ) mysql_node.query(f"INSERT INTO `{db}`.`{table}` VALUES(1, now())") + mysql_node.query(f"INSERT INTO `{db}`.`{table}`(id, `{table}`) VALUES(2, now())") 
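The `{db}`/`{table}` placeholders above hide what this test is really about: the second column's name is a utf8 identifier (the concrete name comes from the caller and is not shown here), and MaterializedMySQL has to replay DML that uses it both quoted and unquoted. Stripped of the Python harness, the MySQL-side statements look roughly like the sketch below (database, table and column names are hypothetical stand-ins, not the ones the test generates); the unquoted variant of the test continues right after it.

``` sql
-- quoted utf8 identifiers (table and column share the same utf8 name)
INSERT INTO `тест_бд`.`таблица` VALUES (1, NOW());
INSERT INTO `тест_бд`.`таблица` (id, `таблица`) VALUES (2, NOW());

-- the same DML with unquoted utf8 identifiers must be replayed as well
INSERT INTO тест_бд.таблица_unquoted VALUES (1, NOW());
INSERT INTO тест_бд.таблица_unquoted (id, таблица) VALUES (2, NOW());
```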
mysql_node.query( f"CREATE TABLE {db}.{table}_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" ) mysql_node.query(f"INSERT INTO {db}.{table}_unquoted VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.{table}_unquoted(id, {table}) VALUES(2, now())") clickhouse_node.query( f"CREATE DATABASE `{db}` ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" ) check_query( clickhouse_node, - f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}`", - "1\n", + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}`", + "2\n", ) check_query( clickhouse_node, - f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`", - "1\n", + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`", + "2\n", ) # Inc sync @@ -1694,20 +1696,24 @@ def utf8mb4_name_test(clickhouse_node, mysql_node, service_name): f"CREATE TABLE `{db}`.`{table}2` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" ) mysql_node.query(f"INSERT INTO `{db}`.`{table}2` VALUES(1, now())") + mysql_node.query(f"INSERT INTO `{db}`.`{table}2`(id, `{table}`) VALUES(2, now())") check_query( clickhouse_node, - f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2`", - "1\n", + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}2`", + "2\n", ) mysql_node.query( f"CREATE TABLE {db}.{table}2_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" ) mysql_node.query(f"INSERT INTO {db}.{table}2_unquoted VALUES(1, now())") + mysql_node.query( + f"INSERT INTO {db}.{table}2_unquoted(id, {table}) VALUES(2, now())" + ) check_query( clickhouse_node, - f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`", - "1\n", + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`", + "2\n", ) clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") From 659c907d3884ac92d80cd5210a8886eba13c87d5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 3 Aug 2023 09:55:00 +0000 Subject: [PATCH 082/113] Update version_date.tsv and changelogs after v23.7.2.25-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.7.2.25-stable.md | 31 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 5 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.7.2.25-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index c9800e4e66d..bee4c9d7f1e 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.7.1.2470" +ARG VERSION="23.7.2.25" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index f558338b23c..efad16509ea 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.7.1.2470" +ARG VERSION="23.7.2.25" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 156de034a7f..036b159dc03 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.7.1.2470" +ARG VERSION="23.7.2.25" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.7.2.25-stable.md b/docs/changelogs/v23.7.2.25-stable.md new file mode 100644 index 00000000000..267083d8e03 --- /dev/null +++ b/docs/changelogs/v23.7.2.25-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.2.25-stable (8dd1107b032) FIXME as compared to v23.7.1.2470-stable (a70127baecc) + +#### Backward Incompatible Change +* Backported in [#52850](https://github.com/ClickHouse/ClickHouse/issues/52850): If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name'`, ...) in disk function arguments. In previous version it could be specified as `disk = disk_(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Backported in [#52913](https://github.com/ClickHouse/ClickHouse/issues/52913): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Rename setting disable_url_encoding to enable_url_encoding and add a test [#52656](https://github.com/ClickHouse/ClickHouse/pull/52656) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bugs and better test for SYSTEM STOP LISTEN [#52680](https://github.com/ClickHouse/ClickHouse/pull/52680) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)). +* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 1eabc65a10f..3976c676eb5 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.7.2.25-stable 2023-08-03 v23.7.1.2470-stable 2023-07-27 v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 From 9e0d27dc4d4d7f01446364eb1f4746e347fe5705 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 3 Aug 2023 13:16:32 +0300 Subject: [PATCH 083/113] don't create empty parts on drop partittion if we have a transaction (#52945) --- src/Storages/StorageMergeTree.cpp | 71 +++++++++- src/Storages/StorageMergeTree.h | 1 + tests/integration/test_transactions/test.py | 3 - .../01168_mutations_isolation.reference | 24 ++-- .../0_stateless/01168_mutations_isolation.sh | 9 +- ...alter_partition_isolation_stress.reference | 8 ++ ...69_old_alter_partition_isolation_stress.sh | 123 ++++++++++++++++++ .../01172_transaction_counters.reference | 4 - ...421_truncate_isolation_no_merges.reference | 12 -- 9 files changed, 214 insertions(+), 41 deletions(-) create mode 100644 tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference create mode 100755 tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index a279291aef1..14ea2ff4afe 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1738,15 +1738,24 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont { /// Asks to complete merges and does not allow them to start. /// This protects against "revival" of data for a removed partition after completion of merge. - auto merge_blocker = stopMergesAndWait(); waitForOutdatedPartsToBeLoaded(); + auto merge_blocker = stopMergesAndWait(); Stopwatch watch; ProfileEventsScope profile_events_scope; auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + auto data_parts_lock = lockParts(); + auto parts_to_remove = getVisibleDataPartsVectorUnlocked(query_context, data_parts_lock); + removePartsFromWorkingSet(txn.get(), parts_to_remove, true, data_parts_lock); + LOG_INFO(log, "Removed {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); auto parts = getVisibleDataPartsVector(query_context); @@ -1790,8 +1799,15 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt /// It's important to create it outside of lock scope because /// otherwise it can lock parts in destructor and deadlock is possible. auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + if (auto part = outdatePart(txn.get(), part_name, /*force=*/ true)) + dropPartsImpl({part}, detach); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}); @@ -1848,8 +1864,26 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont /// It's important to create it outside of lock scope because /// otherwise it can lock parts in destructor and deadlock is possible. 
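Before the implementation below, a user-level sketch of what the new transactional branches are for: when `DROP PARTITION` / `DROP PART` / `TRUNCATE` runs inside an experimental transaction, the affected parts are only removed from the transaction's working set instead of being covered by freshly created empty parts, so the drop stays invisible to concurrent transactions until `COMMIT` and is undone by `ROLLBACK`. A minimal sketch of the intended behavior (hypothetical non-replicated MergeTree table `mt`):

``` sql
BEGIN TRANSACTION;
ALTER TABLE mt DROP PARTITION ID 'all';  -- no empty covering parts are created
SELECT count() FROM mt;                  -- 0 as seen from inside this transaction
ROLLBACK;                                -- the dropped parts become visible again
SELECT count() FROM mt;                  -- original row count is back
```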
auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + DataPartsVector parts_to_remove; + { + auto data_parts_lock = lockParts(); + if (partition_ast && partition_ast->all) + parts_to_remove = getVisibleDataPartsVectorUnlocked(query_context, data_parts_lock); + else + { + String partition_id = getPartitionIDFromQuery(partition, query_context, &data_parts_lock); + parts_to_remove = getVisibleDataPartsVectorInPartition(query_context, partition_id, data_parts_lock); + } + removePartsFromWorkingSet(txn.get(), parts_to_remove, true, data_parts_lock); + } + dropPartsImpl(std::move(parts_to_remove), detach); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); DataPartsVector parts; @@ -1864,12 +1898,14 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont } if (detach) + { for (const auto & part : parts) { auto metadata_snapshot = getInMemoryMetadataPtr(); LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); part->makeCloneInDetached("", metadata_snapshot); } + } auto future_parts = initCoverageWithNewEmptyParts(parts); @@ -1898,6 +1934,33 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont clearEmptyParts(); } +void StorageMergeTree::dropPartsImpl(DataPartsVector && parts_to_remove, bool detach) +{ + auto metadata_snapshot = getInMemoryMetadataPtr(); + + if (detach) + { + /// If DETACH clone parts to detached/ directory + /// NOTE: no race with background cleanup until we hold pointers to parts + for (const auto & part : parts_to_remove) + { + LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); + part->makeCloneInDetached("", metadata_snapshot); + } + } + + if (deduplication_log) + { + for (const auto & part : parts_to_remove) + deduplication_log->dropPart(part->info); + } + + if (detach) + LOG_INFO(log, "Detached {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); + else + LOG_INFO(log, "Removed {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); +} + PartitionCommandsResultInfo StorageMergeTree::attachPartition( const ASTPtr & partition, const StorageMetadataPtr & /* metadata_snapshot */, bool attach_part, ContextPtr local_context) diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 936ba1b7f18..c77e5140d75 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -237,6 +237,7 @@ private: void dropPartNoWaitNoThrow(const String & part_name) override; void dropPart(const String & part_name, bool detach, ContextPtr context) override; void dropPartition(const ASTPtr & partition, bool detach, ContextPtr context) override; + void dropPartsImpl(DataPartsVector && parts_to_remove, bool detach); PartitionCommandsResultInfo attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool part, ContextPtr context) override; void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, ContextPtr context) override; diff --git a/tests/integration/test_transactions/test.py b/tests/integration/test_transactions/test.py index a12d30915dd..46660581223 100644 --- a/tests/integration/test_transactions/test.py +++ b/tests/integration/test_transactions/test.py @@ -105,8 +105,6 @@ def test_rollback_unfinished_on_restart1(start_cluster): 
"0_4_4_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "0_8_8_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" - "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" - "1_1_1_1_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "1_3_3_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_5_5_0\t1\ttid6\tcsn_6\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" @@ -194,6 +192,5 @@ def test_rollback_unfinished_on_restart2(start_cluster): "0_4_4_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "0_5_5_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" - "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" ) diff --git a/tests/queries/0_stateless/01168_mutations_isolation.reference b/tests/queries/0_stateless/01168_mutations_isolation.reference index 44da63385ca..00859ce99b9 100644 --- a/tests/queries/0_stateless/01168_mutations_isolation.reference +++ b/tests/queries/0_stateless/01168_mutations_isolation.reference @@ -21,20 +21,20 @@ tx7 7 20 all_1_1_0_13 tx7 7 40 all_14_14_0 tx7 7 60 all_7_7_0_13 tx7 7 80 all_12_12_0_13 -tx7 8 20 all_1_14_2_13 -tx7 8 40 all_1_14_2_13 -tx7 8 60 all_1_14_2_13 -tx7 8 80 all_1_14_2_13 +tx7 8 20 all_1_14_1_13 +tx7 8 40 all_1_14_1_13 +tx7 8 60 all_1_14_1_13 +tx7 8 80 all_1_14_1_13 Serialization error INVALID_TRANSACTION -tx11 9 21 all_1_14_2_17 -tx11 9 41 all_1_14_2_17 -tx11 9 61 all_1_14_2_17 -tx11 9 81 all_1_14_2_17 +tx11 9 21 all_1_14_1_17 +tx11 9 41 all_1_14_1_17 +tx11 9 61 all_1_14_1_17 +tx11 9 81 all_1_14_1_17 1 1 RUNNING -tx14 10 22 all_1_14_2_18 -tx14 10 42 all_1_14_2_18 -tx14 10 62 all_1_14_2_18 -tx14 10 82 all_1_14_2_18 +tx14 10 22 all_1_14_1_18 +tx14 10 42 all_1_14_1_18 +tx14 10 62 all_1_14_1_18 +tx14 10 82 all_1_14_1_18 11 2 all_2_2_0 11 10 all_1_1_0_3 diff --git a/tests/queries/0_stateless/01168_mutations_isolation.sh b/tests/queries/0_stateless/01168_mutations_isolation.sh index 2b76e5742ac..c1d70189673 100755 --- a/tests/queries/0_stateless/01168_mutations_isolation.sh +++ b/tests/queries/0_stateless/01168_mutations_isolation.sh @@ -53,9 +53,6 @@ tx 6 "alter table mt update n=n*10 wh tx 6 "insert into mt values (40)" tx 6 "commit" -function accept_both_parts() { - sed 's/all_1_14_1_1/all_1_14_2_1/g' -} tx 7 "begin transaction" tx 7 "select 7, n, _part from mt order by n" @@ -64,7 +61,7 @@ tx_async 8 "alter table mt update n = 0 whe $CLICKHOUSE_CLIENT -q "kill mutation where database=currentDatabase() and mutation_id='mutation_15.txt' format Null" 2>&1| grep -Fv "probably it finished" tx_sync 8 "rollback" tx 7 "optimize table mt final" -tx 7 "select 8, n, _part from mt order by n" | accept_both_parts +tx 7 "select 8, n, _part from mt order by n" tx 10 "begin transaction" tx 10 "alter table mt update n = 0 where 1" | grep -Eo "Serialization error" | uniq tx 7 "alter table mt update n=n+1 where 1" @@ -74,7 +71,7 @@ tx 7 "commit" tx_async 11 "begin transaction" -tx_async 11 "select 9, n, _part from mt order by n" | accept_both_parts +tx_async 11 "select 9, n, _part from mt order by n" tx_async 12 "begin transaction" tx_async 11 "alter table mt update n=n+1 where 1" >/dev/null tx_async 12 "alter table mt update n=n+1 where 1" >/dev/null @@ -91,7 +88,7 @@ $CLICKHOUSE_CLIENT -q "kill transaction where 
tid=$tid_to_kill format Null" tx_sync 13 "rollback" tx 14 "begin transaction" -tx 14 "select 10, n, _part from mt order by n" | accept_both_parts +tx 14 "select 10, n, _part from mt order by n" $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table mt" diff --git a/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference new file mode 100644 index 00000000000..12b941eab50 --- /dev/null +++ b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference @@ -0,0 +1,8 @@ +1 1 +2 1 +3 1 +4 1 +1 +10 100 +1 1 1 +2 1 1 diff --git a/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh new file mode 100755 index 00000000000..32ad78dead6 --- /dev/null +++ b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# Tags: long, no-replicated-database, no-ordinary-database + +# shellcheck disable=SC2015 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS dst"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n UInt64, type UInt8) ENGINE=MergeTree ORDER BY type SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (n UInt64, type UInt8) ENGINE=MergeTree ORDER BY type SETTINGS old_parts_lifetime=0"; + +function thread_insert() +{ + set -e + val=1 + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($val, 1) */ ($val, 1); + INSERT INTO src VALUES /* ($val, 2) */ ($val, 2); + COMMIT;" + val=$((val+1)) + sleep 0.$RANDOM; + done +} + + +# NOTE +# ALTER PARTITION query stops merges, +# but serialization error is still possible if some merge was assigned (and committed) between BEGIN and ALTER. 
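The NOTE above is the key property the next function exercises: each "move the partition from src to dst" step is one transaction, and it is allowed to fail with SERIALIZATION_ERROR when a concurrent merge was assigned and committed in `src` between `BEGIN` and the `ALTER`. Distilled to plain SQL, the transaction each iteration attempts is roughly this (same table names as the test, with the bookkeeping around `$count`/`$sum` omitted):

``` sql
BEGIN TRANSACTION;
INSERT INTO src VALUES (42, 3);
INSERT INTO dst SELECT * FROM src;
ALTER TABLE src DROP PARTITION ID 'all';  -- may throw SERIALIZATION_ERROR if a merge
                                          -- in src committed after BEGIN
COMMIT;
```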
+function thread_partition_src_to_dst() +{ + set -e + count=0 + sum=0 + for i in {1..20}; do + out=$( + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($i, 3) */ ($i, 3); + INSERT INTO dst SELECT * FROM src; + ALTER TABLE src DROP PARTITION ID 'all'; + SET throw_on_unsupported_query_inside_transaction=0; + SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=3) != ($count + 1, $sum + $i)) FORMAT Null; + COMMIT;" 2>&1) ||: + + echo "$out" | grep -Fv "SERIALIZATION_ERROR" | grep -F "Received from " && $CLICKHOUSE_CLIENT --multiquery --query " + begin transaction; + set transaction snapshot 3; + select $i, 'src', type, n, _part from src order by type, n; + select $i, 'dst', type, n, _part from dst order by type, n; + rollback" ||: + echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || count=$((count+1)) + echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || sum=$((sum+i)) + done +} + +function thread_partition_dst_to_src() +{ + set -e + for i in {1..20}; do + action="ROLLBACK" + if (( i % 2 )); then + action="COMMIT" + fi + $CLICKHOUSE_CLIENT --multiquery --query " + SYSTEM STOP MERGES dst; + ALTER TABLE dst DROP PARTITION ID 'nonexistent'; -- STOP MERGES doesn't wait for started merges to finish, so we use this trick + SYSTEM SYNC TRANSACTION LOG; + BEGIN TRANSACTION; + INSERT INTO dst VALUES /* ($i, 4) */ ($i, 4); + INSERT INTO src SELECT * FROM dst; + ALTER TABLE dst DROP PARTITION ID 'all'; + SET throw_on_unsupported_query_inside_transaction=0; + SYSTEM START MERGES dst; + SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=4) != (toUInt8($i/2 + 1), (select sum(number) from numbers(1, $i) where number % 2 or number=$i))) FORMAT Null; + $action;" + done +} + +function thread_select() +{ + set -e + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + -- no duplicates + SELECT type, throwIf(count(n) != countDistinct(n)) FROM src GROUP BY type FORMAT Null; + SELECT type, throwIf(count(n) != countDistinct(n)) FROM dst GROUP BY type FORMAT Null; + -- rows inserted by thread_insert moved together + SET throw_on_unsupported_query_inside_transaction=0; + SELECT _table, throwIf(arraySort(groupArrayIf(n, type=1)) != arraySort(groupArrayIf(n, type=2))) FROM merge(currentDatabase(), '') GROUP BY _table FORMAT Null; + -- all rows are inserted in insert_thread + SELECT type, throwIf(count(n) != max(n)), throwIf(sum(n) != max(n)*(max(n)+1)/2) FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type FORMAT Null; + COMMIT;" + done +} + +thread_insert & PID_1=$! +thread_select & PID_2=$! + +thread_partition_src_to_dst & PID_3=$! +thread_partition_dst_to_src & PID_4=$! 
+wait $PID_3 && wait $PID_4 + +kill -TERM $PID_1 +kill -TERM $PID_2 +wait +wait_for_queries_to_finish + +$CLICKHOUSE_CLIENT -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" +$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arraySort(groupArrayIf(n, type=2)) FROM merge(currentDatabase(), '') GROUP BY _table ORDER BY _table" +$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM merge(currentDatabase(), '') WHERE type=4" +$CLICKHOUSE_CLIENT -q "SELECT type, count(n) == max(n), sum(n) == max(n)*(max(n)+1)/2 FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type" + + +$CLICKHOUSE_CLIENT --query "DROP TABLE src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE dst"; diff --git a/tests/queries/0_stateless/01172_transaction_counters.reference b/tests/queries/0_stateless/01172_transaction_counters.reference index d088d747ee8..24083d7d40b 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.reference +++ b/tests/queries/0_stateless/01172_transaction_counters.reference @@ -29,13 +29,9 @@ 4 1 Commit 1 1 1 0 5 1 Begin 1 1 1 1 5 1 AddPart 1 1 1 1 all_5_5_0 -5 1 AddPart 1 1 1 1 all_1_1_1 5 1 LockPart 1 1 1 1 all_1_1_0 -5 1 AddPart 1 1 1 1 all_3_3_1 5 1 LockPart 1 1 1 1 all_3_3_0 -5 1 AddPart 1 1 1 1 all_4_4_1 5 1 LockPart 1 1 1 1 all_4_4_0 -5 1 AddPart 1 1 1 1 all_5_5_1 5 1 LockPart 1 1 1 1 all_5_5_0 5 1 UnlockPart 1 1 1 1 all_1_1_0 5 1 UnlockPart 1 1 1 1 all_3_3_0 diff --git a/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference index a89ce339f6c..9f725a55439 100644 --- a/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference +++ b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference @@ -5,19 +5,13 @@ tx21 3 UNKNOWN_TABLE concurrent_insert 2 -all_1_1_1 0 -all_2_2_1 0 -all_3_3_1 0 -all_4_4_1 0 all_5_5_0 1 -all_6_6_1 0 concurrent_drop_part_before SERIALIZATION_ERROR INVALID_TRANSACTION 1 3 all_1_1_0 1 -all_2_2_1 0 all_3_3_0 1 read_from_snapshot tx51 3 @@ -28,15 +22,9 @@ tx51 3 concurrent_drop_part_after NO_SUCH_DATA_PART INVALID_TRANSACTION -all_1_1_1 0 -all_2_2_1 0 -all_3_3_1 0 NewPart all_1_1_0 -NewPart all_1_1_1 NewPart all_2_2_0 -NewPart all_2_2_1 NewPart all_3_3_0 -NewPart all_3_3_1 concurrent_truncate_notx_after tx71 3 tx71 0 From 7684e680081970ebc6107f958e31e6e0b888b310 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 3 Aug 2023 13:34:43 +0300 Subject: [PATCH 084/113] Add system stop pulling replication log (#52881) * add system stop pulling replication log * add const * fix another stupid test * Update test.py --- docs/en/sql-reference/statements/system.md | 16 ++++++++++ src/Access/Common/AccessType.h | 1 + src/Access/tests/gtest_access_rights_ops.cpp | 2 +- src/Interpreters/ActionLocksManager.cpp | 1 + src/Interpreters/InterpreterSystemQuery.cpp | 32 +++++++++++++++---- src/Interpreters/loadMetadata.cpp | 8 ++--- src/Parsers/ASTSystemQuery.cpp | 4 ++- src/Parsers/ASTSystemQuery.h | 2 ++ src/Parsers/ParserSystemQuery.cpp | 2 ++ src/Storages/StorageReplicatedMergeTree.cpp | 8 ++++- .../integration/test_grant_and_revoke/test.py | 2 +- .../01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 6 ++-- 13 files changed, 67 insertions(+), 18 deletions(-) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index fb601cd5d35..59970dbeccd 100644 --- 
a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -314,6 +314,22 @@ Provides possibility to start background fetch tasks from replication queues whi SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] ``` +### STOP PULLING REPLICATION LOG + +Stops loading new entries from replication log to replication queue in a `ReplicatedMergeTree` table. + +``` sql +SYSTEM STOP PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + +### START PULLING REPLICATION LOG + +Cancels `SYSTEM STOP PULLING REPLICATION LOG`. + +``` sql +SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + ### SYNC REPLICA Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster, but no more than `receive_timeout` seconds. diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index b253a0e13ce..06507fd85c8 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -168,6 +168,7 @@ enum class AccessType M(SYSTEM_TTL_MERGES, "SYSTEM STOP TTL MERGES, SYSTEM START TTL MERGES, STOP TTL MERGES, START TTL MERGES", TABLE, SYSTEM) \ M(SYSTEM_FETCHES, "SYSTEM STOP FETCHES, SYSTEM START FETCHES, STOP FETCHES, START FETCHES", TABLE, SYSTEM) \ M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \ + M(SYSTEM_PULLING_REPLICATION_LOG, "SYSTEM STOP PULLING REPLICATION LOG, SYSTEM START PULLING REPLICATION LOG", TABLE, SYSTEM) \ M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \ diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index c2e9501f58c..91d79be918b 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -51,7 +51,7 @@ TEST(AccessRights, Union) "CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, " "TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, " "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " - "SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " + "SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1"); } diff --git a/src/Interpreters/ActionLocksManager.cpp b/src/Interpreters/ActionLocksManager.cpp index 7b57b8803cd..d7480d45524 100644 --- a/src/Interpreters/ActionLocksManager.cpp +++ b/src/Interpreters/ActionLocksManager.cpp @@ -16,6 +16,7 @@ namespace ActionLocks extern const StorageActionBlockType DistributedSend = 5; extern const StorageActionBlockType PartsTTLMerge = 6; extern const StorageActionBlockType PartsMove = 7; + extern const StorageActionBlockType PullReplicationLog = 8; } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp 
b/src/Interpreters/InterpreterSystemQuery.cpp index b62840945c4..9c77b40f803 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -89,13 +89,14 @@ namespace ErrorCodes namespace ActionLocks { - extern StorageActionBlockType PartsMerge; - extern StorageActionBlockType PartsFetch; - extern StorageActionBlockType PartsSend; - extern StorageActionBlockType ReplicationQueue; - extern StorageActionBlockType DistributedSend; - extern StorageActionBlockType PartsTTLMerge; - extern StorageActionBlockType PartsMove; + extern const StorageActionBlockType PartsMerge; + extern const StorageActionBlockType PartsFetch; + extern const StorageActionBlockType PartsSend; + extern const StorageActionBlockType ReplicationQueue; + extern const StorageActionBlockType DistributedSend; + extern const StorageActionBlockType PartsTTLMerge; + extern const StorageActionBlockType PartsMove; + extern const StorageActionBlockType PullReplicationLog; } @@ -155,6 +156,8 @@ AccessType getRequiredAccessType(StorageActionBlockType action_type) return AccessType::SYSTEM_TTL_MERGES; else if (action_type == ActionLocks::PartsMove) return AccessType::SYSTEM_MOVES; + else if (action_type == ActionLocks::PullReplicationLog) + return AccessType::SYSTEM_PULLING_REPLICATION_LOG; else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown action type: {}", std::to_string(action_type)); } @@ -513,6 +516,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::START_DISTRIBUTED_SENDS: startStopAction(ActionLocks::DistributedSend, true); break; + case Type::STOP_PULLING_REPLICATION_LOG: + startStopAction(ActionLocks::PullReplicationLog, false); + break; + case Type::START_PULLING_REPLICATION_LOG: + startStopAction(ActionLocks::PullReplicationLog, true); + break; case Type::DROP_REPLICA: dropReplica(query); break; @@ -1090,6 +1099,15 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_MOVES, query.getDatabase(), query.getTable()); break; } + case Type::STOP_PULLING_REPLICATION_LOG: + case Type::START_PULLING_REPLICATION_LOG: + { + if (!query.table) + required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG); + else + required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG, query.getDatabase(), query.getTable()); + break; + } case Type::STOP_FETCHES: case Type::START_FETCHES: { diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 83af2684322..84326e40bf6 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -45,10 +45,10 @@ namespace ErrorCodes namespace ActionLocks { - extern StorageActionBlockType PartsMerge; - extern StorageActionBlockType PartsFetch; - extern StorageActionBlockType PartsSend; - extern StorageActionBlockType DistributedSend; + extern const StorageActionBlockType PartsMerge; + extern const StorageActionBlockType PartsFetch; + extern const StorageActionBlockType PartsSend; + extern const StorageActionBlockType DistributedSend; } static void executeCreateQuery( diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 3385844cb4f..fb10474a4d4 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -162,7 +162,9 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, || type == Type::STOP_REPLICATION_QUEUES || type == Type::START_REPLICATION_QUEUES || type == Type::STOP_DISTRIBUTED_SENDS - || type == 
Type::START_DISTRIBUTED_SENDS) + || type == Type::START_DISTRIBUTED_SENDS + || type == Type::STOP_PULLING_REPLICATION_LOG + || type == Type::START_PULLING_REPLICATION_LOG) { if (table) print_database_table(); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index f97f4465621..9d15e7cacee 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -80,6 +80,8 @@ public: UNFREEZE, ENABLE_FAILPOINT, DISABLE_FAILPOINT, + STOP_PULLING_REPLICATION_LOG, + START_PULLING_REPLICATION_LOG, END }; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 3af0d62f769..40fc1acae69 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -379,6 +379,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: + case Type::STOP_PULLING_REPLICATION_LOG: + case Type::START_PULLING_REPLICATION_LOG: if (!parseQueryWithOnCluster(res, pos, expected)) return false; parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 2c2cea0af2b..23683ec2313 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -197,6 +197,7 @@ namespace ActionLocks extern const StorageActionBlockType ReplicationQueue; extern const StorageActionBlockType PartsTTLMerge; extern const StorageActionBlockType PartsMove; + extern const StorageActionBlockType PullReplicationLog; } @@ -4340,7 +4341,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) { auto zookeeper = getZooKeeper(); - LOG_DEBUG(log, "Cleaning up last parent node for partition {}", partition_id); + LOG_DEBUG(log, "Cleaning up last part node for partition {}", partition_id); /// The name of the previous part for which the quorum was reached. const String quorum_last_part_path = fs::path(zookeeper_path) / "quorum" / "last_part"; @@ -4361,6 +4362,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) if (!parts_with_quorum.added_parts.contains(partition_id)) { /// There is no information about interested part. + LOG_TEST(log, "There is no information about the partition"); break; } @@ -4378,6 +4380,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) else if (code == Coordination::Error::ZNONODE) { /// Node is deleted. It is impossible, but it is Ok. 
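Ahead of the access-control tests below, a usage-level sketch of how the new statements and the matching privilege fit together (database, table and user names are hypothetical; the statement syntax follows the documentation added earlier in this patch):

``` sql
-- stop loading new entries from the shared replication log into this replica's queue
SYSTEM STOP PULLING REPLICATION LOG db.replicated_table;
-- ... maintenance window: inspect system.replication_queue, etc. ...
SYSTEM START PULLING REPLICATION LOG db.replicated_table;

-- the statements are gated by the new table-level SYSTEM privilege
GRANT SYSTEM PULLING REPLICATION LOG ON db.* TO maintenance_user;
```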
+ LOG_WARNING(log, "The last part node {} was deleted", quorum_last_part_path); break; } else if (code == Coordination::Error::ZBADVERSION) @@ -8169,6 +8172,9 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti if (action_type == ActionLocks::PartsMove) return parts_mover.moves_blocker.cancel(); + if (action_type == ActionLocks::PullReplicationLog) + return queue.pull_log_blocker.cancel(); + return {}; } diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index ee5d4b5df93..8a8eb11d91f 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -188,7 +188,7 @@ def test_grant_all_on_table(): instance.query("SHOW GRANTS FOR B") == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, " "DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, " - "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " + "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " "SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.table TO B\n" ) instance.query("REVOKE ALL ON test.table FROM B", user="A") diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index db0f2d8235b..a3e15f0793c 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -119,6 +119,7 @@ SYSTEM MERGES ['SYSTEM STOP MERGES','SYSTEM START MERGES','STOP MERGES','START M SYSTEM TTL MERGES ['SYSTEM STOP TTL MERGES','SYSTEM START TTL MERGES','STOP TTL MERGES','START TTL MERGES'] TABLE SYSTEM SYSTEM FETCHES ['SYSTEM STOP FETCHES','SYSTEM START FETCHES','STOP FETCHES','START FETCHES'] TABLE SYSTEM SYSTEM MOVES ['SYSTEM STOP MOVES','SYSTEM START MOVES','STOP MOVES','START MOVES'] TABLE SYSTEM +SYSTEM PULLING REPLICATION LOG ['SYSTEM STOP PULLING REPLICATION LOG','SYSTEM START PULLING REPLICATION LOG'] TABLE SYSTEM SYSTEM DISTRIBUTED SENDS ['SYSTEM STOP DISTRIBUTED SENDS','SYSTEM START DISTRIBUTED SENDS','STOP DISTRIBUTED SENDS','START DISTRIBUTED SENDS'] TABLE SYSTEM SENDS SYSTEM REPLICATED SENDS ['SYSTEM STOP REPLICATED SENDS','SYSTEM START REPLICATED SENDS','STOP REPLICATED SENDS','START REPLICATED SENDS'] TABLE SYSTEM SENDS SYSTEM SENDS ['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS'] \N SYSTEM diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index acff6e0efb2..5b678537248 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -298,7 +298,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' 
= 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 
'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM 
FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM PULLING REPLICATION LOG' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 136, 'SYSTEM FLUSH' = 137, 'SYSTEM THREAD FUZZER' = 138, 'SYSTEM UNFREEZE' = 139, 'SYSTEM FAILPOINT' = 140, 'SYSTEM LISTEN' = 141, 'SYSTEM' = 142, 'dictGet' = 143, 'displaySecretsInShowAndSelect' = 144, 'addressToLine' = 145, 'addressToLineWithInlines' = 146, 'addressToSymbol' = 147, 'demangle' = 148, 'INTROSPECTION' = 149, 'FILE' = 150, 'URL' = 151, 'REMOTE' = 152, 'MONGO' = 153, 'REDIS' = 154, 'MEILISEARCH' = 155, 'MYSQL' = 156, 'POSTGRES' = 157, 'SQLITE' = 158, 'ODBC' = 159, 'JDBC' = 160, 'HDFS' = 161, 'S3' = 162, 'HIVE' = 163, 'AZURE' = 164, 'SOURCES' = 165, 'CLUSTER' = 166, 'ALL' = 167, 'NONE' = 168), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -586,10 +586,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 
'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 
57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM PULLING REPLICATION LOG' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 136, 'SYSTEM FLUSH' = 137, 'SYSTEM THREAD FUZZER' = 138, 'SYSTEM UNFREEZE' = 139, 'SYSTEM FAILPOINT' = 140, 'SYSTEM LISTEN' = 141, 'SYSTEM' = 142, 'dictGet' = 143, 'displaySecretsInShowAndSelect' = 144, 'addressToLine' = 145, 'addressToLineWithInlines' = 146, 'addressToSymbol' = 147, 'demangle' = 148, 'INTROSPECTION' = 149, 'FILE' = 150, 'URL' = 151, 'REMOTE' = 152, 'MONGO' = 153, 'REDIS' = 154, 'MEILISEARCH' = 155, 'MYSQL' = 156, 'POSTGRES' = 157, 'SQLITE' = 158, 'ODBC' = 159, 'JDBC' = 160, 'HDFS' = 161, 'S3' = 162, 'HIVE' = 163, 'AZURE' = 164, 'SOURCES' = 165, 'CLUSTER' = 166, 'ALL' = 167, 'NONE' = 168), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 
'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 
'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM PULLING REPLICATION LOG' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 
130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 136, 'SYSTEM FLUSH' = 137, 'SYSTEM THREAD FUZZER' = 138, 'SYSTEM UNFREEZE' = 139, 'SYSTEM FAILPOINT' = 140, 'SYSTEM LISTEN' = 141, 'SYSTEM' = 142, 'dictGet' = 143, 'displaySecretsInShowAndSelect' = 144, 'addressToLine' = 145, 'addressToLineWithInlines' = 146, 'addressToSymbol' = 147, 'demangle' = 148, 'INTROSPECTION' = 149, 'FILE' = 150, 'URL' = 151, 'REMOTE' = 152, 'MONGO' = 153, 'REDIS' = 154, 'MEILISEARCH' = 155, 'MYSQL' = 156, 'POSTGRES' = 157, 'SQLITE' = 158, 'ODBC' = 159, 'JDBC' = 160, 'HDFS' = 161, 'S3' = 162, 'HIVE' = 163, 'AZURE' = 164, 'SOURCES' = 165, 'CLUSTER' = 166, 'ALL' = 167, 'NONE' = 168)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From 141a3df727dba39c20546141e57c3c202c5c9d2b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 3 Aug 2023 12:35:14 +0200 Subject: [PATCH 085/113] Fix: create missing metadata directories for the MySQL and PostgreSQL database engines --- src/Databases/MySQL/DatabaseMySQL.cpp | 2 ++ src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 94e5ba1773e..434e702125c 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -77,6 +77,8 @@ DatabaseMySQL::DatabaseMySQL( throw; } + fs::create_directories(metadata_path); + thread = ThreadFromGlobalPool{&DatabaseMySQL::cleanOutdatedTables, this}; } diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 812a0d8717e..3a2ea8c97eb 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -54,6 +54,7 @@ DatabasePostgreSQL::DatabasePostgreSQL( , cache_tables(cache_tables_) , log(&Poco::Logger::get("DatabasePostgreSQL(" + dbname_ + ")")) { + fs::create_directories(metadata_path); cleaner_task = getContext()->getSchedulePool().createTask("PostgreSQLCleanerTask", [this]{ removeOutdatedTables(); }); cleaner_task->deactivate(); } From 84df903958afa1cad2c6ba652c373a186131d7f2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 3 Aug 2023 10:48:24 +0000 Subject: [PATCH 086/113] Decrease the number of tries for a couple of too-slow tests in debug builds.
--- tests/queries/0_stateless/00719_parallel_ddl_db.sh | 2 +- .../0_stateless/02450_kill_distributed_query_deadlock.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00719_parallel_ddl_db.sh b/tests/queries/0_stateless/00719_parallel_ddl_db.sh index 31ea1dbbe58..004590c21df 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_db.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_db.sh @@ -11,7 +11,7 @@ ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" function query() { - for _ in {1..100}; do + for _ in {1..50}; do ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE IF NOT EXISTS parallel_ddl" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" done diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index abcf1bf4c5b..d15acba3837 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Test that running distributed query and cancel it ASAP, # this can trigger a hung/deadlock in ProcessorList. -for i in {1..100}; do +for i in {1..50}; do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & while :; do From 16dfb028b68b6c18185582d534c3b1d948ef88f4 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 16 Jul 2023 13:40:47 +0200 Subject: [PATCH 087/113] Remove creation of an unnecessary temporary ContextAccess on login. --- src/Access/AccessControl.cpp | 18 +++++++++++++ src/Access/AccessControl.h | 11 ++++++++ src/Interpreters/Context.cpp | 49 +++++++++++------------------------- src/Interpreters/Context.h | 6 ++--- 4 files changed, 46 insertions(+), 38 deletions(-) diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index bf0a2a0fbba..05cba7f8510 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -729,6 +730,14 @@ std::shared_ptr AccessControl::getEnabledRoles( } +std::shared_ptr AccessControl::getEnabledRolesInfo( + const std::vector & current_roles, + const std::vector & current_roles_with_admin_option) const +{ + return getEnabledRoles(current_roles, current_roles_with_admin_option)->getRolesInfo(); +} + + std::shared_ptr AccessControl::getEnabledRowPolicies(const UUID & user_id, const boost::container::flat_set & enabled_roles) const { return row_policy_cache->getEnabledRowPolicies(user_id, enabled_roles); @@ -772,6 +781,15 @@ std::shared_ptr AccessControl::getEnabledSettings( return settings_profiles_cache->getEnabledSettings(user_id, settings_from_user, enabled_roles, settings_from_enabled_roles); } +std::shared_ptr AccessControl::getEnabledSettingsInfo( + const UUID & user_id, + const SettingsProfileElements & settings_from_user, + const boost::container::flat_set & enabled_roles, + const SettingsProfileElements & settings_from_enabled_roles) const +{ + return getEnabledSettings(user_id, settings_from_user, enabled_roles, settings_from_enabled_roles)->getInfo(); +} + std::shared_ptr AccessControl::getSettingsProfileInfo(const UUID & profile_id) { return settings_profiles_cache->getSettingsProfileInfo(profile_id); diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 
74816090f88..c7b94955a47 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -29,6 +29,7 @@ class ContextAccessParams; struct User; using UserPtr = std::shared_ptr; class EnabledRoles; +struct EnabledRolesInfo; class RoleCache; class EnabledRowPolicies; class RowPolicyCache; @@ -187,6 +188,10 @@ public: const std::vector & current_roles, const std::vector & current_roles_with_admin_option) const; + std::shared_ptr getEnabledRolesInfo( + const std::vector & current_roles, + const std::vector & current_roles_with_admin_option) const; + std::shared_ptr getEnabledRowPolicies( const UUID & user_id, const boost::container::flat_set & enabled_roles) const; @@ -209,6 +214,12 @@ public: const boost::container::flat_set & enabled_roles, const SettingsProfileElements & settings_from_enabled_roles) const; + std::shared_ptr getEnabledSettingsInfo( + const UUID & user_id, + const SettingsProfileElements & settings_from_user, + const boost::container::flat_set & enabled_roles, + const SettingsProfileElements & settings_from_enabled_roles) const; + std::shared_ptr getSettingsProfileInfo(const UUID & profile_id); const ExternalAuthenticators & getExternalAuthenticators() const; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a5ff7643294..b140d8cab2d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1089,52 +1089,31 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -void Context::setUser(const UUID & user_id_, bool set_current_profiles_, bool set_current_roles_, bool set_current_database_) +void Context::setUser(const UUID & user_id_) { /// Prepare lists of user's profiles, constraints, settings, roles. + /// NOTE: AccessControl::read() and other AccessControl's functions may require some IO work, + /// so Context::getLock() must be unlocked while we're doing this. - std::shared_ptr user; - std::shared_ptr temp_access; - if (set_current_profiles_ || set_current_roles_ || set_current_database_) - { - std::optional params; - { - auto lock = getLock(); - params.emplace(ContextAccessParams{user_id_, /* full_access= */ false, /* use_default_roles = */ true, {}, settings, current_database, client_info }); - } - /// `temp_access` is used here only to extract information about the user, not to actually check access. - /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. - temp_access = getAccessControl().getContextAccess(*params); - user = temp_access->getUser(); - } - - std::shared_ptr profiles; - if (set_current_profiles_) - profiles = temp_access->getDefaultProfileInfo(); - - std::optional> roles; - if (set_current_roles_) - roles = user->granted_roles.findGranted(user->default_roles); - - String database; - if (set_current_database_) - database = user->default_database; + auto user = getAccessControl().read(user_id_); + auto default_roles = user->granted_roles.findGranted(user->default_roles); + auto enabled_roles = getAccessControl().getEnabledRolesInfo(default_roles, {}); + auto enabled_profiles = getAccessControl().getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles); + const auto & database = user->default_database; /// Apply user's profiles, constraints, settings, roles. 
+ auto lock = getLock(); setUserID(user_id_); - if (profiles) - { - /// A profile can specify a value and a readonly constraint for same setting at the same time, - /// so we shouldn't check constraints here. - setCurrentProfiles(*profiles, /* check_constraints= */ false); - } + /// A profile can specify a value and a readonly constraint for same setting at the same time, + /// so we shouldn't check constraints here. + setCurrentProfiles(*enabled_profiles, /* check_constraints= */ false); - if (roles) - setCurrentRoles(*roles); + setCurrentRoles(default_roles); + /// It's optional to specify the DEFAULT DATABASE in the user's definition. if (!database.empty()) setCurrentDatabase(database); } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 676eb8412e5..753e4dbf6f1 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -534,12 +534,10 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! - void setUser(const UUID & user_id_, bool set_current_profiles_ = true, bool set_current_roles_ = true, bool set_current_database_ = true); + void setUser(const UUID & user_id_); UserPtr getUser() const; - void setUserID(const UUID & user_id_); std::optional getUserID() const; - String getUserName() const; void setCurrentRoles(const std::vector & current_roles_); @@ -1195,6 +1193,8 @@ private: void initGlobal(); + void setUserID(const UUID & user_id_); + template void checkAccessImpl(const Args &... args) const; From 7a112459d3d52aa32962eb8d4c22d9f59f070ea6 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 27 Jul 2023 16:23:19 +0200 Subject: [PATCH 088/113] Implement passing current roles to Context::setUser(). --- src/Interpreters/Context.cpp | 9 +++++---- src/Interpreters/Context.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b140d8cab2d..e6b9b12b6ca 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1089,15 +1089,16 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -void Context::setUser(const UUID & user_id_) +void Context::setUser(const UUID & user_id_, const std::optional> & current_roles_) { /// Prepare lists of user's profiles, constraints, settings, roles. /// NOTE: AccessControl::read() and other AccessControl's functions may require some IO work, /// so Context::getLock() must be unlocked while we're doing this. auto user = getAccessControl().read(user_id_); - auto default_roles = user->granted_roles.findGranted(user->default_roles); - auto enabled_roles = getAccessControl().getEnabledRolesInfo(default_roles, {}); + + auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles); + auto enabled_roles = getAccessControl().getEnabledRolesInfo(new_current_roles, {}); auto enabled_profiles = getAccessControl().getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles); const auto & database = user->default_database; @@ -1111,7 +1112,7 @@ void Context::setUser(const UUID & user_id_) /// so we shouldn't check constraints here. setCurrentProfiles(*enabled_profiles, /* check_constraints= */ false); - setCurrentRoles(default_roles); + setCurrentRoles(new_current_roles); /// It's optional to specify the DEFAULT DATABASE in the user's definition. 
if (!database.empty()) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 753e4dbf6f1..531207de60e 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -534,7 +534,7 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! - void setUser(const UUID & user_id_); + void setUser(const UUID & user_id_, const std::optional> & current_roles_ = {}); UserPtr getUser() const; std::optional getUserID() const; From dc740db3fb9cbd7a289424108ebe652badb01b05 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 3 Aug 2023 13:46:57 +0200 Subject: [PATCH 089/113] Fix test 00061_storage_buffer --- tests/queries/1_stateful/00061_storage_buffer.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00061_storage_buffer.sql b/tests/queries/1_stateful/00061_storage_buffer.sql index e1f67abda20..e3cda3de36d 100644 --- a/tests/queries/1_stateful/00061_storage_buffer.sql +++ b/tests/queries/1_stateful/00061_storage_buffer.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS test.hits_dst; DROP TABLE IF EXISTS test.hits_buffer; CREATE TABLE test.hits_dst AS test.hits; -CREATE TABLE test.hits_buffer AS test.hits_dst ENGINE = Buffer(test, hits_dst, 8, 1, 10, 10000, 100000, 10000000, 100000000); +CREATE TABLE test.hits_buffer AS test.hits_dst ENGINE = Buffer(test, hits_dst, 8, 600, 600, 1000000, 1000000, 100000000, 1000000000); INSERT INTO test.hits_buffer SELECT * FROM test.hits WHERE CounterID = 800784; SELECT count() FROM test.hits_buffer; From f427c779ebf854e1aa842063f9d497a48b746bdc Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 20 Jul 2023 09:46:22 +0200 Subject: [PATCH 090/113] Disable using fs cache for backup/restore. 
--- src/Backups/BackupEntryFromImmutableFile.cpp | 8 ++-- src/Backups/BackupEntryFromImmutableFile.h | 4 +- .../BackupEntryWithChecksumCalculation.cpp | 15 +++--- .../BackupEntryWithChecksumCalculation.h | 4 +- src/Backups/BackupEntryWrappedWith.h | 4 +- src/Backups/BackupFactory.h | 4 ++ src/Backups/BackupFileInfo.cpp | 25 ++++++---- src/Backups/BackupFileInfo.h | 5 +- src/Backups/BackupIO_Default.cpp | 13 +++--- src/Backups/BackupIO_Default.h | 5 +- src/Backups/BackupIO_Disk.cpp | 8 ++-- src/Backups/BackupIO_Disk.h | 4 +- src/Backups/BackupIO_File.cpp | 8 ++-- src/Backups/BackupIO_File.h | 4 +- src/Backups/BackupIO_S3.cpp | 21 +++++++-- src/Backups/BackupIO_S3.h | 4 +- src/Backups/BackupSettings.cpp | 1 + src/Backups/BackupSettings.h | 4 ++ src/Backups/BackupsWorker.cpp | 46 +++++++++++++++++-- src/Backups/BackupsWorker.h | 3 +- src/Backups/IBackupEntriesLazyBatch.cpp | 4 +- src/Backups/IBackupEntry.h | 4 +- src/Backups/registerBackupEngineS3.cpp | 19 +++++++- .../registerBackupEnginesFileAndDisk.cpp | 8 ++-- src/Interpreters/Context.cpp | 8 ---- src/Interpreters/Context.h | 3 -- src/Storages/MergeTree/MergeTreeData.cpp | 1 - 27 files changed, 152 insertions(+), 85 deletions(-) diff --git a/src/Backups/BackupEntryFromImmutableFile.cpp b/src/Backups/BackupEntryFromImmutableFile.cpp index 93d555065ec..77ebf6232d4 100644 --- a/src/Backups/BackupEntryFromImmutableFile.cpp +++ b/src/Backups/BackupEntryFromImmutableFile.cpp @@ -57,7 +57,7 @@ UInt64 BackupEntryFromImmutableFile::getSize() const return *file_size; } -UInt128 BackupEntryFromImmutableFile::getChecksum() const +UInt128 BackupEntryFromImmutableFile::getChecksum(const ReadSettings & read_settings) const { { std::lock_guard lock{size_and_checksum_mutex}; @@ -73,7 +73,7 @@ UInt128 BackupEntryFromImmutableFile::getChecksum() const } } - auto calculated_checksum = BackupEntryWithChecksumCalculation::getChecksum(); + auto calculated_checksum = BackupEntryWithChecksumCalculation::getChecksum(read_settings); { std::lock_guard lock{size_and_checksum_mutex}; @@ -86,13 +86,13 @@ UInt128 BackupEntryFromImmutableFile::getChecksum() const } } -std::optional BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length) const +std::optional BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const { if (prefix_length == 0) return 0; if (prefix_length >= getSize()) - return getChecksum(); + return getChecksum(read_settings); /// For immutable files we don't use partial checksums. 
return std::nullopt; diff --git a/src/Backups/BackupEntryFromImmutableFile.h b/src/Backups/BackupEntryFromImmutableFile.h index 37bc6b43cd3..9e3dc8ebb31 100644 --- a/src/Backups/BackupEntryFromImmutableFile.h +++ b/src/Backups/BackupEntryFromImmutableFile.h @@ -27,8 +27,8 @@ public: std::unique_ptr getReadBuffer(const ReadSettings & read_settings) const override; UInt64 getSize() const override; - UInt128 getChecksum() const override; - std::optional getPartialChecksum(size_t prefix_length) const override; + UInt128 getChecksum(const ReadSettings & read_settings) const override; + std::optional getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override; DataSourceDescription getDataSourceDescription() const override { return data_source_description; } bool isEncryptedByDisk() const override { return copy_encrypted; } diff --git a/src/Backups/BackupEntryWithChecksumCalculation.cpp b/src/Backups/BackupEntryWithChecksumCalculation.cpp index 610b46238ba..a507e1b0a84 100644 --- a/src/Backups/BackupEntryWithChecksumCalculation.cpp +++ b/src/Backups/BackupEntryWithChecksumCalculation.cpp @@ -6,7 +6,7 @@ namespace DB { template -UInt128 BackupEntryWithChecksumCalculation::getChecksum() const +UInt128 BackupEntryWithChecksumCalculation::getChecksum(const ReadSettings & read_settings) const { { std::lock_guard lock{checksum_calculation_mutex}; @@ -26,7 +26,7 @@ UInt128 BackupEntryWithChecksumCalculation::getChecksum() const } else { - auto read_buffer = this->getReadBuffer(ReadSettings{}.adjustBufferSize(size)); + auto read_buffer = this->getReadBuffer(read_settings.adjustBufferSize(size)); HashingReadBuffer hashing_read_buffer(*read_buffer); hashing_read_buffer.ignoreAll(); calculated_checksum = hashing_read_buffer.getHash(); @@ -37,23 +37,20 @@ UInt128 BackupEntryWithChecksumCalculation::getChecksum() const } template -std::optional BackupEntryWithChecksumCalculation::getPartialChecksum(size_t prefix_length) const +std::optional BackupEntryWithChecksumCalculation::getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const { if (prefix_length == 0) return 0; size_t size = this->getSize(); if (prefix_length >= size) - return this->getChecksum(); + return this->getChecksum(read_settings); std::lock_guard lock{checksum_calculation_mutex}; - ReadSettings read_settings; - if (calculated_checksum) - read_settings.adjustBufferSize(calculated_checksum ? prefix_length : size); - - auto read_buffer = this->getReadBuffer(read_settings); + auto read_buffer = this->getReadBuffer(read_settings.adjustBufferSize(calculated_checksum ? 
prefix_length : size)); HashingReadBuffer hashing_read_buffer(*read_buffer); + hashing_read_buffer.ignore(prefix_length); auto partial_checksum = hashing_read_buffer.getHash(); diff --git a/src/Backups/BackupEntryWithChecksumCalculation.h b/src/Backups/BackupEntryWithChecksumCalculation.h index 32701ab9952..99ed4a32462 100644 --- a/src/Backups/BackupEntryWithChecksumCalculation.h +++ b/src/Backups/BackupEntryWithChecksumCalculation.h @@ -11,8 +11,8 @@ template class BackupEntryWithChecksumCalculation : public Base { public: - UInt128 getChecksum() const override; - std::optional getPartialChecksum(size_t prefix_length) const override; + UInt128 getChecksum(const ReadSettings & read_settings) const override; + std::optional getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override; private: mutable std::optional calculated_checksum; diff --git a/src/Backups/BackupEntryWrappedWith.h b/src/Backups/BackupEntryWrappedWith.h index f865d529206..7f04c135921 100644 --- a/src/Backups/BackupEntryWrappedWith.h +++ b/src/Backups/BackupEntryWrappedWith.h @@ -17,8 +17,8 @@ public: std::unique_ptr getReadBuffer(const ReadSettings & read_settings) const override { return entry->getReadBuffer(read_settings); } UInt64 getSize() const override { return entry->getSize(); } - UInt128 getChecksum() const override { return entry->getChecksum(); } - std::optional getPartialChecksum(size_t prefix_length) const override { return entry->getPartialChecksum(prefix_length); } + UInt128 getChecksum(const ReadSettings & read_settings) const override { return entry->getChecksum(read_settings); } + std::optional getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override { return entry->getPartialChecksum(prefix_length, read_settings); } DataSourceDescription getDataSourceDescription() const override { return entry->getDataSourceDescription(); } bool isEncryptedByDisk() const override { return entry->isEncryptedByDisk(); } bool isFromFile() const override { return entry->isFromFile(); } diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h index a79c6d354fc..ecdbd5cffbc 100644 --- a/src/Backups/BackupFactory.h +++ b/src/Backups/BackupFactory.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -37,6 +39,8 @@ public: std::optional backup_uuid; bool deduplicate_files = true; bool allow_s3_native_copy = true; + ReadSettings read_settings; + WriteSettings write_settings; }; static BackupFactory & instance(); diff --git a/src/Backups/BackupFileInfo.cpp b/src/Backups/BackupFileInfo.cpp index d539ada55c4..f595c02ddc5 100644 --- a/src/Backups/BackupFileInfo.cpp +++ b/src/Backups/BackupFileInfo.cpp @@ -57,12 +57,12 @@ namespace /// Calculate checksum for backup entry if it's empty. /// Also able to calculate additional checksum of some prefix. - ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size) + ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size, const ReadSettings & read_settings) { ChecksumsForNewEntry res; /// The partial checksum should be calculated before the full checksum to enable optimization in BackupEntryWithChecksumCalculation. 
- res.prefix_checksum = entry->getPartialChecksum(prefix_size); - res.full_checksum = entry->getChecksum(); + res.prefix_checksum = entry->getPartialChecksum(prefix_size, read_settings); + res.full_checksum = entry->getChecksum(read_settings); return res; } @@ -93,7 +93,12 @@ String BackupFileInfo::describe() const } -BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, Poco::Logger * log) +BackupFileInfo buildFileInfoForBackupEntry( + const String & file_name, + const BackupEntryPtr & backup_entry, + const BackupPtr & base_backup, + const ReadSettings & read_settings, + Poco::Logger * log) { auto adjusted_path = removeLeadingSlash(file_name); @@ -126,7 +131,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu /// File with the same name but smaller size exist in previous backup if (check_base == CheckBackupResult::HasPrefix) { - auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, base_backup_file_info->first); + auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, base_backup_file_info->first, read_settings); info.checksum = checksums.full_checksum; /// We have prefix of this file in backup with the same checksum. @@ -146,7 +151,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu { /// We have full file or have nothing, first of all let's get checksum /// of current file - auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0); + auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0, read_settings); info.checksum = checksums.full_checksum; if (info.checksum == base_backup_file_info->second) @@ -169,7 +174,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu } else { - auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0); + auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0, read_settings); info.checksum = checksums.full_checksum; } @@ -188,7 +193,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu return info; } -BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, ThreadPool & thread_pool) +BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool) { BackupFileInfos infos; infos.resize(backup_entries.size()); @@ -210,7 +215,7 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr ++num_active_jobs; } - auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &base_backup, &thread_group, i, log](bool async) + auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &read_settings, &base_backup, &thread_group, i, log](bool async) { SCOPE_EXIT_SAFE({ std::lock_guard lock{mutex}; @@ -237,7 +242,7 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr return; } - infos[i] = buildFileInfoForBackupEntry(name, entry, base_backup, log); + infos[i] = buildFileInfoForBackupEntry(name, entry, base_backup, read_settings, log); } catch (...) 
{ diff --git a/src/Backups/BackupFileInfo.h b/src/Backups/BackupFileInfo.h index a925a1e81ac..63da6f23427 100644 --- a/src/Backups/BackupFileInfo.h +++ b/src/Backups/BackupFileInfo.h @@ -13,6 +13,7 @@ class IBackupEntry; using BackupPtr = std::shared_ptr; using BackupEntryPtr = std::shared_ptr; using BackupEntries = std::vector>; +struct ReadSettings; /// Information about a file stored in a backup. @@ -66,9 +67,9 @@ struct BackupFileInfo using BackupFileInfos = std::vector; /// Builds a BackupFileInfo for a specified backup entry. -BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, Poco::Logger * log); +BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, const ReadSettings & read_settings, Poco::Logger * log); /// Builds a vector of BackupFileInfos for specified backup entries. -BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, ThreadPool & thread_pool); +BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool); } diff --git a/src/Backups/BackupIO_Default.cpp b/src/Backups/BackupIO_Default.cpp index b36cb22498d..5ac522695ce 100644 --- a/src/Backups/BackupIO_Default.cpp +++ b/src/Backups/BackupIO_Default.cpp @@ -4,17 +4,16 @@ #include #include #include -#include #include namespace DB { -BackupReaderDefault::BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_) +BackupReaderDefault::BackupReaderDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_) : log(log_) - , read_settings(context_->getBackupReadSettings()) - , write_settings(context_->getWriteSettings()) + , read_settings(read_settings_) + , write_settings(write_settings_) , write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE) { } @@ -37,10 +36,10 @@ void BackupReaderDefault::copyFileToDisk(const String & path_in_backup, size_t f write_buffer->finalize(); } -BackupWriterDefault::BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_) +BackupWriterDefault::BackupWriterDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_) : log(log_) - , read_settings(context_->getBackupReadSettings()) - , write_settings(context_->getWriteSettings()) + , read_settings(read_settings_) + , write_settings(write_settings_) , write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE) { } diff --git a/src/Backups/BackupIO_Default.h b/src/Backups/BackupIO_Default.h index ad7bdf15d9f..b4888fecd2f 100644 --- a/src/Backups/BackupIO_Default.h +++ b/src/Backups/BackupIO_Default.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace DB @@ -19,7 +18,7 @@ enum class WriteMode; class BackupReaderDefault : public IBackupReader { public: - BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_); + BackupReaderDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_); ~BackupReaderDefault() override = default; /// The function copyFileToDisk() can be much faster than reading the file with readFile() and then writing it to some disk. 
@@ -46,7 +45,7 @@ protected: class BackupWriterDefault : public IBackupWriter { public: - BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_); + BackupWriterDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_); ~BackupWriterDefault() override = default; bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override; diff --git a/src/Backups/BackupIO_Disk.cpp b/src/Backups/BackupIO_Disk.cpp index 1514b4c24c7..21b3afbddf8 100644 --- a/src/Backups/BackupIO_Disk.cpp +++ b/src/Backups/BackupIO_Disk.cpp @@ -8,8 +8,8 @@ namespace DB { -BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_) - : BackupReaderDefault(&Poco::Logger::get("BackupReaderDisk"), context_) +BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_) + : BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderDisk")) , disk(disk_) , root_path(root_path_) , data_source_description(disk->getDataSourceDescription()) @@ -56,8 +56,8 @@ void BackupReaderDisk::copyFileToDisk(const String & path_in_backup, size_t file } -BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_) - : BackupWriterDefault(&Poco::Logger::get("BackupWriterDisk"), context_) +BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_) + : BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterDisk")) , disk(disk_) , root_path(root_path_) , data_source_description(disk->getDataSourceDescription()) diff --git a/src/Backups/BackupIO_Disk.h b/src/Backups/BackupIO_Disk.h index faf4ef03447..70d31eacc1a 100644 --- a/src/Backups/BackupIO_Disk.h +++ b/src/Backups/BackupIO_Disk.h @@ -13,7 +13,7 @@ using DiskPtr = std::shared_ptr; class BackupReaderDisk : public BackupReaderDefault { public: - BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_); + BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_); ~BackupReaderDisk() override; bool fileExists(const String & file_name) override; @@ -33,7 +33,7 @@ private: class BackupWriterDisk : public BackupWriterDefault { public: - BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_); + BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_); ~BackupWriterDisk() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupIO_File.cpp b/src/Backups/BackupIO_File.cpp index e1a3f336521..2bedb5470fb 100644 --- a/src/Backups/BackupIO_File.cpp +++ b/src/Backups/BackupIO_File.cpp @@ -16,8 +16,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -BackupReaderFile::BackupReaderFile(const String & root_path_, const ContextPtr & context_) - : BackupReaderDefault(&Poco::Logger::get("BackupReaderFile"), context_) +BackupReaderFile::BackupReaderFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_) + : BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderFile")) , root_path(root_path_) , 
data_source_description(DiskLocal::getLocalDataSourceDescription(root_path)) { @@ -74,8 +74,8 @@ void BackupReaderFile::copyFileToDisk(const String & path_in_backup, size_t file } -BackupWriterFile::BackupWriterFile(const String & root_path_, const ContextPtr & context_) - : BackupWriterDefault(&Poco::Logger::get("BackupWriterFile"), context_) +BackupWriterFile::BackupWriterFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_) + : BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterFile")) , root_path(root_path_) , data_source_description(DiskLocal::getLocalDataSourceDescription(root_path)) { diff --git a/src/Backups/BackupIO_File.h b/src/Backups/BackupIO_File.h index fd2c0b07158..6bb4b11e134 100644 --- a/src/Backups/BackupIO_File.h +++ b/src/Backups/BackupIO_File.h @@ -11,7 +11,7 @@ namespace DB class BackupReaderFile : public BackupReaderDefault { public: - explicit BackupReaderFile(const String & root_path_, const ContextPtr & context_); + explicit BackupReaderFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_); bool fileExists(const String & file_name) override; UInt64 getFileSize(const String & file_name) override; @@ -29,7 +29,7 @@ private: class BackupWriterFile : public BackupWriterDefault { public: - BackupWriterFile(const String & root_path_, const ContextPtr & context_); + BackupWriterFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_); bool fileExists(const String & file_name) override; UInt64 getFileSize(const String & file_name) override; diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 31a33ea1a79..56402187703 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -101,8 +101,14 @@ namespace BackupReaderS3::BackupReaderS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_) - : BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_) + const S3::URI & s3_uri_, + const String & access_key_id_, + const String & secret_access_key_, + bool allow_s3_native_copy, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_) + : BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderS3")) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) @@ -178,8 +184,15 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s BackupWriterS3::BackupWriterS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_) - : BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_) + const S3::URI & s3_uri_, + const String & access_key_id_, + const String & secret_access_key_, + bool allow_s3_native_copy, + const String & storage_class_name, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_) + : BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterS3")) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , 
request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 8015dade60d..a29c91498ec 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -17,7 +17,7 @@ namespace DB class BackupReaderS3 : public BackupReaderDefault { public: - BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_); + BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); ~BackupReaderS3() override; bool fileExists(const String & file_name) override; @@ -38,7 +38,7 @@ private: class BackupWriterS3 : public BackupWriterDefault { public: - BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_); + BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); ~BackupWriterS3() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 3138959191e..43259e6958a 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -27,6 +27,7 @@ namespace ErrorCodes M(Bool, decrypt_files_from_encrypted_disks) \ M(Bool, deduplicate_files) \ M(Bool, allow_s3_native_copy) \ + M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache) \ M(UInt64, shard_num) \ M(UInt64, replica_num) \ M(Bool, internal) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index dabfe9a600f..2a950ef1b4f 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -44,6 +44,10 @@ struct BackupSettings /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) bool allow_s3_native_copy = true; + /// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries, + /// but don't put more entries into the cache. + bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + /// 1-based shard index to store in the backup. 0 means all shards. /// Can only be used with BACKUP ON CLUSTER. size_t shard_num = 0; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 287560d1e5c..8a87f168a0d 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -178,6 +178,42 @@ namespace { return status == BackupStatus::RESTORING; } + + /// We use slightly different read and write settings for backup/restore + /// with a separate throttler and limited usage of filesystem cache. 
+ ReadSettings getReadSettingsForBackup(const ContextPtr & context, const BackupSettings & backup_settings) + { + auto read_settings = context->getReadSettings(); + read_settings.remote_throttler = context->getBackupsThrottler(); + read_settings.local_throttler = context->getBackupsThrottler(); + read_settings.enable_filesystem_cache = false; + read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = backup_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; + return read_settings; + } + + WriteSettings getWriteSettingsForBackup(const ContextPtr & context) + { + auto write_settings = context->getWriteSettings(); + write_settings.enable_filesystem_cache_on_write_operations = false; + return write_settings; + } + + ReadSettings getReadSettingsForRestore(const ContextPtr & context) + { + auto read_settings = context->getReadSettings(); + read_settings.remote_throttler = context->getBackupsThrottler(); + read_settings.local_throttler = context->getBackupsThrottler(); + read_settings.enable_filesystem_cache = false; + read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; + return read_settings; + } + + WriteSettings getWriteSettingsForRestore(const ContextPtr & context) + { + auto write_settings = context->getWriteSettings(); + write_settings.enable_filesystem_cache_on_write_operations = false; + return write_settings; + } } @@ -350,6 +386,8 @@ void BackupsWorker::doBackup( backup_create_params.backup_uuid = backup_settings.backup_uuid; backup_create_params.deduplicate_files = backup_settings.deduplicate_files; backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy; + backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings); + backup_create_params.write_settings = getWriteSettingsForBackup(context); BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params); /// Write the backup. @@ -383,7 +421,7 @@ void BackupsWorker::doBackup( } /// Write the backup entries to the backup. - buildFileInfosForBackupEntries(backup, backup_entries, backup_coordination); + buildFileInfosForBackupEntries(backup, backup_entries, backup_create_params.read_settings, backup_coordination); writeBackupEntries(backup, std::move(backup_entries), backup_id, backup_coordination, backup_settings.internal); /// We have written our backup entries, we need to tell other hosts (they could be waiting for it). 
@@ -433,12 +471,12 @@ void BackupsWorker::doBackup( } -void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, std::shared_ptr backup_coordination) +void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr backup_coordination) { LOG_TRACE(log, "{}", Stage::BUILDING_FILE_INFOS); backup_coordination->setStage(Stage::BUILDING_FILE_INFOS, ""); backup_coordination->waitForStage(Stage::BUILDING_FILE_INFOS); - backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), *backups_thread_pool)); + backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), read_settings, *backups_thread_pool)); } @@ -650,6 +688,8 @@ void BackupsWorker::doRestore( backup_open_params.base_backup_info = restore_settings.base_backup_info; backup_open_params.password = restore_settings.password; backup_open_params.allow_s3_native_copy = restore_settings.allow_s3_native_copy; + backup_open_params.read_settings = getReadSettingsForRestore(context); + backup_open_params.write_settings = getWriteSettingsForRestore(context); BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params); String current_database = context->getCurrentDatabase(); diff --git a/src/Backups/BackupsWorker.h b/src/Backups/BackupsWorker.h index cbfadc24b7b..ab4359ec257 100644 --- a/src/Backups/BackupsWorker.h +++ b/src/Backups/BackupsWorker.h @@ -24,6 +24,7 @@ using BackupPtr = std::shared_ptr; class IBackupEntry; using BackupEntries = std::vector>>; using DataRestoreTasks = std::vector>; +struct ReadSettings; /// Manager of backups and restores: executes backups and restores' threads in the background. /// Keeps information about backups and restores started in this session. @@ -107,7 +108,7 @@ private: bool called_async); /// Builds file infos for specified backup entries. - void buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, std::shared_ptr backup_coordination); + void buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr backup_coordination); /// Write backup entries to an opened backup. 
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, const OperationID & backup_id, std::shared_ptr backup_coordination, bool internal); diff --git a/src/Backups/IBackupEntriesLazyBatch.cpp b/src/Backups/IBackupEntriesLazyBatch.cpp index 7c6bb891981..4974d9f6702 100644 --- a/src/Backups/IBackupEntriesLazyBatch.cpp +++ b/src/Backups/IBackupEntriesLazyBatch.cpp @@ -19,8 +19,8 @@ public: std::unique_ptr getReadBuffer(const ReadSettings & read_settings) const override { return getInternalBackupEntry()->getReadBuffer(read_settings); } UInt64 getSize() const override { return getInternalBackupEntry()->getSize(); } - UInt128 getChecksum() const override { return getInternalBackupEntry()->getChecksum(); } - std::optional getPartialChecksum(size_t prefix_length) const override { return getInternalBackupEntry()->getPartialChecksum(prefix_length); } + UInt128 getChecksum(const ReadSettings & read_settings) const override { return getInternalBackupEntry()->getChecksum(read_settings); } + std::optional getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override { return getInternalBackupEntry()->getPartialChecksum(prefix_length, read_settings); } DataSourceDescription getDataSourceDescription() const override { return getInternalBackupEntry()->getDataSourceDescription(); } bool isEncryptedByDisk() const override { return getInternalBackupEntry()->isEncryptedByDisk(); } bool isFromFile() const override { return getInternalBackupEntry()->isFromFile(); } diff --git a/src/Backups/IBackupEntry.h b/src/Backups/IBackupEntry.h index 7e952e9b568..1b72b4358ba 100644 --- a/src/Backups/IBackupEntry.h +++ b/src/Backups/IBackupEntry.h @@ -21,11 +21,11 @@ public: virtual UInt64 getSize() const = 0; /// Returns the checksum of the data. - virtual UInt128 getChecksum() const = 0; + virtual UInt128 getChecksum(const ReadSettings & read_settings) const = 0; /// Returns a partial checksum, i.e. the checksum calculated for a prefix part of the data. /// Can return nullopt if the partial checksum is too difficult to calculate. - virtual std::optional getPartialChecksum(size_t /* prefix_length */) const { return {}; } + virtual std::optional getPartialChecksum(size_t /* prefix_length */, const ReadSettings &) const { return {}; } /// Returns a read buffer for reading the data. 
virtual std::unique_ptr getReadBuffer(const ReadSettings & read_settings) const = 0; diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index 5b6f7825157..451e98b1290 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -107,12 +107,27 @@ void registerBackupEngineS3(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { - auto reader = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.context); + auto reader = std::make_shared(S3::URI{s3_uri}, + access_key_id, + secret_access_key, + params.allow_s3_native_copy, + params.read_settings, + params.write_settings, + params.context); + return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context); } else { - auto writer = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.s3_storage_class, params.context); + auto writer = std::make_shared(S3::URI{s3_uri}, + access_key_id, + secret_access_key, + params.allow_s3_native_copy, + params.s3_storage_class, + params.read_settings, + params.write_settings, + params.context); + return std::make_unique( backup_name_for_logging, archive_params, diff --git a/src/Backups/registerBackupEnginesFileAndDisk.cpp b/src/Backups/registerBackupEnginesFileAndDisk.cpp index daae9627759..a498e287f15 100644 --- a/src/Backups/registerBackupEnginesFileAndDisk.cpp +++ b/src/Backups/registerBackupEnginesFileAndDisk.cpp @@ -169,18 +169,18 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory) { std::shared_ptr reader; if (engine_name == "File") - reader = std::make_shared(path, params.context); + reader = std::make_shared(path, params.read_settings, params.write_settings); else - reader = std::make_shared(disk, path, params.context); + reader = std::make_shared(disk, path, params.read_settings, params.write_settings); return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context); } else { std::shared_ptr writer; if (engine_name == "File") - writer = std::make_shared(path, params.context); + writer = std::make_shared(path, params.read_settings, params.write_settings); else - writer = std::make_shared(disk, path, params.context); + writer = std::make_shared(disk, path, params.read_settings, params.write_settings); return std::make_unique( backup_name_for_logging, archive_params, diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a5ff7643294..119cf3ac0c9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4550,14 +4550,6 @@ ReadSettings Context::getReadSettings() const return res; } -ReadSettings Context::getBackupReadSettings() const -{ - ReadSettings read_settings = getReadSettings(); - read_settings.remote_throttler = getBackupsThrottler(); - read_settings.local_throttler = getBackupsThrottler(); - return read_settings; -} - WriteSettings Context::getWriteSettings() const { WriteSettings res; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 676eb8412e5..cf6b1cca38d 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1168,9 +1168,6 @@ public: /** Get settings for reading from filesystem. */ ReadSettings getReadSettings() const; - /** Get settings for reading from filesystem for BACKUPs. */ - ReadSettings getBackupReadSettings() const; - /** Get settings for writing to filesystem. 
*/ WriteSettings getWriteSettings() const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5ef3899929e..7524dc2ffc3 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5153,7 +5153,6 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( MergeTreeData::PartsBackupEntries res; std::map> temp_dirs; TableLockHolder table_lock; - ReadSettings read_settings = local_context->getBackupReadSettings(); for (const auto & part : data_parts) { From e127d27453713be645e826191ca57854afc3748a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 23 Jul 2023 11:23:36 +0200 Subject: [PATCH 091/113] Disable using fs cache for backup/restore #2. --- src/Backups/BackupEntriesCollector.cpp | 2 ++ src/Backups/BackupEntriesCollector.h | 3 +++ src/Backups/BackupEntryFromSmallFile.cpp | 16 ++++++++-------- src/Backups/BackupEntryFromSmallFile.h | 4 ++-- src/Backups/BackupsWorker.cpp | 4 ++-- .../MergeTree/DataPartStorageOnDiskBase.cpp | 3 ++- .../MergeTree/DataPartStorageOnDiskBase.h | 1 + src/Storages/MergeTree/IDataPartStorage.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 3 +++ src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/StorageLog.cpp | 3 ++- src/Storages/StorageMemory.cpp | 8 +++++++- src/Storages/StorageMergeTree.cpp | 3 ++- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- src/Storages/StorageStripeLog.cpp | 3 ++- 15 files changed, 40 insertions(+), 19 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 485d59eff38..2c7985f2baa 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -77,10 +77,12 @@ BackupEntriesCollector::BackupEntriesCollector( const ASTBackupQuery::Elements & backup_query_elements_, const BackupSettings & backup_settings_, std::shared_ptr backup_coordination_, + const ReadSettings & read_settings_, const ContextPtr & context_) : backup_query_elements(backup_query_elements_) , backup_settings(backup_settings_) , backup_coordination(backup_coordination_) + , read_settings(read_settings_) , context(context_) , on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000)) , consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000)) diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index be6ca8d1ebe..54d82088129 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -30,6 +30,7 @@ public: BackupEntriesCollector(const ASTBackupQuery::Elements & backup_query_elements_, const BackupSettings & backup_settings_, std::shared_ptr backup_coordination_, + const ReadSettings & read_settings_, const ContextPtr & context_); ~BackupEntriesCollector(); @@ -40,6 +41,7 @@ public: const BackupSettings & getBackupSettings() const { return backup_settings; } std::shared_ptr getBackupCoordination() const { return backup_coordination; } + const ReadSettings & getReadSettings() const { return read_settings; } ContextPtr getContext() const { return context; } /// Adds a backup entry which will be later returned by run(). 
@@ -93,6 +95,7 @@ private: const ASTBackupQuery::Elements backup_query_elements; const BackupSettings backup_settings; std::shared_ptr backup_coordination; + const ReadSettings read_settings; ContextPtr context; std::chrono::milliseconds on_cluster_first_sync_timeout; std::chrono::milliseconds consistent_metadata_snapshot_timeout; diff --git a/src/Backups/BackupEntryFromSmallFile.cpp b/src/Backups/BackupEntryFromSmallFile.cpp index d0a99056b59..55a851bdf8b 100644 --- a/src/Backups/BackupEntryFromSmallFile.cpp +++ b/src/Backups/BackupEntryFromSmallFile.cpp @@ -11,17 +11,17 @@ namespace DB { namespace { - String readFile(const String & file_path) + String readFile(const String & file_path, const ReadSettings & read_settings) { - auto buf = createReadBufferFromFileBase(file_path, /* settings= */ {}); + auto buf = createReadBufferFromFileBase(file_path, read_settings); String s; readStringUntilEOF(s, *buf); return s; } - String readFile(const DiskPtr & disk, const String & file_path, bool copy_encrypted) + String readFile(const DiskPtr & disk, const String & file_path, const ReadSettings & read_settings, bool copy_encrypted) { - auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, {}) : disk->readFile(file_path); + auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, read_settings) : disk->readFile(file_path, read_settings); String s; readStringUntilEOF(s, *buf); return s; @@ -29,19 +29,19 @@ namespace } -BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_) +BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_, const ReadSettings & read_settings_) : file_path(file_path_) , data_source_description(DiskLocal::getLocalDataSourceDescription(file_path_)) - , data(readFile(file_path_)) + , data(readFile(file_path_, read_settings_)) { } -BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_) +BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, const ReadSettings & read_settings_, bool copy_encrypted_) : disk(disk_) , file_path(file_path_) , data_source_description(disk_->getDataSourceDescription()) , copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted) - , data(readFile(disk_, file_path, copy_encrypted)) + , data(readFile(disk_, file_path, read_settings_, copy_encrypted)) { } diff --git a/src/Backups/BackupEntryFromSmallFile.h b/src/Backups/BackupEntryFromSmallFile.h index d6651ab8cb5..0c4b9ea15e7 100644 --- a/src/Backups/BackupEntryFromSmallFile.h +++ b/src/Backups/BackupEntryFromSmallFile.h @@ -13,8 +13,8 @@ using DiskPtr = std::shared_ptr; class BackupEntryFromSmallFile : public BackupEntryWithChecksumCalculation { public: - explicit BackupEntryFromSmallFile(const String & file_path_); - BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_ = false); + explicit BackupEntryFromSmallFile(const String & file_path_, const ReadSettings & read_settings_); + BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, const ReadSettings & read_settings_, bool copy_encrypted_ = false); std::unique_ptr getReadBuffer(const ReadSettings &) const override; UInt64 getSize() const override { return data.size(); } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 8a87f168a0d..a5c4a5314dc 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -186,7 +186,7 @@ namespace auto read_settings = 
context->getReadSettings(); read_settings.remote_throttler = context->getBackupsThrottler(); read_settings.local_throttler = context->getBackupsThrottler(); - read_settings.enable_filesystem_cache = false; + read_settings.enable_filesystem_cache = backup_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = backup_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; return read_settings; } @@ -416,7 +416,7 @@ void BackupsWorker::doBackup( /// Prepare backup entries. BackupEntries backup_entries; { - BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context}; + BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, backup_create_params.read_settings, context}; backup_entries = backup_entries_collector.run(); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index a0492f5f38e..618dbd845ae 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -331,6 +331,7 @@ void DataPartStorageOnDiskBase::backup( const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const @@ -382,7 +383,7 @@ void DataPartStorageOnDiskBase::backup( if (files_without_checksums.contains(filepath)) { - backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, copy_encrypted)); + backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, read_settings, copy_encrypted)); continue; } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 648bc908f59..6c2987e4971 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -55,6 +55,7 @@ public: const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const override; diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 8dbf5caa168..19af6085547 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -221,6 +221,7 @@ public: const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const = 0; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 7524dc2ffc3..0cfcd815cce 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5148,6 +5148,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, const ContextPtr & local_context) { MergeTreeData::PartsBackupEntries res; @@ -5186,6 +5187,7 @@ 
MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( part->getFileNamesWithoutChecksums(), data_path_in_backup, backup_settings, + read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs); @@ -5198,6 +5200,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( projection_part->getFileNamesWithoutChecksums(), fs::path{data_path_in_backup} / part->name, backup_settings, + read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 5e6b043c31c..9ee61134740 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1336,7 +1336,7 @@ protected: using PartsBackupEntries = std::vector; /// Makes backup entries to backup the parts of this table. - PartsBackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, const ContextPtr & local_context); + PartsBackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, const ReadSettings & read_settings, const ContextPtr & local_context); class RestoredPartsHolder; diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index d02a51fab22..87aa71f3e8d 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -949,6 +949,7 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c fs::path temp_dir = temp_dir_owner->getRelativePath(); disk->createDirectories(temp_dir); + const auto & read_settings = backup_entries_collector.getReadSettings(); bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks; /// *.bin @@ -980,7 +981,7 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c /// sizes.json String files_info_path = file_checker.getPath(); backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, copy_encrypted)); + data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, read_settings, copy_encrypted)); /// columns.txt backup_entries_collector.addBackupEntry( diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index c990d488969..2ef1d8d3183 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -277,11 +277,13 @@ namespace const std::shared_ptr blocks_, const String & data_path_in_backup, const DiskPtr & temp_disk_, + const ReadSettings & read_settings_, UInt64 max_compress_block_size_) : context(context_) , metadata_snapshot(metadata_snapshot_) , blocks(blocks_) , temp_disk(temp_disk_) + , read_settings(read_settings_) , max_compress_block_size(max_compress_block_size_) { fs::path data_path_in_backup_fs = data_path_in_backup; @@ -371,7 +373,7 @@ namespace file_checker.update(temp_dir / fs::path{file_paths[i]}.filename()); } file_checker.save(); - backup_entries[sizes_json_pos] = {file_paths[sizes_json_pos], std::make_shared(temp_disk, sizes_json_path)}; + backup_entries[sizes_json_pos] = {file_paths[sizes_json_pos], std::make_shared(temp_disk, sizes_json_path, read_settings)}; } /// We don't need to keep `blocks` any longer. 
@@ -386,6 +388,7 @@ namespace std::shared_ptr blocks; DiskPtr temp_disk; std::optional temp_dir_owner; + ReadSettings read_settings; UInt64 max_compress_block_size; Strings file_paths; size_t data_bin_pos, index_mrk_pos, columns_txt_pos, count_txt_pos, sizes_json_pos; @@ -395,13 +398,16 @@ namespace void StorageMemory::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { auto temp_disk = backup_entries_collector.getContext()->getGlobalTemporaryVolume()->getDisk(0); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto max_compress_block_size = backup_entries_collector.getContext()->getSettingsRef().max_compress_block_size; + backup_entries_collector.addBackupEntries(std::make_shared( backup_entries_collector.getContext(), getInMemoryMetadataPtr(), data.get(), data_path_in_backup, temp_disk, + read_settings, max_compress_block_size)->getBackupEntries()); } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 14ea2ff4afe..ad9013d9f13 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2253,6 +2253,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) { const auto & backup_settings = backup_entries_collector.getBackupSettings(); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto local_context = backup_entries_collector.getContext(); DataPartsVector data_parts; @@ -2265,7 +2266,7 @@ void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collec for (const auto & data_part : data_parts) min_data_version = std::min(min_data_version, data_part->info.getDataVersion() + 1); - auto parts_backup_entries = backupParts(data_parts, data_path_in_backup, backup_settings, local_context); + auto parts_backup_entries = backupParts(data_parts, data_path_in_backup, backup_settings, read_settings, local_context); for (auto & part_backup_entries : parts_backup_entries) backup_entries_collector.addBackupEntries(std::move(part_backup_entries.backup_entries)); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 23683ec2313..7fce373e26b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -9826,6 +9826,7 @@ void StorageReplicatedMergeTree::backupData( /// because we need to coordinate them with other replicas (other replicas can have better parts). 
const auto & backup_settings = backup_entries_collector.getBackupSettings(); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto local_context = backup_entries_collector.getContext(); DataPartsVector data_parts; @@ -9834,7 +9835,7 @@ void StorageReplicatedMergeTree::backupData( else data_parts = getVisibleDataPartsVector(local_context); - auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, local_context); + auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, read_settings, local_context); auto coordination = backup_entries_collector.getBackupCoordination(); String shared_id = getTableSharedID(); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 2f3b5f25ee4..0bfef5ed5e5 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -547,6 +547,7 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec fs::path temp_dir = temp_dir_owner->getRelativePath(); disk->createDirectories(temp_dir); + const auto & read_settings = backup_entries_collector.getReadSettings(); bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks; /// data.bin @@ -576,7 +577,7 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec /// sizes.json String files_info_path = file_checker.getPath(); backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, copy_encrypted)); + data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, read_settings, copy_encrypted)); /// columns.txt backup_entries_collector.addBackupEntry( From 1cd79020790b153a7cdbda09159cc18e6de77a7e Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 23 Jul 2023 11:24:01 +0200 Subject: [PATCH 092/113] Add test. --- .../configs/disk_s3.xml | 15 +++ .../test_backup_restore_s3/test.py | 100 +++++++++++++++++- 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml index c1fd059bc67..d635e39e13f 100644 --- a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml +++ b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml @@ -21,6 +21,13 @@ minio123 33554432 + + cache + disk_s3 + /tmp/s3_cache/ + 1000000000 + 1 + @@ -37,11 +44,19 @@ + + +
+ disk_s3_cache +
+
+
default disk_s3 disk_s3_plain + disk_s3_cache diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 8701bf0d832..21018924143 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -57,29 +57,42 @@ def get_events_for_query(query_id: str) -> Dict[str, int]: } +def format_settings(settings): + if not settings: + return "" + return "SETTINGS " + ",".join(f"{k}={v}" for k, v in settings.items()) + + def check_backup_and_restore( storage_policy, backup_destination, size=1000, - backup_name=None, + backup_settings=None, + restore_settings=None, + insert_settings=None, + optimize_table=True, ): + optimize_table_query = "OPTIMIZE TABLE data FINAL;" if optimize_table else "" + node.query( f""" DROP TABLE IF EXISTS data SYNC; CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}'; - INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size}; - OPTIMIZE TABLE data FINAL; + INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size} {format_settings(insert_settings)}; + {optimize_table_query} """ ) + try: backup_query_id = uuid.uuid4().hex node.query( - f"BACKUP TABLE data TO {backup_destination}", query_id=backup_query_id + f"BACKUP TABLE data TO {backup_destination} {format_settings(backup_settings)}", + query_id=backup_query_id, ) restore_query_id = uuid.uuid4().hex node.query( f""" - RESTORE TABLE data AS data_restored FROM {backup_destination}; + RESTORE TABLE data AS data_restored FROM {backup_destination} {format_settings(restore_settings)}; """, query_id=restore_query_id, ) @@ -114,6 +127,7 @@ def check_system_tables(): expected_disks = ( ("default", "local"), ("disk_s3", "s3"), + ("disk_s3_cache", "s3"), ("disk_s3_other_bucket", "s3"), ("disk_s3_plain", "s3_plain"), ) @@ -312,3 +326,79 @@ def test_incremental_backup_append_table_def(): assert node.query("SELECT count(), sum(x) FROM data") == "100\t4950\n" assert "parts_to_throw_insert = 100" in node.query("SHOW CREATE TABLE data") + + +@pytest.mark.parametrize( + "in_cache_initially, allow_backup_read_cache, allow_s3_native_copy", + [ + (False, True, False), + (True, False, False), + (True, True, False), + (True, True, True), + ], +) +def test_backup_with_fs_cache( + in_cache_initially, allow_backup_read_cache, allow_s3_native_copy +): + storage_policy = "policy_s3_cache" + + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + + insert_settings = { + "enable_filesystem_cache_on_write_operations": int(in_cache_initially) + } + + backup_settings = { + "read_from_filesystem_cache_if_exists_otherwise_bypass_cache": int( + allow_backup_read_cache + ), + "allow_s3_native_copy": int(allow_s3_native_copy), + } + + restore_settings = {"allow_s3_native_copy": int(allow_s3_native_copy)} + + backup_events, restore_events = check_backup_and_restore( + storage_policy, + backup_destination, + size=10, + insert_settings=insert_settings, + optimize_table=False, + backup_settings=backup_settings, + restore_settings=restore_settings, + ) + + #print(f"backup_events = {backup_events}") + #print(f"restore_events = {restore_events}") + + # BACKUP never updates the filesystem cache but it may read it if `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` allows 
that. + if allow_backup_read_cache and in_cache_initially: + assert backup_events["CachedReadBufferReadFromCacheBytes"] > 0 + assert not "CachedReadBufferReadFromSourceBytes" in backup_events + elif allow_backup_read_cache: + assert not "CachedReadBufferReadFromCacheBytes" in backup_events + assert backup_events["CachedReadBufferReadFromSourceBytes"] > 0 + else: + assert not "CachedReadBufferReadFromCacheBytes" in backup_events + assert not "CachedReadBufferReadFromSourceBytes" in backup_events + + assert not "CachedReadBufferCacheWriteBytes" in backup_events + assert not "CachedWriteBufferCacheWriteBytes" in backup_events + + # RESTORE doesn't use the filesystem cache during write operations. + # However while attaching parts it may use the cache while reading such files as "columns.txt" or "checksums.txt" or "primary.idx", + # see IMergeTreeDataPart::loadColumnsChecksumsIndexes() + if "CachedReadBufferReadFromSourceBytes" in restore_events: + assert ( + restore_events["CachedReadBufferReadFromSourceBytes"] + == restore_events["CachedReadBufferCacheWriteBytes"] + ) + + assert not "CachedReadBufferReadFromCacheBytes" in restore_events + + # "format_version.txt" is written when a table is created, + # see MergeTreeData::initializeDirectoriesAndFormatVersion() + if "CachedWriteBufferCacheWriteBytes" in restore_events: + assert restore_events["CachedWriteBufferCacheWriteBytes"] <= 1 From 556ecf5c1c44121bf7b96e945b4241eb445c826f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 23 Jul 2023 13:00:17 +0200 Subject: [PATCH 093/113] Renamed backup setting "read_from_filesystem_cache_if_exists_otherwise_bypass_cache" -> "read_from_filesystem_cache". --- src/Backups/BackupSettings.cpp | 2 +- src/Backups/BackupSettings.h | 2 +- src/Backups/BackupsWorker.cpp | 4 ++-- tests/integration/test_backup_restore_s3/test.py | 8 +++----- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 43259e6958a..650e817f4c3 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -27,7 +27,7 @@ namespace ErrorCodes M(Bool, decrypt_files_from_encrypted_disks) \ M(Bool, deduplicate_files) \ M(Bool, allow_s3_native_copy) \ - M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache) \ + M(Bool, read_from_filesystem_cache) \ M(UInt64, shard_num) \ M(UInt64, replica_num) \ M(Bool, internal) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index 2a950ef1b4f..68024ea1cbf 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -46,7 +46,7 @@ struct BackupSettings /// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries, /// but don't put more entries into the cache. - bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + bool read_from_filesystem_cache = true; /// 1-based shard index to store in the backup. 0 means all shards. /// Can only be used with BACKUP ON CLUSTER. 
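With the rename in place, the user-facing knob is simply `read_from_filesystem_cache` (enabled by default, as the field above shows). A hypothetical usage sketch follows; the table name and destination are chosen only for illustration.

    # Hypothetical sketch: explicitly bypass the filesystem cache for a backup's reads.
    # By default (read_from_filesystem_cache = 1) BACKUP may reuse cache entries that
    # already exist, but it never adds new ones.
    node.query(
        "BACKUP TABLE data TO S3('http://minio1:9001/root/data/backups/bypass_cache', 'minio', 'minio123') "
        "SETTINGS read_from_filesystem_cache = 0"
    )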
diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index a5c4a5314dc..90e76ef9b46 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -186,8 +186,8 @@ namespace auto read_settings = context->getReadSettings(); read_settings.remote_throttler = context->getBackupsThrottler(); read_settings.local_throttler = context->getBackupsThrottler(); - read_settings.enable_filesystem_cache = backup_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; - read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = backup_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; + read_settings.enable_filesystem_cache = backup_settings.read_from_filesystem_cache; + read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = backup_settings.read_from_filesystem_cache; return read_settings; } diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 21018924143..b6d4b4231d8 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -352,9 +352,7 @@ def test_backup_with_fs_cache( } backup_settings = { - "read_from_filesystem_cache_if_exists_otherwise_bypass_cache": int( - allow_backup_read_cache - ), + "read_from_filesystem_cache": int(allow_backup_read_cache), "allow_s3_native_copy": int(allow_s3_native_copy), } @@ -370,8 +368,8 @@ def test_backup_with_fs_cache( restore_settings=restore_settings, ) - #print(f"backup_events = {backup_events}") - #print(f"restore_events = {restore_events}") + # print(f"backup_events = {backup_events}") + # print(f"restore_events = {restore_events}") # BACKUP never updates the filesystem cache but it may read it if `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` allows that. if allow_backup_read_cache and in_cache_initially: From 6ac61b1fdd51e226bf0b3d76f778dbcc14f14849 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 24 Jul 2023 18:13:44 +0200 Subject: [PATCH 094/113] Fix tests. 
--- src/Backups/tests/gtest_backup_entries.cpp | 14 +++++++------- tests/integration/test_backup_restore_s3/test.py | 1 - 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Backups/tests/gtest_backup_entries.cpp b/src/Backups/tests/gtest_backup_entries.cpp index 75972b35ba4..2d5b993b95d 100644 --- a/src/Backups/tests/gtest_backup_entries.cpp +++ b/src/Backups/tests/gtest_backup_entries.cpp @@ -69,14 +69,14 @@ protected: static String getChecksum(const BackupEntryPtr & backup_entry) { - return getHexUIntUppercase(backup_entry->getChecksum()); + return getHexUIntUppercase(backup_entry->getChecksum({})); } static const constexpr std::string_view NO_CHECKSUM = "no checksum"; static String getPartialChecksum(const BackupEntryPtr & backup_entry, size_t prefix_length) { - auto partial_checksum = backup_entry->getPartialChecksum(prefix_length); + auto partial_checksum = backup_entry->getPartialChecksum(prefix_length, {}); if (!partial_checksum) return String{NO_CHECKSUM}; return getHexUIntUppercase(*partial_checksum); @@ -218,7 +218,7 @@ TEST_F(BackupEntriesTest, PartialChecksumBeforeFullChecksum) TEST_F(BackupEntriesTest, BackupEntryFromSmallFile) { writeFile(local_disk, "a.txt"); - auto entry = std::make_shared(local_disk, "a.txt"); + auto entry = std::make_shared(local_disk, "a.txt", ReadSettings{}); local_disk->removeFile("a.txt"); @@ -239,7 +239,7 @@ TEST_F(BackupEntriesTest, DecryptedEntriesFromEncryptedDisk) std::pair test_cases[] = {{std::make_shared(encrypted_disk, "a.txt"), false}, {std::make_shared(encrypted_disk, "a.txt"), true}, - {std::make_shared(encrypted_disk, "a.txt"), true}}; + {std::make_shared(encrypted_disk, "a.txt", ReadSettings{}), true}}; for (const auto & [entry, partial_checksum_allowed] : test_cases) { EXPECT_EQ(entry->getSize(), 9); @@ -258,7 +258,7 @@ TEST_F(BackupEntriesTest, DecryptedEntriesFromEncryptedDisk) BackupEntryPtr entries[] = {std::make_shared(encrypted_disk, "empty.txt"), std::make_shared(encrypted_disk, "empty.txt"), - std::make_shared(encrypted_disk, "empty.txt")}; + std::make_shared(encrypted_disk, "empty.txt", ReadSettings{})}; for (const auto & entry : entries) { EXPECT_EQ(entry->getSize(), 0); @@ -288,7 +288,7 @@ TEST_F(BackupEntriesTest, EncryptedEntriesFromEncryptedDisk) BackupEntryPtr entries[] = {std::make_shared(encrypted_disk, "a.txt", /* copy_encrypted= */ true), std::make_shared(encrypted_disk, "a.txt", /* copy_encrypted= */ true), - std::make_shared(encrypted_disk, "a.txt", /* copy_encrypted= */ true)}; + std::make_shared(encrypted_disk, "a.txt", ReadSettings{}, /* copy_encrypted= */ true)}; auto encrypted_checksum = getChecksum(entries[0]); EXPECT_NE(encrypted_checksum, NO_CHECKSUM); @@ -322,7 +322,7 @@ TEST_F(BackupEntriesTest, EncryptedEntriesFromEncryptedDisk) BackupEntryPtr entries[] = {std::make_shared(encrypted_disk, "empty.txt", /* copy_encrypted= */ true), std::make_shared(encrypted_disk, "empty.txt", /* copy_encrypted= */ true), - std::make_shared(encrypted_disk, "empty.txt", /* copy_encrypted= */ true)}; + std::make_shared(encrypted_disk, "empty.txt", ReadSettings{}, /* copy_encrypted= */ true)}; for (const auto & entry : entries) { EXPECT_EQ(entry->getSize(), 0); diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index b6d4b4231d8..f8ec39d240b 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -198,7 +198,6 @@ def test_backup_to_s3_multipart(): storage_policy, backup_destination, 
size=1000000, - backup_name=backup_name, ) assert node.contains_in_log( f"copyDataToS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}" From d9e45e11c7d2bb28509d050141d8e7ad986b52e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 3 Aug 2023 13:59:39 +0200 Subject: [PATCH 095/113] Remove `test_host_regexp_multiple_ptr_records_concurrent`, CC @arthurpassos --- .../__init__.py | 0 .../configs/config.xml | 4 - .../configs/host_regexp.xml | 11 --- .../configs/listen_host.xml | 5 -- .../coredns_config/Corefile | 8 -- .../coredns_config/example.com | 1 - .../scripts/stress_test.py | 62 ------------- .../test.py | 88 ------------------- 8 files changed, 179 deletions(-) delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml deleted file mode 100644 index 42a1f962705..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml +++ /dev/null @@ -1,4 +0,0 @@ - - 1 - 250 - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml deleted file mode 100644 index 7a2141e6c7e..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - test1\.example\.com$ - - default - - - \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml deleted file mode 100644 index 58ef55cd3f3..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml +++ /dev/null @@ -1,5 +0,0 @@ - - :: - 0.0.0.0 - 1 - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile deleted file mode 100644 index 3edf37dafa5..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile +++ /dev/null @@ -1,8 +0,0 @@ -. { - hosts /example.com { - reload "20ms" - fallthrough - } - forward . 
127.0.0.11 - log -} diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com deleted file mode 100644 index 9beb415c290..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com +++ /dev/null @@ -1 +0,0 @@ -filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py deleted file mode 100644 index fe69d72c1c7..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py +++ /dev/null @@ -1,62 +0,0 @@ -import pycurl -import threading -from io import BytesIO -import sys - -client_ip = sys.argv[1] -server_ip = sys.argv[2] - -mutex = threading.Lock() -success_counter = 0 -number_of_threads = 100 -number_of_iterations = 100 - - -def perform_request(): - buffer = BytesIO() - crl = pycurl.Curl() - crl.setopt(pycurl.INTERFACE, client_ip) - crl.setopt(crl.WRITEDATA, buffer) - crl.setopt(crl.URL, f"http://{server_ip}:8123/?query=select+1&user=test_dns") - - crl.perform() - - # End curl session - crl.close() - - str_response = buffer.getvalue().decode("iso-8859-1") - expected_response = "1\n" - - mutex.acquire() - - global success_counter - - if str_response == expected_response: - success_counter += 1 - - mutex.release() - - -def perform_multiple_requests(n): - for request_number in range(n): - perform_request() - - -threads = [] - - -for i in range(number_of_threads): - thread = threading.Thread( - target=perform_multiple_requests, args=(number_of_iterations,) - ) - thread.start() - threads.append(thread) - -for thread in threads: - thread.join() - - -if success_counter == number_of_threads * number_of_iterations: - exit(0) - -exit(1) diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py deleted file mode 100644 index d73e8813e79..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py +++ /dev/null @@ -1,88 +0,0 @@ -import pytest -import socket -from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check -from time import sleep -import os - -DOCKER_COMPOSE_PATH = get_docker_compose_path() -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -cluster = ClickHouseCluster(__file__) - -ch_server = cluster.add_instance( - "clickhouse-server", - with_coredns=True, - main_configs=["configs/config.xml", "configs/listen_host.xml"], - user_configs=["configs/host_regexp.xml"], -) - -client = cluster.add_instance( - "clickhouse-client", -) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def check_ptr_record(ip, hostname): - try: - host, aliaslist, ipaddrlist = socket.gethostbyaddr(ip) - if hostname.lower() == host.lower(): - return True - except socket.herror: - pass - return False - - -def setup_dns_server(ip): - domains_string = "test3.example.com test2.example.com test1.example.com" - example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' - run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", 
shell=True) - - # DNS server takes time to reload the configuration. - for try_num in range(10): - if all(check_ptr_record(ip, host) for host in domains_string.split()): - break - sleep(1) - - -def setup_ch_server(dns_server_ip): - ch_server.exec_in_container( - (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) - ) - ch_server.exec_in_container( - (["bash", "-c", "echo 'options ndots:0' >> /etc/resolv.conf"]) - ) - ch_server.query("SYSTEM DROP DNS CACHE") - - -def build_endpoint_v4(ip): - return f"'http://{ip}:8123/?query=SELECT+1&user=test_dns'" - - -def build_endpoint_v6(ip): - return build_endpoint_v4(f"[{ip}]") - - -def test_host_regexp_multiple_ptr_v4(started_cluster): - server_ip = cluster.get_instance_ip("clickhouse-server") - client_ip = cluster.get_instance_ip("clickhouse-client") - dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) - - setup_dns_server(client_ip) - setup_ch_server(dns_server_ip) - - current_dir = os.path.dirname(__file__) - client.copy_file_to_container( - os.path.join(current_dir, "scripts", "stress_test.py"), "stress_test.py" - ) - - client.exec_in_container(["python3", f"stress_test.py", client_ip, server_ip]) From 3a9f9e12979a5bb55485554c45337389f274a256 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 3 Aug 2023 15:05:52 +0300 Subject: [PATCH 096/113] Revert "Implementing new commands for keeper-client" --- .../utilities/clickhouse-keeper-client.md | 4 - programs/keeper-client/Commands.cpp | 184 +----------------- programs/keeper-client/Commands.h | 78 +------- programs/keeper-client/KeeperClient.cpp | 4 - programs/keeper-client/Parser.cpp | 1 - tests/integration/test_keeper_client/test.py | 130 +++---------- 6 files changed, 36 insertions(+), 365 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md index 37eb0bb71ff..77f816fe428 100644 --- a/docs/en/operations/utilities/clickhouse-keeper-client.md +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -51,7 +51,3 @@ keeper foo bar - `rmr ` -- Recursively deletes path. 
Confirmation required - `flwc ` -- Executes four-letter-word command - `help` -- Prints this message -- `get_stat [path]` -- Returns the node's stat (default `.`) -- `find_super_nodes [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`) -- `delete_stable_backups` -- Deletes ClickHouse nodes used for backups that are now inactive -- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index fd0a00d59db..05928a0d20b 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -1,6 +1,5 @@ #include "Commands.h" -#include #include "KeeperClient.h" @@ -25,18 +24,8 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con else path = client->cwd; - auto children = client->zookeeper->getChildren(path); - std::sort(children.begin(), children.end()); - - bool need_space = false; - for (const auto & child : children) - { - if (std::exchange(need_space, true)) - std::cout << " "; - - std::cout << child; - } - + for (const auto & child : client->zookeeper->getChildren(path)) + std::cout << child << " "; std::cout << "\n"; } @@ -141,173 +130,6 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet())) << "\n"; } -bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const -{ - String arg; - if (!parseKeeperPath(pos, expected, arg)) - return true; - - node->args.push_back(std::move(arg)); - return true; -} - -void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const -{ - Coordination::Stat stat; - String path; - if (!query->args.empty()) - path = client->getAbsolutePath(query->args[0].safeGet()); - else - path = client->cwd; - - client->zookeeper->get(path, &stat); - - std::cout << "cZxid = " << stat.czxid << "\n"; - std::cout << "mZxid = " << stat.mzxid << "\n"; - std::cout << "pZxid = " << stat.pzxid << "\n"; - std::cout << "ctime = " << stat.ctime << "\n"; - std::cout << "mtime = " << stat.mtime << "\n"; - std::cout << "version = " << stat.version << "\n"; - std::cout << "cversion = " << stat.cversion << "\n"; - std::cout << "aversion = " << stat.aversion << "\n"; - std::cout << "ephemeralOwner = " << stat.ephemeralOwner << "\n"; - std::cout << "dataLength = " << stat.dataLength << "\n"; - std::cout << "numChildren = " << stat.numChildren << "\n"; -} - -bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const -{ - ASTPtr threshold; - if (!ParserUnsignedInteger{}.parse(pos, threshold, expected)) - return false; - - node->args.push_back(threshold->as().value); - - String path; - if (!parseKeeperPath(pos, expected, path)) - path = "."; - - node->args.push_back(std::move(path)); - return true; -} - -void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client) const -{ - auto threshold = query->args[0].safeGet(); - auto path = client->getAbsolutePath(query->args[1].safeGet()); - - Coordination::Stat stat; - client->zookeeper->get(path, &stat); - - if (stat.numChildren >= static_cast(threshold)) - { - std::cout << static_cast(path) << "\t" << stat.numChildren << "\n"; - return; - } - - auto children = client->zookeeper->getChildren(path); - std::sort(children.begin(), children.end()); - for (const 
auto & child : children) - { - auto next_query = *query; - next_query.args[1] = DB::Field(path / child); - execute(&next_query, client); - } -} - -bool DeleteStableBackups::parse(IParser::Pos & /* pos */, std::shared_ptr & /* node */, Expected & /* expected */) const -{ - return true; -} - -void DeleteStableBackups::execute(const ASTKeeperQuery * /* query */, KeeperClient * client) const -{ - client->askConfirmation( - "You are going to delete all inactive backups in /clickhouse/backups.", - [client] - { - fs::path backup_root = "/clickhouse/backups"; - auto backups = client->zookeeper->getChildren(backup_root); - std::sort(backups.begin(), backups.end()); - - for (const auto & child : backups) - { - auto backup_path = backup_root / child; - std::cout << "Found backup " << backup_path << ", checking if it's active\n"; - - String stage_path = backup_path / "stage"; - auto stages = client->zookeeper->getChildren(stage_path); - - bool is_active = false; - for (const auto & stage : stages) - { - if (startsWith(stage, "alive")) - { - is_active = true; - break; - } - } - - if (is_active) - { - std::cout << "Backup " << backup_path << " is active, not going to delete\n"; - continue; - } - - std::cout << "Backup " << backup_path << " is not active, deleting it\n"; - client->zookeeper->removeRecursive(backup_path); - } - }); -} - -bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const -{ - String path; - if (!parseKeeperPath(pos, expected, path)) - path = "."; - - node->args.push_back(std::move(path)); - - ASTPtr count; - if (ParserUnsignedInteger{}.parse(pos, count, expected)) - node->args.push_back(count->as().value); - else - node->args.push_back(UInt64(10)); - - return true; -} - -void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const -{ - auto path = client->getAbsolutePath(query->args[0].safeGet()); - auto n = query->args[1].safeGet(); - - std::vector> result; - - std::queue queue; - queue.push(path); - while (!queue.empty()) - { - auto next_path = queue.front(); - queue.pop(); - - auto children = client->zookeeper->getChildren(next_path); - std::transform(children.cbegin(), children.cend(), children.begin(), [&](const String & child) { return next_path / child; }); - - auto response = client->zookeeper->get(children); - - for (size_t i = 0; i < response.size(); ++i) - { - result.emplace_back(response[i].stat.numChildren, children[i]); - queue.push(children[i]); - } - } - - std::sort(result.begin(), result.end(), std::greater()); - for (UInt64 i = 0; i < std::min(result.size(), static_cast(n)); ++i) - std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n"; -} - bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { String arg; @@ -348,7 +170,7 @@ bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptrgenerateHelpString() << "\n"; + std::cout << pair.second->getHelpMessage() << "\n"; } bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const diff --git a/programs/keeper-client/Commands.h b/programs/keeper-client/Commands.h index 093920cb10d..e4debd53e42 100644 --- a/programs/keeper-client/Commands.h +++ b/programs/keeper-client/Commands.h @@ -21,12 +21,6 @@ public: virtual String getName() const = 0; virtual ~IKeeperClientCommand() = default; - - String generateHelpString() const - { - return fmt::vformat(getHelpMessage(), fmt::make_format_args(getName())); - } - }; using Command = std::shared_ptr; @@ 
-40,7 +34,7 @@ class LSCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "{} [path] -- Lists the nodes for the given path (default: cwd)"; } + String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; } }; class CDCommand : public IKeeperClientCommand @@ -51,7 +45,7 @@ class CDCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; } + String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; } }; class SetCommand : public IKeeperClientCommand @@ -64,7 +58,7 @@ class SetCommand : public IKeeperClientCommand String getHelpMessage() const override { - return "{} [version] -- Updates the node's value. Only update if version matches (default: -1)"; + return "set [version] -- Updates the node's value. Only update if version matches (default: -1)"; } }; @@ -76,7 +70,7 @@ class CreateCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "{} -- Creates new node"; } + String getHelpMessage() const override { return "create -- Creates new node"; } }; class GetCommand : public IKeeperClientCommand @@ -87,63 +81,9 @@ class GetCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "{} -- Returns the node's value"; } + String getHelpMessage() const override { return "get -- Returns the node's value"; } }; -class GetStatCommand : public IKeeperClientCommand -{ - String getName() const override { return "get_stat"; } - - bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; - - void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - - String getHelpMessage() const override { return "{} [path] -- Returns the node's stat (default `.`)"; } -}; - -class FindSuperNodes : public IKeeperClientCommand -{ - String getName() const override { return "find_super_nodes"; } - - bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; - - void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - - String getHelpMessage() const override - { - return "{} [path] -- Finds nodes with number of children larger than some threshold for the given path (default `.`)"; - } -}; - -class DeleteStableBackups : public IKeeperClientCommand -{ - String getName() const override { return "delete_stable_backups"; } - - bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; - - void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - - String getHelpMessage() const override - { - return "{} -- Deletes ClickHouse nodes used for backups that are now inactive"; - } -}; - -class FindBigFamily : public IKeeperClientCommand -{ - String getName() const override { return "find_big_family"; } - - bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; - - void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - - String getHelpMessage() const override - { - return "{} [path] [n] -- Returns the 
top n nodes with the biggest family in the subtree (default path = `.` and n = 10)"; - } -}; - - class RMCommand : public IKeeperClientCommand { String getName() const override { return "rm"; } @@ -152,7 +92,7 @@ class RMCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "{} -- Remove the node"; } + String getHelpMessage() const override { return "remove -- Remove the node"; } }; class RMRCommand : public IKeeperClientCommand @@ -163,7 +103,7 @@ class RMRCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "{} -- Recursively deletes path. Confirmation required"; } + String getHelpMessage() const override { return "rmr -- Recursively deletes path. Confirmation required"; } }; class HelpCommand : public IKeeperClientCommand @@ -174,7 +114,7 @@ class HelpCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "{} -- Prints this message"; } + String getHelpMessage() const override { return "help -- Prints this message"; } }; class FourLetterWordCommand : public IKeeperClientCommand @@ -185,7 +125,7 @@ class FourLetterWordCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "{} -- Executes four-letter-word command"; } + String getHelpMessage() const override { return "flwc -- Executes four-letter-word command"; } }; } diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index 561a1f41f7a..f41dca1e27a 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -177,10 +177,6 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */) std::make_shared(), std::make_shared(), std::make_shared(), - std::make_shared(), - std::make_shared(), - std::make_shared(), - std::make_shared(), std::make_shared(), std::make_shared(), std::make_shared(), diff --git a/programs/keeper-client/Parser.cpp b/programs/keeper-client/Parser.cpp index fe46058fcc1..3420ccb2219 100644 --- a/programs/keeper-client/Parser.cpp +++ b/programs/keeper-client/Parser.cpp @@ -58,7 +58,6 @@ bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; String command_name(pos->begin, pos->end); - std::transform(command_name.begin(), command_name.end(), command_name.begin(), [](unsigned char c) { return std::tolower(c); }); Command command; auto iter = KeeperClient::commands.find(command_name); diff --git a/tests/integration/test_keeper_client/test.py b/tests/integration/test_keeper_client/test.py index 3187ce10d2a..00c7908eeed 100644 --- a/tests/integration/test_keeper_client/test.py +++ b/tests/integration/test_keeper_client/test.py @@ -1,7 +1,6 @@ import pytest from helpers.client import CommandRequest from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) @@ -14,7 +13,7 @@ node = cluster.add_instance( ) -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture(scope="module") def started_cluster(): try: cluster.start() @@ -24,122 +23,41 @@ def started_cluster(): cluster.shutdown() -def keeper_query(query: str): - return CommandRequest( +def test_base_commands(started_cluster): + _ = 
started_cluster + + command = CommandRequest( [ - cluster.server_bin_path, + started_cluster.server_bin_path, "keeper-client", "--host", str(cluster.get_instance_ip("zoo1")), "--port", str(cluster.zookeeper_port), "-q", - query, + "create test_create_zk_node1 testvalue1;create test_create_zk_node_2 testvalue2;get test_create_zk_node1;", ], stdin="", ) - -def test_big_family(): - command = keeper_query( - "create test_big_family foo;" - "create test_big_family/1 foo;" - "create test_big_family/1/1 foo;" - "create test_big_family/1/2 foo;" - "create test_big_family/1/3 foo;" - "create test_big_family/1/4 foo;" - "create test_big_family/1/5 foo;" - "create test_big_family/2 foo;" - "create test_big_family/2/1 foo;" - "create test_big_family/2/2 foo;" - "create test_big_family/2/3 foo;" - "find_big_family test_big_family;" - ) - - assert command.get_answer() == TSV( - [ - ["/test_big_family/1", "5"], - ["/test_big_family/2", "3"], - ["/test_big_family/2/3", "0"], - ["/test_big_family/2/2", "0"], - ["/test_big_family/2/1", "0"], - ["/test_big_family/1/5", "0"], - ["/test_big_family/1/4", "0"], - ["/test_big_family/1/3", "0"], - ["/test_big_family/1/2", "0"], - ["/test_big_family/1/1", "0"], - ] - ) - - command = keeper_query("find_big_family test_big_family 1;") - - assert command.get_answer() == TSV( - [ - ["/test_big_family/1", "5"], - ] - ) - - -def test_find_super_nodes(): - command = keeper_query( - "create test_find_super_nodes foo;" - "create test_find_super_nodes/1 foo;" - "create test_find_super_nodes/1/1 foo;" - "create test_find_super_nodes/1/2 foo;" - "create test_find_super_nodes/1/3 foo;" - "create test_find_super_nodes/1/4 foo;" - "create test_find_super_nodes/1/5 foo;" - "create test_find_super_nodes/2 foo;" - "create test_find_super_nodes/2/1 foo;" - "create test_find_super_nodes/2/2 foo;" - "create test_find_super_nodes/2/3 foo;" - "create test_find_super_nodes/2/4 foo;" - "cd test_find_super_nodes;" - "find_super_nodes 4;" - ) - - assert command.get_answer() == TSV( - [ - ["/test_find_super_nodes/1", "5"], - ["/test_find_super_nodes/2", "4"], - ] - ) - - -def test_delete_stable_backups(): - command = keeper_query( - "create /clickhouse/backups foo;" - "create /clickhouse/backups/1 foo;" - "create /clickhouse/backups/1/stage foo;" - "create /clickhouse/backups/1/stage/alive123 foo;" - "create /clickhouse/backups/2 foo;" - "create /clickhouse/backups/2/stage foo;" - "create /clickhouse/backups/2/stage/dead123 foo;" - "delete_stable_backups;" - "y;" - "ls clickhouse/backups;" - ) - - assert command.get_answer() == ( - "You are going to delete all inactive backups in /clickhouse/backups. 
Continue?\n" - 'Found backup "/clickhouse/backups/1", checking if it\'s active\n' - 'Backup "/clickhouse/backups/1" is active, not going to delete\n' - 'Found backup "/clickhouse/backups/2", checking if it\'s active\n' - 'Backup "/clickhouse/backups/2" is not active, deleting it\n' - "1\n" - ) - - -def test_base_commands(): - command = keeper_query( - "create test_create_zk_node1 testvalue1;" - "create test_create_zk_node_2 testvalue2;" - "get test_create_zk_node1;" - ) - assert command.get_answer() == "testvalue1\n" -def test_four_letter_word_commands(): - command = keeper_query("ruok") +def test_four_letter_word_commands(started_cluster): + _ = started_cluster + + command = CommandRequest( + [ + started_cluster.server_bin_path, + "keeper-client", + "--host", + str(cluster.get_instance_ip("zoo1")), + "--port", + str(cluster.zookeeper_port), + "-q", + "ruok", + ], + stdin="", + ) + assert command.get_answer() == "imok\n" From cc8c2c88bf160750545f1c33a00343ecf9e8d37f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 3 Aug 2023 14:25:37 +0200 Subject: [PATCH 097/113] Remove assertion from test_no_ttl_merges_in_busy_pool --- tests/integration/test_concurrent_ttl_merges/test.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py index 96264e53522..3a3981d65ba 100644 --- a/tests/integration/test_concurrent_ttl_merges/test.py +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -103,10 +103,6 @@ def test_no_ttl_merges_in_busy_pool(started_cluster): rows_count.append(int(node1.query("SELECT count() FROM test_ttl").strip())) time.sleep(0.5) - # at least several seconds we didn't run any TTL merges and rows count equal - # to the original value - assert sum([1 for count in rows_count if count == 30]) > 4 - assert_eq_with_retry(node1, "SELECT COUNT() FROM test_ttl", "0") node1.query("DROP TABLE test_ttl SYNC") From a98fae936d69b66518256e5067e3d1defa3a8048 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 3 Aug 2023 14:33:29 +0200 Subject: [PATCH 098/113] Fix `test_dictionary_custom_settings` --- tests/integration/test_dictionary_custom_settings/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_dictionary_custom_settings/test.py b/tests/integration/test_dictionary_custom_settings/test.py index 715219ceb87..6b5ea643998 100644 --- a/tests/integration/test_dictionary_custom_settings/test.py +++ b/tests/integration/test_dictionary_custom_settings/test.py @@ -2,6 +2,8 @@ import os import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + DICTIONARY_FILES = [ "configs/dictionaries/FileSourceConfig.xml", @@ -78,5 +80,7 @@ def test_work(start_cluster): assert caught_exception.find("Limit for result exceeded") != -1 - assert query("SELECT dictGetString('test_http', 'first', toUInt64(1))") == "\\'a\n" - assert query("SELECT dictGetString('test_http', 'second', toUInt64(1))") == '"b\n' + # It is possible that the HTTP server takes long time to start accepting connections + + assert_eq_with_retry(instance, "SELECT dictGetString('test_http', 'first', toUInt64(1))", "\\'a\n") + assert_eq_with_retry(instance, "SELECT dictGetString('test_http', 'second', toUInt64(1))", '"b\n') From 8499956321dede2b89bd6f627d493edee7b5d486 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 2 Aug 2023 18:27:14 +0200 Subject: [PATCH 099/113] Refactor CI_CONFIG from dict to dataclasses --- tests/ci/build_check.py | 28 +- tests/ci/build_download_helper.py | 2 +- tests/ci/build_report_check.py | 2 +- tests/ci/ci_config.py | 620 +++++++++-------------- tests/ci/commit_status_helper.py | 3 +- tests/ci/download_binary.py | 8 +- tests/ci/performance_comparison_check.py | 11 +- 7 files changed, 267 insertions(+), 407 deletions(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index b75650d6d60..592c27c2c68 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -38,11 +38,11 @@ BUILD_LOG_NAME = "build_log.log" def _can_export_binaries(build_config: BuildConfig) -> bool: - if build_config["package_type"] != "deb": + if build_config.package_type != "deb": return False - if build_config["sanitizer"] != "": + if build_config.sanitizer != "": return True - if build_config["debug_build"]: + if build_config.debug_build: return True return False @@ -55,26 +55,26 @@ def get_packager_cmd( image_version: str, official: bool, ) -> str: - package_type = build_config["package_type"] - comp = build_config["compiler"] + package_type = build_config.package_type + comp = build_config.compiler cmake_flags = "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=1" cmd = ( f"cd {packager_path} && CMAKE_FLAGS='{cmake_flags}' ./packager --output-dir={output_path} " f"--package-type={package_type} --compiler={comp}" ) - if build_config["debug_build"]: + if build_config.debug_build: cmd += " --debug-build" - if build_config["sanitizer"]: - cmd += f" --sanitizer={build_config['sanitizer']}" - if build_config["tidy"] == "enable": + if build_config.sanitizer: + cmd += f" --sanitizer={build_config.sanitizer}" + if build_config.tidy: cmd += " --clang-tidy" cmd += " --cache=sccache" cmd += " --s3-rw-access" cmd += f" --s3-bucket={S3_BUILDS_BUCKET}" - if "additional_pkgs" in build_config and build_config["additional_pkgs"]: + if build_config.additional_pkgs: cmd += " --additional-pkgs" cmd += f" --docker-image-version={image_version}" @@ -180,7 +180,7 @@ def create_json_artifact( result = { "log_url": log_url, "build_urls": build_urls, - "build_config": build_config, + "build_config": build_config.__dict__, "elapsed_seconds": elapsed, "status": success, "job_name": GITHUB_JOB, @@ -220,7 +220,7 @@ def upload_master_static_binaries( build_output_path: str, ) -> None: """Upload binary artifacts to a static S3 links""" - static_binary_name = build_config.get("static_binary_name", False) + static_binary_name = build_config.static_binary_name if pr_info.number != 0: return elif not static_binary_name: @@ -240,7 +240,7 @@ def main(): stopwatch = Stopwatch() build_name = sys.argv[1] - build_config = CI_CONFIG["build_config"][build_name] + build_config = CI_CONFIG.build_config[build_name] if not os.path.exists(TEMP_PATH): os.makedirs(TEMP_PATH) @@ -270,8 +270,6 @@ def main(): logging.info("Got version from repo %s", version.string) official_flag = pr_info.number == 0 - if "official" in build_config: - official_flag = build_config["official"] version_type = "testing" if "release" in pr_info.labels or "release-lts" in pr_info.labels: diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 47c11ee0911..ec4cf8f9bfa 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -91,7 +91,7 @@ def get_gh_api( def get_build_name_for_check(check_name: str) -> str: - return CI_CONFIG["tests_config"][check_name]["required_build"] # type: 
ignore + return CI_CONFIG.test_configs[check_name].required_build def read_build_urls(build_name: str, reports_path: str) -> List[str]: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 295b6cf9740..a134cb19346 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -149,7 +149,7 @@ def main(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - builds_for_check = CI_CONFIG["builds_report_config"][build_check_name] + builds_for_check = CI_CONFIG.builds_report_config[build_check_name] required_builds = required_builds or len(builds_for_check) # Collect reports from json artifacts diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 1e921f4a0cc..9d170fe8ed6 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,180 +1,184 @@ #!/usr/bin/env python3 +import logging + from dataclasses import dataclass -from typing import Callable, Dict, TypeVar +from typing import Callable, Dict, List, Literal -ConfValue = TypeVar("ConfValue", str, bool) -BuildConfig = Dict[str, ConfValue] -CI_CONFIG = { - "build_config": { - "package_release": { - "compiler": "clang-16", - "debug_build": False, - "sanitizer": "", - "package_type": "deb", - "static_binary_name": "amd64", - "additional_pkgs": True, - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_aarch64": { - "compiler": "clang-16-aarch64", - "debug_build": False, - "sanitizer": "", - "package_type": "deb", - "static_binary_name": "aarch64", - "additional_pkgs": True, - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_asan": { - "compiler": "clang-16", - "debug_build": False, - "sanitizer": "address", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_ubsan": { - "compiler": "clang-16", - "debug_build": False, - "sanitizer": "undefined", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_tsan": { - "compiler": "clang-16", - "debug_build": False, - "sanitizer": "thread", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_msan": { - "compiler": "clang-16", - "debug_build": False, - "sanitizer": "memory", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_debug": { - "compiler": "clang-16", - "debug_build": True, - "sanitizer": "", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "Note: sparse checkout was used", - }, - "binary_release": { - "compiler": "clang-16", - "debug_build": False, - "sanitizer": "", - "package_type": "binary", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_tidy": { - "compiler": "clang-16", - "debug_build": True, - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "debug-amd64", - "tidy": "enable", - "with_coverage": False, - "comment": "clang-tidy is used for static analysis", - }, - "binary_darwin": { - "compiler": "clang-16-darwin", - "debug_build": False, - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "macos", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_aarch64": { - "compiler": "clang-16-aarch64", - "debug_build": False, - "sanitizer": "", - "package_type": "binary", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_aarch64_v80compat": { - "compiler": 
"clang-16-aarch64-v80compat", - "debug_build": False, - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "aarch64v80compat", - "tidy": "disable", - "with_coverage": False, - "comment": "For ARMv8.1 and older", - }, - "binary_freebsd": { - "compiler": "clang-16-freebsd", - "debug_build": False, - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "freebsd", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_darwin_aarch64": { - "compiler": "clang-16-darwin-aarch64", - "debug_build": False, - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "macos-aarch64", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_ppc64le": { - "compiler": "clang-16-ppc64le", - "debug_build": False, - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "powerpc64le", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_amd64_compat": { - "compiler": "clang-16-amd64-compat", - "debug_build": False, - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "amd64compat", - "tidy": "disable", - "with_coverage": False, - "comment": "SSE2-only build", - }, - "binary_riscv64": { - "compiler": "clang-16-riscv64", - "debug_build": False, - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "riscv64", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, +@dataclass +class BuildConfig: + compiler: str + package_type: Literal["deb", "binary"] + additional_pkgs: bool = False + debug_build: bool = False + sanitizer: str = "" + tidy: bool = False + comment: str = "" + static_binary_name: str = "" + + +@dataclass +class TestConfig: + required_build: str + force_tests: bool = False + + +BuildConfigs = Dict[str, BuildConfig] +BuildsReportConfig = Dict[str, List[str]] +TestConfigs = Dict[str, TestConfig] + + +@dataclass +class CiConfig: + build_config: BuildConfigs + builds_report_config: BuildsReportConfig + test_configs: TestConfigs + + def validate(self) -> None: + errors = [] + # All build configs must belong to build_report_config + for build_name in self.build_config.keys(): + build_in_reports = False + for report_config in self.builds_report_config.values(): + if build_name in report_config: + build_in_reports = True + break + if not build_in_reports: + logging.error( + "Build name %s does not belong to build reports", build_name + ) + errors.append( + f"Build name {build_name} does not belong to build reports" + ) + # And otherwise + for build_report_name, build_names in self.builds_report_config.items(): + missed_names = [ + name for name in build_names if name not in self.build_config.keys() + ] + if missed_names: + logging.error( + "The following names of the build report '%s' " + "are missed in build_config: %s", + build_report_name, + missed_names, + ) + errors.append( + f"The following names of the build report '{build_report_name}' " + f"are missed in build_config: {missed_names}", + ) + # And finally, all of tests' requirements must be in the builds + for test_name, test_config in self.test_configs.items(): + if test_config.required_build not in self.build_config.keys(): + logging.error( + "The requierment '%s' for '%s' is not found in builds", + test_config, + test_name, + ) + errors.append( + f"The requierment '{test_config}' for " + f"'{test_name}' is not found in builds" + ) + + if errors: + raise KeyError("config contains errors", errors) + + +CI_CONFIG = CiConfig( + build_config={ + "package_release": 
BuildConfig( + compiler="clang-16", + package_type="deb", + static_binary_name="amd64", + additional_pkgs=True, + ), + "package_aarch64": BuildConfig( + compiler="clang-16-aarch64", + package_type="deb", + static_binary_name="aarch64", + additional_pkgs=True, + ), + "package_asan": BuildConfig( + compiler="clang-16", + sanitizer="address", + package_type="deb", + ), + "package_ubsan": BuildConfig( + compiler="clang-16", + sanitizer="undefined", + package_type="deb", + ), + "package_tsan": BuildConfig( + compiler="clang-16", + sanitizer="thread", + package_type="deb", + ), + "package_msan": BuildConfig( + compiler="clang-16", + sanitizer="memory", + package_type="deb", + ), + "package_debug": BuildConfig( + compiler="clang-16", + debug_build=True, + package_type="deb", + comment="Note: sparse checkout was used", + ), + "binary_release": BuildConfig( + compiler="clang-16", + package_type="binary", + ), + "binary_tidy": BuildConfig( + compiler="clang-16", + debug_build=True, + package_type="binary", + static_binary_name="debug-amd64", + tidy=True, + comment="clang-tidy is used for static analysis", + ), + "binary_darwin": BuildConfig( + compiler="clang-16-darwin", + package_type="binary", + static_binary_name="macos", + ), + "binary_aarch64": BuildConfig( + compiler="clang-16-aarch64", + package_type="binary", + ), + "binary_aarch64_v80compat": BuildConfig( + compiler="clang-16-aarch64-v80compat", + package_type="binary", + static_binary_name="aarch64v80compat", + comment="For ARMv8.1 and older", + ), + "binary_freebsd": BuildConfig( + compiler="clang-16-freebsd", + package_type="binary", + static_binary_name="freebsd", + ), + "binary_darwin_aarch64": BuildConfig( + compiler="clang-16-darwin-aarch64", + package_type="binary", + static_binary_name="macos-aarch64", + ), + "binary_ppc64le": BuildConfig( + compiler="clang-16-ppc64le", + package_type="binary", + static_binary_name="powerpc64le", + ), + "binary_amd64_compat": BuildConfig( + compiler="clang-16-amd64-compat", + package_type="binary", + static_binary_name="amd64compat", + comment="SSE2-only build", + ), + "binary_riscv64": BuildConfig( + compiler="clang-16-riscv64", + package_type="binary", + static_binary_name="riscv64", + ), }, - "builds_report_config": { + builds_report_config={ "ClickHouse build check": [ "package_release", "package_aarch64", @@ -197,213 +201,79 @@ CI_CONFIG = { "binary_amd64_compat", ], }, - "tests_config": { - # required_build - build name for artifacts - # force_tests - force success status for tests - "Install packages (amd64)": { - "required_build": "package_release", - }, - "Install packages (arm64)": { - "required_build": "package_aarch64", - }, - "Stateful tests (asan)": { - "required_build": "package_asan", - }, - "Stateful tests (tsan)": { - "required_build": "package_tsan", - }, - "Stateful tests (msan)": { - "required_build": "package_msan", - }, - "Stateful tests (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateful tests (debug)": { - "required_build": "package_debug", - }, - "Stateful tests (release)": { - "required_build": "package_release", - }, - "Stateful tests (aarch64)": { - "required_build": "package_aarch64", - }, - "Stateful tests (release, DatabaseOrdinary)": { - "required_build": "package_release", - }, - "Stateful tests (release, DatabaseReplicated)": { - "required_build": "package_release", - }, + test_configs={ + "Install packages (amd64)": TestConfig("package_release"), + "Install packages (arm64)": TestConfig("package_aarch64"), + "Stateful tests (asan)": 
TestConfig("package_asan"), + "Stateful tests (tsan)": TestConfig("package_tsan"), + "Stateful tests (msan)": TestConfig("package_msan"), + "Stateful tests (ubsan)": TestConfig("package_ubsan"), + "Stateful tests (debug)": TestConfig("package_debug"), + "Stateful tests (release)": TestConfig("package_release"), + "Stateful tests (aarch64)": TestConfig("package_aarch64"), + "Stateful tests (release, DatabaseOrdinary)": TestConfig("package_release"), + "Stateful tests (release, DatabaseReplicated)": TestConfig("package_release"), # Stateful tests for parallel replicas - "Stateful tests (release, ParallelReplicas)": { - "required_build": "package_release", - }, - "Stateful tests (debug, ParallelReplicas)": { - "required_build": "package_debug", - }, - "Stateful tests (asan, ParallelReplicas)": { - "required_build": "package_asan", - }, - "Stateful tests (msan, ParallelReplicas)": { - "required_build": "package_msan", - }, - "Stateful tests (ubsan, ParallelReplicas)": { - "required_build": "package_ubsan", - }, - "Stateful tests (tsan, ParallelReplicas)": { - "required_build": "package_tsan", - }, + "Stateful tests (release, ParallelReplicas)": TestConfig("package_release"), + "Stateful tests (debug, ParallelReplicas)": TestConfig("package_debug"), + "Stateful tests (asan, ParallelReplicas)": TestConfig("package_asan"), + "Stateful tests (msan, ParallelReplicas)": TestConfig("package_msan"), + "Stateful tests (ubsan, ParallelReplicas)": TestConfig("package_ubsan"), + "Stateful tests (tsan, ParallelReplicas)": TestConfig("package_tsan"), # End stateful tests for parallel replicas - "Stateless tests (asan)": { - "required_build": "package_asan", - }, - "Stateless tests (tsan)": { - "required_build": "package_tsan", - }, - "Stateless tests (msan)": { - "required_build": "package_msan", - }, - "Stateless tests (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateless tests (debug)": { - "required_build": "package_debug", - }, - "Stateless tests (release)": { - "required_build": "package_release", - }, - "Stateless tests (aarch64)": { - "required_build": "package_aarch64", - }, - "Stateless tests (release, wide parts enabled)": { - "required_build": "package_release", - }, - "Stateless tests (release, analyzer)": { - "required_build": "package_release", - }, - "Stateless tests (release, DatabaseOrdinary)": { - "required_build": "package_release", - }, - "Stateless tests (release, DatabaseReplicated)": { - "required_build": "package_release", - }, - "Stateless tests (release, s3 storage)": { - "required_build": "package_release", - }, - "Stateless tests (debug, s3 storage)": { - "required_build": "package_debug", - }, - "Stateless tests (tsan, s3 storage)": { - "required_build": "package_tsan", - }, - "Stress test (asan)": { - "required_build": "package_asan", - }, - "Stress test (tsan)": { - "required_build": "package_tsan", - }, - "Stress test (ubsan)": { - "required_build": "package_ubsan", - }, - "Stress test (msan)": { - "required_build": "package_msan", - }, - "Stress test (debug)": { - "required_build": "package_debug", - }, - "Upgrade check (asan)": { - "required_build": "package_asan", - }, - "Upgrade check (tsan)": { - "required_build": "package_tsan", - }, - "Upgrade check (msan)": { - "required_build": "package_msan", - }, - "Upgrade check (debug)": { - "required_build": "package_debug", - }, - "Integration tests (asan)": { - "required_build": "package_asan", - }, - "Integration tests (asan, analyzer)": { - "required_build": "package_asan", - }, - "Integration tests (tsan)": { - 
"required_build": "package_tsan", - }, - "Integration tests (release)": { - "required_build": "package_release", - }, - "Integration tests (msan)": { - "required_build": "package_msan", - }, - "Integration tests flaky check (asan)": { - "required_build": "package_asan", - }, - "Compatibility check (amd64)": { - "required_build": "package_release", - }, - "Compatibility check (aarch64)": { - "required_build": "package_aarch64", - }, - "Unit tests (release)": { - "required_build": "binary_release", - }, - "Unit tests (asan)": { - "required_build": "package_asan", - }, - "Unit tests (msan)": { - "required_build": "package_msan", - }, - "Unit tests (tsan)": { - "required_build": "package_tsan", - }, - "Unit tests (ubsan)": { - "required_build": "package_ubsan", - }, - "AST fuzzer (debug)": { - "required_build": "package_debug", - }, - "AST fuzzer (asan)": { - "required_build": "package_asan", - }, - "AST fuzzer (msan)": { - "required_build": "package_msan", - }, - "AST fuzzer (tsan)": { - "required_build": "package_tsan", - }, - "AST fuzzer (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateless tests flaky check (asan)": { - "required_build": "package_asan", - }, - "ClickHouse Keeper Jepsen": { - "required_build": "binary_release", - }, - "ClickHouse Server Jepsen": { - "required_build": "binary_release", - }, - "Performance Comparison": { - "required_build": "package_release", - "test_grep_exclude_filter": "", - }, - "Performance Comparison Aarch64": { - "required_build": "package_aarch64", - "test_grep_exclude_filter": "", - }, - "SQLancer (release)": { - "required_build": "package_release", - }, - "SQLancer (debug)": { - "required_build": "package_debug", - }, - "Sqllogic test (release)": { - "required_build": "package_release", - }, + "Stateless tests (asan)": TestConfig("package_asan"), + "Stateless tests (tsan)": TestConfig("package_tsan"), + "Stateless tests (msan)": TestConfig("package_msan"), + "Stateless tests (ubsan)": TestConfig("package_ubsan"), + "Stateless tests (debug)": TestConfig("package_debug"), + "Stateless tests (release)": TestConfig("package_release"), + "Stateless tests (aarch64)": TestConfig("package_aarch64"), + "Stateless tests (release, wide parts enabled)": TestConfig("package_release"), + "Stateless tests (release, analyzer)": TestConfig("package_release"), + "Stateless tests (release, DatabaseOrdinary)": TestConfig("package_release"), + "Stateless tests (release, DatabaseReplicated)": TestConfig("package_release"), + "Stateless tests (release, s3 storage)": TestConfig("package_release"), + "Stateless tests (debug, s3 storage)": TestConfig("package_debug"), + "Stateless tests (tsan, s3 storage)": TestConfig("package_tsan"), + "Stress test (asan)": TestConfig("package_asan"), + "Stress test (tsan)": TestConfig("package_tsan"), + "Stress test (ubsan)": TestConfig("package_ubsan"), + "Stress test (msan)": TestConfig("package_msan"), + "Stress test (debug)": TestConfig("package_debug"), + "Upgrade check (asan)": TestConfig("package_asan"), + "Upgrade check (tsan)": TestConfig("package_tsan"), + "Upgrade check (msan)": TestConfig("package_msan"), + "Upgrade check (debug)": TestConfig("package_debug"), + "Integration tests (asan)": TestConfig("package_asan"), + "Integration tests (asan, analyzer)": TestConfig("package_asan"), + "Integration tests (tsan)": TestConfig("package_tsan"), + "Integration tests (release)": TestConfig("package_release"), + "Integration tests (msan)": TestConfig("package_msan"), + "Integration tests flaky check (asan)": 
TestConfig("package_asan"), + "Compatibility check (amd64)": TestConfig("package_release"), + "Compatibility check (aarch64)": TestConfig("package_aarch64"), + "Unit tests (release)": TestConfig("binary_release"), + "Unit tests (asan)": TestConfig("package_asan"), + "Unit tests (msan)": TestConfig("package_msan"), + "Unit tests (tsan)": TestConfig("package_tsan"), + "Unit tests (ubsan)": TestConfig("package_ubsan"), + "AST fuzzer (debug)": TestConfig("package_debug"), + "AST fuzzer (asan)": TestConfig("package_asan"), + "AST fuzzer (msan)": TestConfig("package_msan"), + "AST fuzzer (tsan)": TestConfig("package_tsan"), + "AST fuzzer (ubsan)": TestConfig("package_ubsan"), + "Stateless tests flaky check (asan)": TestConfig("package_asan"), + "ClickHouse Keeper Jepsen": TestConfig("binary_release"), + "ClickHouse Server Jepsen": TestConfig("binary_release"), + "Performance Comparison": TestConfig("package_release"), + "Performance Comparison Aarch64": TestConfig("package_aarch64"), + "SQLancer (release)": TestConfig("package_release"), + "SQLancer (debug)": TestConfig("package_debug"), + "Sqllogic test (release)": TestConfig("package_release"), }, -} # type: dict +) +CI_CONFIG.validate() + # checks required by Mergeable Check REQUIRED_CHECKS = [ diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index efe149b0aa4..945bcfe05ed 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -51,7 +51,8 @@ class RerunHelper: def override_status(status: str, check_name: str, invert: bool = False) -> str: - if CI_CONFIG["tests_config"].get(check_name, {}).get("force_tests", False): + test_config = CI_CONFIG.test_configs.get(check_name) + if test_config and test_config.force_tests: return "success" if invert: diff --git a/tests/ci/download_binary.py b/tests/ci/download_binary.py index c57780daa36..034e65f204d 100755 --- a/tests/ci/download_binary.py +++ b/tests/ci/download_binary.py @@ -9,7 +9,7 @@ import os from pathlib import Path from build_download_helper import download_build_with_progress -from ci_config import CI_CONFIG, BuildConfig +from ci_config import CI_CONFIG from env_helper import RUNNER_TEMP, S3_ARTIFACT_DOWNLOAD_TEMPLATE from git_helper import Git, commit from version_helper import get_version_from_repo, version_arg @@ -62,9 +62,9 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) for build in args.build_names: # check if it's in CI_CONFIG - config = CI_CONFIG["build_config"][build] # type: BuildConfig - if args.rename: - path = temp_path / f"clickhouse-{config['static_binary_name']}" + config = CI_CONFIG.build_config[build] + if args.rename and config.static_binary_name: + path = temp_path / f"clickhouse-{config.static_binary_name}" else: path = temp_path / "clickhouse" diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 41ace95c350..70d37b24c4e 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -71,7 +71,7 @@ if __name__ == "__main__": reports_path = os.getenv("REPORTS_PATH", "./reports") check_name = sys.argv[1] - required_build = CI_CONFIG["tests_config"][check_name]["required_build"] + required_build = CI_CONFIG.test_configs[check_name].required_build if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -121,15 +121,6 @@ if __name__ == "__main__": ) sys.exit(0) - test_grep_exclude_filter = CI_CONFIG["tests_config"][check_name][ - "test_grep_exclude_filter" - ] - if test_grep_exclude_filter: - docker_env += 
f" -e CHPC_TEST_GREP_EXCLUDE={test_grep_exclude_filter}" - logging.info( - "Fill fliter our performance tests by grep -v %s", test_grep_exclude_filter - ) - rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") From c279b00c52729190e1bce4cc54a7a9268b14c2e1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 3 Aug 2023 12:55:27 +0000 Subject: [PATCH 100/113] Automatic style fix --- tests/integration/test_dictionary_custom_settings/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_dictionary_custom_settings/test.py b/tests/integration/test_dictionary_custom_settings/test.py index 6b5ea643998..eb394da8bb6 100644 --- a/tests/integration/test_dictionary_custom_settings/test.py +++ b/tests/integration/test_dictionary_custom_settings/test.py @@ -82,5 +82,9 @@ def test_work(start_cluster): # It is possible that the HTTP server takes long time to start accepting connections - assert_eq_with_retry(instance, "SELECT dictGetString('test_http', 'first', toUInt64(1))", "\\'a\n") - assert_eq_with_retry(instance, "SELECT dictGetString('test_http', 'second', toUInt64(1))", '"b\n') + assert_eq_with_retry( + instance, "SELECT dictGetString('test_http', 'first', toUInt64(1))", "\\'a\n" + ) + assert_eq_with_retry( + instance, "SELECT dictGetString('test_http', 'second', toUInt64(1))", '"b\n' + ) From 63a46d7cb0d51745856a9f044c36d6b43ba50029 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Thu, 3 Aug 2023 21:05:31 +0800 Subject: [PATCH 101/113] fix pg integration test --- tests/integration/test_postgresql_database_engine/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index b14d234f7eb..bdf1056765e 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -406,14 +406,17 @@ def test_postgresql_password_leak(started_cluster): ) cursor = conn.cursor() + cursor.execute("DROP SCHEMA IF EXISTS test_schema CASCADE") cursor.execute("CREATE SCHEMA test_schema") cursor.execute("CREATE TABLE test_schema.table1 (a integer)") cursor.execute("CREATE TABLE table2 (a integer)") + node1.query("DROP DATABASE IF EXISTS postgres_database") node1.query( "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', 'test_schema')" ) + node1.query("DROP DATABASE IF EXISTS postgres_database2") node1.query( "CREATE DATABASE postgres_database2 ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')" ) From f0eb22ac5f5cd64dec77550e417db06793fe0d94 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 3 Aug 2023 14:20:19 +0000 Subject: [PATCH 102/113] Fix build for S3Queue --- src/Storages/S3Queue/S3QueueSource.cpp | 23 ++----- src/Storages/S3Queue/S3QueueSource.h | 8 +-- src/Storages/S3Queue/StorageS3Queue.cpp | 83 ++----------------------- 3 files changed, 16 insertions(+), 98 deletions(-) diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 57d2d6304b0..8d1d8e7b143 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -148,22 +148,12 @@ StorageS3QueueSource::KeyWithInfo StorageS3QueueSource::QueueGlobIterator::next( return KeyWithInfo(); } -Block 
StorageS3QueueSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageS3QueueSource::StorageS3QueueSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, @@ -174,28 +164,27 @@ StorageS3QueueSource::StorageS3QueueSource( std::shared_ptr files_metadata_, const S3QueueAction & action_, const size_t download_thread_num_) - : ISource(getHeader(sample_block_, requested_virtual_columns_)) + : ISource(info.source_header) , WithContext(context_) , name(std::move(name_)) , bucket(bucket_) , version_id(version_id_) , format(format_) - , columns_desc(columns_) + , columns_desc(info.columns_description) , request_settings(request_settings_) , client(client_) , files_metadata(files_metadata_) - , requested_virtual_columns(requested_virtual_columns_) + , requested_virtual_columns(info.requested_virtual_columns) + , requested_columns(info.requested_columns) , file_iterator(file_iterator_) , action(action_) { internal_source = std::make_shared( - requested_virtual_columns_, + info, format_, name_, - sample_block_, context_, format_settings_, - columns_, max_block_size_, request_settings_, compression_hint_, diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index a85fce46ad8..523b8c0e81f 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -11,6 +11,7 @@ # include # include # include +# include # include # include @@ -67,13 +68,11 @@ public: static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); StorageS3QueueSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format, String name_, - const Block & sample_block, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, @@ -105,7 +104,8 @@ private: using ReaderHolder = StorageS3Source::ReaderHolder; ReaderHolder reader; - std::vector requested_virtual_columns; + NamesAndTypesList requested_virtual_columns; + NamesAndTypesList requested_columns; std::shared_ptr file_iterator; const S3QueueAction action; diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 87bff398172..673c0dde1d2 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -33,6 +33,7 @@ # include # include # include +# include # include @@ -187,7 +188,7 @@ StorageS3Queue::StorageS3Queue( bool StorageS3Queue::supportsSubcolumns() const { - return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); + return true; } bool StorageS3Queue::supportsSubsetOfColumns() const @@ -213,55 +214,18 @@ Pipe StorageS3Queue::read( auto query_configuration = updateConfigurationAndGetCopy(local_context); - Pipes pipes; - - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const 
auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - std::shared_ptr iterator_wrapper = createFileIterator(local_context, query_info.query); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { - return std::any_of( - virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col) { return col == virtual_col.name; }); - }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; return Pipe(std::make_shared( - requested_virtual_columns, + read_from_format_info, configuration.format, getName(), - block_for_format, local_context, format_settings, - columns_description, max_block_size, query_configuration.request_settings, configuration.compression_method, @@ -425,52 +389,17 @@ void StorageS3Queue::streamToViews() auto column_names = block_io.pipeline.getHeader().getNames(); // Create a stream for each consumer and join them in a union stream - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - requested_virtual_columns.push_back(virtual_column); - } std::shared_ptr iterator_wrapper = createFileIterator(s3queue_context, nullptr); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { - return std::any_of( - virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col) { return col == virtual_col.name; }); - }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); const size_t max_download_threads = s3queue_context->getSettingsRef().max_download_threads; - Pipes pipes; - auto pipe = Pipe(std::make_shared( - requested_virtual_columns, + read_from_format_info, configuration.format, getName(), - block_for_format, s3queue_context, format_settings, - columns_description, block_size, query_configuration.request_settings, configuration.compression_method, From 7d00a2d29aca31caa2db285302c78cc9b2154981 Mon Sep 17 00:00:00 
2001 From: Dmitry Novik Date: Thu, 3 Aug 2023 15:16:20 +0000 Subject: [PATCH 103/113] Fix 02483_elapsed_time --- tests/queries/0_stateless/02483_elapsed_time.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02483_elapsed_time.sh b/tests/queries/0_stateless/02483_elapsed_time.sh index e3b983129fb..fdb23d6da01 100755 --- a/tests/queries/0_stateless/02483_elapsed_time.sh +++ b/tests/queries/0_stateless/02483_elapsed_time.sh @@ -32,7 +32,7 @@ OK_QUERY_JSON=" WITH ( SELECT sleepEachRow(1.0) ) AS sub -SELECT * +SELECT *, sub FROM ( SELECT * @@ -50,7 +50,7 @@ WITH ( SELECT * FROM ( - SELECT * + SELECT *, sub FROM system.one ) FORMAT XML From 49f76b8600371948cf77c8fd59d2ab1c12fcd251 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 3 Aug 2023 18:02:32 +0200 Subject: [PATCH 104/113] Fix flaky test --- .../0_stateless/01605_adaptive_granularity_block_borders.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 4623c456475..f12a61055c4 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,4 +1,5 @@ --- Tags: no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings, no-tsan, no-debug +-- no-tsan: too slow SET use_uncompressed_cache = 0; SET allow_prefetched_read_pool_for_remote_filesystem=0; From ca67576720f7be4e120ee3ed5f3c4a35b3255b52 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 3 Aug 2023 18:06:00 +0200 Subject: [PATCH 105/113] Do not use inline cache in server images --- tests/ci/docker_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index a434d3cc841..89bd7b7755b 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -238,7 +238,7 @@ def build_and_push_image( result = [] # type: TestResults if os != "ubuntu": tag += f"-{os}" - init_args = ["docker", "buildx", "build", "--build-arg BUILDKIT_INLINE_CACHE=1"] + init_args = ["docker", "buildx", "build"] if push: init_args.append("--push") init_args.append("--output=type=image,push-by-digest=true") From 8a7870f2504b3f3f1eed257d5aa3fc87e868a317 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 3 Aug 2023 20:24:29 +0200 Subject: [PATCH 106/113] Add a test to broken tests (Analyzer) --- tests/analyzer_integration_broken_tests.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 02f70c8a6df..1b0c44de1ed 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -201,3 +201,4 @@ test_backward_compatibility/test_data_skipping_indices.py::test_index test_backward_compatibility/test_convert_ordinary.py::test_convert_ordinary_to_atomic test_backward_compatibility/test_memory_bound_aggregation.py::test_backward_compatability test_odbc_interaction/test.py::test_postgres_insert +test_merge_tree_azure_blob_storage/test.py::test_table_manipulations From 94fcae58b91b145a5a09322be24421c1e4f2ea39 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 4 Aug 2023 09:49:51 +0200 Subject: [PATCH 107/113] clickhouse-keeper-client: fix version parsing for set command Previously: / :) set /test foo 1 DB::Exception: Bad get: has UInt64, requested Int64 
Signed-off-by: Azat Khuzhin --- programs/keeper-client/Commands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 05928a0d20b..3afd49e9855 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -77,7 +77,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co client->zookeeper->set( client->getAbsolutePath(query->args[0].safeGet()), query->args[1].safeGet(), - static_cast(query->args[2].safeGet())); + static_cast(query->args[2].get())); } bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const From 58bea8d0675b07b5144d67d846e4ca9163a4dc59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elena=20Torr=C3=B3?= Date: Fri, 4 Aug 2023 11:23:41 +0200 Subject: [PATCH 108/113] Format 's3_upload_part_size_multiply_parts_count_threshold' info --- docs/en/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 051945538b2..c1752ea488c 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -237,7 +237,7 @@ The following settings can be set before query execution or placed into configur - `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. - `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default values is `2`. -- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. Default value us `500`. +- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`. - `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file. Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. 
From 9279d1e33133d83e6d62fbc4ea167db1703d2fe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elena=20Torr=C3=B3?= Date: Fri, 4 Aug 2023 11:26:13 +0200 Subject: [PATCH 109/113] Update setting's description --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 13fde626f16..ae1b063c3c3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -78,7 +78,7 @@ class IColumn; M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ - M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \ + M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited. You ", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ From 0b312f541acfdde4f37d7364e52da9af215e0a63 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 4 Aug 2023 13:29:28 +0300 Subject: [PATCH 110/113] Revert "Remove try/catch from DatabaseFilesystem" --- .../Serializations/SerializationWrapper.h | 1 + src/Databases/DatabaseFilesystem.cpp | 38 +++++++++++-------- src/Databases/DatabaseFilesystem.h | 2 +- src/Interpreters/DatabaseCatalog.cpp | 21 ++-------- .../0_stateless/02722_database_filesystem.sh | 2 +- .../0_stateless/02724_database_s3.reference | 6 +-- .../queries/0_stateless/02724_database_s3.sh | 4 +- .../0_stateless/02725_database_hdfs.reference | 11 +++--- .../0_stateless/02725_database_hdfs.sh | 14 +++---- 9 files changed, 45 insertions(+), 54 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 31900f93148..bf922888af9 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -77,6 +77,7 @@ public: void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; + void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; }; diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 59f9ee67d7b..7eaf474eea0 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -11,11 +11,9 @@ #include #include #include -#include #include - namespace fs = std::filesystem; namespace DB @@ -77,8 +75,10 @@ bool 
DatabaseFilesystem::checkTableFilePath(const std::string & table_path, Cont /// Check access for file before checking its existence. if (check_path && !fileOrSymlinkPathStartsWith(table_path, user_files_path)) { - /// Access denied is thrown regardless of 'throw_on_error' - throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path); + if (throw_on_error) + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path); + else + return false; } /// Check if the corresponding file exists. @@ -128,25 +128,20 @@ bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_) if (tryGetTableFromCache(name)) return true; - return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */ false); + return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */false); } -StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_, bool throw_on_error) const +StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_) const { /// Check if table exists in loaded tables map. if (auto table = tryGetTableFromCache(name)) return table; auto table_path = getTablePath(name); - if (!checkTableFilePath(table_path, context_, throw_on_error)) - return {}; - - String format = FormatFactory::instance().getFormatFromFileName(table_path, throw_on_error); - if (format.empty()) - return {}; + checkTableFilePath(table_path, context_, /* throw_on_error */true); /// If the file exists, create a new table using TableFunctionFile and return it. - auto args = makeASTFunction("file", std::make_shared(table_path), std::make_shared(format)); + auto args = makeASTFunction("file", std::make_shared(table_path)); auto table_function = TableFunctionFactory::instance().get(args, context_); if (!table_function) @@ -163,7 +158,7 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_) const { /// getTableImpl can throw exceptions, do not catch them to show correct error to user. - if (auto storage = getTableImpl(name, context_, true)) + if (auto storage = getTableImpl(name, context_)) return storage; throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", @@ -172,7 +167,20 @@ StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_ StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const { - return getTableImpl(name, context_, false); + try + { + return getTableImpl(name, context_); + } + catch (const Exception & e) + { + /// Ignore exceptions thrown by TableFunctionFile, which indicate that there is no table + /// see tests/02722_database_filesystem.sh for more details. 
+ if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) + { + return nullptr; + } + throw; + } } bool DatabaseFilesystem::empty() const diff --git a/src/Databases/DatabaseFilesystem.h b/src/Databases/DatabaseFilesystem.h index b72891b9a5c..7fe620401dc 100644 --- a/src/Databases/DatabaseFilesystem.h +++ b/src/Databases/DatabaseFilesystem.h @@ -48,7 +48,7 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; protected: - StoragePtr getTableImpl(const String & name, ContextPtr context, bool throw_on_error) const; + StoragePtr getTableImpl(const String & name, ContextPtr context) const; StoragePtr tryGetTableFromCache(const std::string & name) const; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index f8481e3f1d8..13cac5afb1b 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -336,6 +336,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( return db_and_table; } + if (table_id.database_name == TEMPORARY_DATABASE) { /// For temporary tables UUIDs are set in Context::resolveStorageID(...). @@ -368,24 +369,8 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( database = it->second; } - StoragePtr table; - if (exception) - { - try - { - table = database->getTable(table_id.table_name, context_); - } - catch (const Exception & e) - { - exception->emplace(e); - } - } - else - { - table = database->tryGetTable(table_id.table_name, context_); - } - - if (!table && exception && !exception->has_value()) + auto table = database->tryGetTable(table_id.table_name, context_); + if (!table && exception) exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs())); if (!table) diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 7b2e1bf1a66..3b7a41bb39e 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -61,7 +61,7 @@ CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); """ 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: # FILE_DOESNT_EXIST: unknown file -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 107" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 60" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: Cannot determine the file format by it's extension ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/02724_database_s3.reference b/tests/queries/0_stateless/02724_database_s3.reference index 437549a973a..425cca6a077 100644 --- a/tests/queries/0_stateless/02724_database_s3.reference +++ b/tests/queries/0_stateless/02724_database_s3.reference @@ -17,7 +17,5 @@ test1 16 17 18 0 0 0 Test 2: check exceptions -BAD_ARGUMENTS -BAD_ARGUMENTS -BAD_ARGUMENTS -BAD_ARGUMENTS +OK +OK diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index 15e93ff117f..bb8f1f5f7ee 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,12 +46,12 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM 
\"http://localhost:11111/test/a.myext\" -""" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" +""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; SELECT * FROM \"abacaba\" -""" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" +""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" # Cleanup ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ diff --git a/tests/queries/0_stateless/02725_database_hdfs.reference b/tests/queries/0_stateless/02725_database_hdfs.reference index 932a0786921..ef8adae2bbc 100644 --- a/tests/queries/0_stateless/02725_database_hdfs.reference +++ b/tests/queries/0_stateless/02725_database_hdfs.reference @@ -4,8 +4,9 @@ test1 1 2 3 test2 Test 2: check exceptions -BAD_ARGUMENTS -BAD_ARGUMENTS -BAD_ARGUMENTS -CANNOT_EXTRACT_TABLE_STRUCTURE -BAD_ARGUMENTS +OK0 +OK1 +OK2 +OK3 +OK4 +OK5 diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index c258042a917..89ff7421a6f 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash # Tags: no-fasttest, use-hdfs, no-parallel -CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -38,19 +36,19 @@ echo "Test 2: check exceptions" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = HDFS('abacaba'); -""" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" +""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK0" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test4; CREATE DATABASE test4 ENGINE = HDFS; USE test4; SELECT * FROM \"abacaba/file.tsv\" -""" 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" +""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK1" -${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK2" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK3" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK4" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK5" # Cleanup From aa9ab58994f3c75d02b12eee25e9b00537aede1b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 4 Aug 2023 13:31:15 +0300 Subject: [PATCH 111/113] Update 01114_database_atomic.sh (#53043) --- tests/queries/0_stateless/01114_database_atomic.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 
1b295e5f36f..3e1f9eb1f43 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -13,7 +13,7 @@ DROP DATABASE IF EXISTS test_01114_2; DROP DATABASE IF EXISTS test_01114_3; " -$CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_1 ENGINE=Ordinary" 2>&1| grep -Fac "UNKNOWN_DATABASE_ENGINE" +$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=0 -q "CREATE DATABASE test_01114_1 ENGINE=Ordinary" 2>&1| grep -Fac "UNKNOWN_DATABASE_ENGINE" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_1 ENGINE=Atomic" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_2" From 0874b507df1b2012cb10c211dc830f564e1ee068 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 4 Aug 2023 14:09:59 +0300 Subject: [PATCH 112/113] remove obsolete no-upgrade-check tag (#52915) --- tests/clickhouse-test | 7 ------- tests/queries/0_stateless/00061_merge_tree_alter.sql | 1 - .../00626_replace_partition_from_table_zookeeper.sh | 2 +- ...orum_insert_lost_part_and_alive_part_zookeeper_long.sql | 2 +- tests/queries/0_stateless/00942_dataparts_500.sh | 1 - .../00993_system_parts_race_condition_drop_zookeeper.sh | 2 +- .../01079_parallel_alter_detach_table_zookeeper.sh | 2 +- .../0_stateless/01111_create_drop_replicated_db_stress.sh | 2 +- tests/queries/0_stateless/01191_rename_dictionary.sql | 2 +- .../01318_long_unsuccessful_mutation_zookeeper.sh | 2 +- .../0_stateless/01378_alter_rename_with_ttl_zookeeper.sql | 2 +- tests/queries/0_stateless/01391_join_on_dict_crash.sql | 2 +- .../0_stateless/01555_system_distribution_queue_mask.sql | 1 - .../0_stateless/01576_alter_low_cardinality_and_select.sh | 1 - .../01650_fetch_patition_with_macro_in_zk_path_long.sql | 2 +- tests/queries/0_stateless/01780_column_sparse_alter.sql | 1 - .../queries/0_stateless/02022_storage_filelog_one_file.sh | 1 - .../0_stateless/02025_storage_filelog_virtual_col.sh | 1 - tests/queries/0_stateless/02067_lost_part_s3.sql | 2 +- .../02222_create_table_without_columns_metadata.sh | 2 +- tests/queries/0_stateless/02242_delete_user_race.sh | 2 +- tests/queries/0_stateless/02243_drop_user_grant_race.sh | 2 +- tests/queries/0_stateless/02273_full_sort_join.sql.j2 | 4 +--- .../0_stateless/02302_join_auto_lc_nullable_bug.sql | 1 - .../0_stateless/02306_window_move_row_number_fix.sql | 1 - .../queries/0_stateless/02313_cross_join_dup_col_names.sql | 1 - tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql | 1 - tests/queries/0_stateless/02316_const_string_intersact.sql | 1 - .../0_stateless/02320_mapped_array_witn_const_nullable.sql | 1 - .../0_stateless/02332_dist_insert_send_logs_level.sh | 1 - .../02345_partial_sort_transform_optimization.sql | 1 - tests/queries/0_stateless/02354_annoy_index.sql | 2 +- tests/queries/0_stateless/02363_mapupdate_improve.sql | 1 - .../0_stateless/02366_direct_dictionary_dict_has.sql | 1 - tests/queries/0_stateless/02366_with_fill_date.sql | 1 - .../0_stateless/02381_compress_marks_and_primary_key.sql | 2 +- .../02397_system_parts_race_condition_drop_rm.sh | 2 +- tests/queries/0_stateless/02429_low_cardinality_trash.sh | 2 +- .../0_stateless/02450_kill_distributed_query_deadlock.sh | 2 +- tests/queries/0_stateless/02725_start_stop_fetches.sh | 2 +- 40 files changed, 21 insertions(+), 48 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8c4e4dfec47..fc175f2a05a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -505,7 +505,6 @@ class FailureReason(enum.Enum): REPLICATED_DB = "replicated-database" S3_STORAGE = 
"s3-storage" BUILD = "not running for current build" - NO_UPGRADE_CHECK = "not running for upgrade check" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" # UNKNOWN reasons @@ -949,12 +948,6 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB - # TODO: remove checking "no-upgrade-check" after 23.1 - elif args.upgrade_check and ( - "no-upgrade-check" in tags or "no-upgrade-check" in tags - ): - return FailureReason.NO_UPGRADE_CHECK - elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE elif ( diff --git a/tests/queries/0_stateless/00061_merge_tree_alter.sql b/tests/queries/0_stateless/00061_merge_tree_alter.sql index 2e46b1e16d6..f2a36d6e5a3 100644 --- a/tests/queries/0_stateless/00061_merge_tree_alter.sql +++ b/tests/queries/0_stateless/00061_merge_tree_alter.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check DROP TABLE IF EXISTS alter_00061; set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index 399511db701..c32b6d04a42 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage, no-upgrade-check +# Tags: zookeeper, no-s3-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql index 9c02ac795ed..a1859220c6c 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql +++ b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql @@ -1,4 +1,4 @@ --- Tags: long, zookeeper, no-replicated-database, no-upgrade-check +-- Tags: long, zookeeper, no-replicated-database -- Tag no-replicated-database: Fails due to additional replicas or shards SET send_logs_level = 'fatal'; diff --git a/tests/queries/0_stateless/00942_dataparts_500.sh b/tests/queries/0_stateless/00942_dataparts_500.sh index a6c3fcd4303..91c95816590 100755 --- a/tests/queries/0_stateless/00942_dataparts_500.sh +++ b/tests/queries/0_stateless/00942_dataparts_500.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check # Test fix for issue #5066 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index 811681794a5..4205f231698 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-upgrade-check, no-replicated-database +# Tags: race, zookeeper, no-parallel, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh 
b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index e508b77a0c2..8133f866c58 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest, no-upgrade-check +# Tags: zookeeper, no-parallel, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 59899e1c14a..f61a60a0bda 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-upgrade-check +# Tags: race, zookeeper CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index 8074e84f0ed..e9fed1dd6b2 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-upgrade-check +-- Tags: no-parallel DROP DATABASE IF EXISTS test_01191; CREATE DATABASE test_01191 ENGINE=Atomic; diff --git a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh index f9a2ec8a34c..42941b486d6 100755 --- a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh +++ b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-upgrade-check +# Tags: long, zookeeper, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql index 43c9fa43104..b6982910ace 100644 --- a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql +++ b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-upgrade-check +-- Tags: zookeeper DROP TABLE IF EXISTS table_rename_with_ttl; diff --git a/tests/queries/0_stateless/01391_join_on_dict_crash.sql b/tests/queries/0_stateless/01391_join_on_dict_crash.sql index 5321e03767f..854da04b334 100644 --- a/tests/queries/0_stateless/01391_join_on_dict_crash.sql +++ b/tests/queries/0_stateless/01391_join_on_dict_crash.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-upgrade-check +-- Tags: no-parallel DROP DATABASE IF EXISTS db_01391; CREATE DATABASE db_01391; diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql index 61083c3ae14..472e042a18b 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check -- force data path with the user/pass in it set use_compact_format_in_distributed_parts_names=0; diff --git a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh 
index 4a9b4beee5b..fcea7f57cd3 100755 --- a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh +++ b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql index b45a1974611..f4afcb8d55e 100644 --- a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql +++ b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-upgrade-check +-- Tags: long DROP TABLE IF EXISTS test_01640; DROP TABLE IF EXISTS restore_01640; diff --git a/tests/queries/0_stateless/01780_column_sparse_alter.sql b/tests/queries/0_stateless/01780_column_sparse_alter.sql index bc2f6f7c91f..f33573e503a 100644 --- a/tests/queries/0_stateless/01780_column_sparse_alter.sql +++ b/tests/queries/0_stateless/01780_column_sparse_alter.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SET mutations_sync = 2; diff --git a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh index 3abf5c52031..32ce1643d4e 100755 --- a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh +++ b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check set -eu diff --git a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh index e0f0114d030..0219a0421cb 100755 --- a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh +++ b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check set -eu diff --git a/tests/queries/0_stateless/02067_lost_part_s3.sql b/tests/queries/0_stateless/02067_lost_part_s3.sql index 7df15ab33c4..bfdf9250036 100644 --- a/tests/queries/0_stateless/02067_lost_part_s3.sql +++ b/tests/queries/0_stateless/02067_lost_part_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-upgrade-check, no-fasttest +-- Tags: no-fasttest DROP TABLE IF EXISTS partslost_0; DROP TABLE IF EXISTS partslost_1; diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh index 26646bd91a0..9cb4fb939e7 100755 --- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-upgrade-check +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02242_delete_user_race.sh b/tests/queries/0_stateless/02242_delete_user_race.sh index 8f387333c33..2af54276469 100755 --- a/tests/queries/0_stateless/02242_delete_user_race.sh +++ b/tests/queries/0_stateless/02242_delete_user_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-upgrade-check +# Tags: race, no-fasttest, no-parallel # Test tries to reproduce a race between threads: # - deletes user diff --git a/tests/queries/0_stateless/02243_drop_user_grant_race.sh b/tests/queries/0_stateless/02243_drop_user_grant_race.sh index 
46ad776006e..4dce8e8124c 100755 --- a/tests/queries/0_stateless/02243_drop_user_grant_race.sh +++ b/tests/queries/0_stateless/02243_drop_user_grant_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-upgrade-check +# Tags: race, no-fasttest, no-parallel set -e diff --git a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 index 6b6aa53836e..6500306356c 100644 --- a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 +++ b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 @@ -1,6 +1,4 @@ --- Tags: long, no-upgrade-check - --- TODO(@vdimir): remove no-upgrade-check tag after https://github.com/ClickHouse/ClickHouse/pull/51737 is released +-- Tags: long DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql index 8e0fb4a55a0..7f7285d5472 100644 --- a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql +++ b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SET max_bytes_in_join = '100', join_algorithm = 'auto'; diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql index f73525f92be..96dd8f6176b 100644 --- a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql @@ -1,2 +1 @@ --- Tags: no-upgrade-check SELECT nth_value(NULL, 1048577) OVER (Rows BETWEEN 1023 FOLLOWING AND UNBOUNDED FOLLOWING) diff --git a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql index 08e8843f763..45390c0e8ef 100644 --- a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql +++ b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check -- https://github.com/ClickHouse/ClickHouse/issues/37561 diff --git a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql index df20e5c42d4..cbf71f1d555 100644 --- a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql +++ b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SET join_algorithm = 'partial_merge'; diff --git a/tests/queries/0_stateless/02316_const_string_intersact.sql b/tests/queries/0_stateless/02316_const_string_intersact.sql index 148d048952b..33629d2a56a 100644 --- a/tests/queries/0_stateless/02316_const_string_intersact.sql +++ b/tests/queries/0_stateless/02316_const_string_intersact.sql @@ -1,3 +1,2 @@ --- Tags: no-upgrade-check SELECT 'Play ClickHouse' InterSect SELECT 'Play ClickHouse' diff --git a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql index 734c597051e..1dd06a26894 100644 --- a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql +++ b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check select arrayMap(x -> toNullable(1), range(number)) from numbers(3); select arrayFilter(x -> toNullable(1), range(number)) from numbers(3); diff --git a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh index 
503b94be715..5bbe31c9237 100755 --- a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh +++ b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql index 07f705acd84..35ec675b7c0 100644 --- a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check -- Regression for PartialSortingTransform optimization that requires at least 1500 rows. SELECT * FROM (SELECT * FROM (SELECT 0 a, toNullable(number) b, toString(number) c FROM numbers(1e6)) ORDER BY a DESC, b DESC, c LIMIT 1500) limit 10; diff --git a/tests/queries/0_stateless/02354_annoy_index.sql b/tests/queries/0_stateless/02354_annoy_index.sql index fefb51dfcc9..dbe5d95dd1f 100644 --- a/tests/queries/0_stateless/02354_annoy_index.sql +++ b/tests/queries/0_stateless/02354_annoy_index.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-upgrade-check +-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64 SET allow_experimental_annoy_index = 1; SET allow_experimental_analyzer = 0; diff --git a/tests/queries/0_stateless/02363_mapupdate_improve.sql b/tests/queries/0_stateless/02363_mapupdate_improve.sql index b4a4b8e5d91..c3cd8fff9ee 100644 --- a/tests/queries/0_stateless/02363_mapupdate_improve.sql +++ b/tests/queries/0_stateless/02363_mapupdate_improve.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check DROP TABLE IF EXISTS map_test; CREATE TABLE map_test(`tags` Map(String, String)) ENGINE = MergeTree PRIMARY KEY tags ORDER BY tags SETTINGS index_granularity = 8192; INSERT INTO map_test (tags) VALUES (map('fruit','apple','color','red')); diff --git a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql index cf9f2971cb0..41334811464 100644 --- a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql +++ b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check DROP TABLE IF EXISTS test_table; CREATE TABLE test_table diff --git a/tests/queries/0_stateless/02366_with_fill_date.sql b/tests/queries/0_stateless/02366_with_fill_date.sql index 4d41facf423..aca57b127af 100644 --- a/tests/queries/0_stateless/02366_with_fill_date.sql +++ b/tests/queries/0_stateless/02366_with_fill_date.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SELECT toDate('2022-02-01') AS d1 FROM numbers(18) AS number diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index 2fe0943745d..0c228c13f19 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,4 +1,4 @@ --- Tags: no-upgrade-check, no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings drop table if exists test_02381; create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; diff --git a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh 
b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh index 39e513f6be4..48efc98f7c7 100755 --- a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh +++ b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-upgrade-check, disabled +# Tags: race, zookeeper, no-parallel, disabled CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02429_low_cardinality_trash.sh b/tests/queries/0_stateless/02429_low_cardinality_trash.sh index 91618cb2796..e115ee0824e 100755 --- a/tests/queries/0_stateless/02429_low_cardinality_trash.sh +++ b/tests/queries/0_stateless/02429_low_cardinality_trash.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-upgrade-check +# Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index d15acba3837..03c43843d3a 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-upgrade-check +# Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02725_start_stop_fetches.sh b/tests/queries/0_stateless/02725_start_stop_fetches.sh index 0ca687ae951..c9922455d94 100755 --- a/tests/queries/0_stateless/02725_start_stop_fetches.sh +++ b/tests/queries/0_stateless/02725_start_stop_fetches.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-upgrade-check, no-replicated-database +# Tags: race, zookeeper, no-parallel, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 376eeeb8422c8e356c2f0d9fb894cb7c492dba99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 4 Aug 2023 17:18:50 +0300 Subject: [PATCH 113/113] Fix `test_zookeeper_config` (#52988) * Fix `test_zookeeper_config` * Update tests/integration/test_zookeeper_config/test.py Co-authored-by: Alexander Tokmakov * Update tests/integration/test_zookeeper_config/test.py * Update test.py --------- Co-authored-by: Alexander Tokmakov --- tests/integration/test_zookeeper_config/test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_zookeeper_config/test.py b/tests/integration/test_zookeeper_config/test.py index 65f82c2286b..5e36a97b3ca 100644 --- a/tests/integration/test_zookeeper_config/test.py +++ b/tests/integration/test_zookeeper_config/test.py @@ -2,6 +2,7 @@ import time import pytest import logging from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster( __file__, zookeeper_config_path="configs/zookeeper_config_root_a.xml" @@ -56,10 +57,11 @@ def test_chroot_with_same_root(started_cluster): for j in range(2): # Second insert to test deduplication node.query("INSERT INTO simple VALUES ({0}, {0})".format(i)) - time.sleep(1) + # Replication might take time - assert node1.query("select count() from simple").strip() == "2" - assert node2.query("select count() from simple").strip() == "2" + assert_eq_with_retry(node1, "select count() from simple", "2\n") + + 
assert_eq_with_retry(node2, "select count() from simple", "2\n") def test_chroot_with_different_root(started_cluster):