From b7fbd9f0f23a88a4aef5a4dcb8725821739b8c61 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 22 Jan 2024 03:51:59 +0800 Subject: [PATCH 001/356] Make it work --- .../QueryPlan/ReadFromMergeTree.cpp | 5 + src/Storages/ColumnsDescription.cpp | 7 +- src/Storages/ColumnsDescription.h | 1 + src/Storages/MergeTree/IMergeTreeDataPart.h | 3 + src/Storages/MergeTree/IMergeTreeReader.cpp | 78 +++++++ src/Storages/MergeTree/IMergeTreeReader.h | 10 + .../MergeTree/MergeTreeBlockReadUtils.cpp | 31 ++- .../MergeTree/MergeTreeBlockReadUtils.h | 4 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 18 +- .../MergeTree/MergeTreeDataPartCompact.h | 1 + .../MergeTree/MergeTreeDataPartInMemory.cpp | 9 +- .../MergeTree/MergeTreeDataPartInMemory.h | 1 + .../MergeTree/MergeTreeDataPartWide.cpp | 15 +- .../MergeTree/MergeTreeDataPartWide.h | 1 + .../MergeTree/MergeTreePrefetchedReadPool.cpp | 2 + .../MergeTree/MergeTreePrefetchedReadPool.h | 3 +- .../MergeTree/MergeTreeRangeReader.cpp | 46 ++-- src/Storages/MergeTree/MergeTreeRangeReader.h | 6 +- src/Storages/MergeTree/MergeTreeReadPool.cpp | 2 + src/Storages/MergeTree/MergeTreeReadPool.h | 1 + .../MergeTree/MergeTreeReadPoolBase.cpp | 26 ++- .../MergeTree/MergeTreeReadPoolBase.h | 6 +- .../MergeTree/MergeTreeReadPoolInOrder.cpp | 2 + .../MergeTree/MergeTreeReadPoolInOrder.h | 1 + .../MergeTreeReadPoolParallelReplicas.cpp | 2 + .../MergeTreeReadPoolParallelReplicas.h | 1 + ...rgeTreeReadPoolParallelReplicasInOrder.cpp | 2 + ...MergeTreeReadPoolParallelReplicasInOrder.h | 1 + src/Storages/MergeTree/MergeTreeReadTask.cpp | 40 ++-- src/Storages/MergeTree/MergeTreeReadTask.h | 69 +++--- .../MergeTree/MergeTreeReaderCompact.cpp | 2 + .../MergeTree/MergeTreeReaderCompact.h | 1 + .../MergeTree/MergeTreeReaderInMemory.cpp | 2 + .../MergeTree/MergeTreeReaderInMemory.h | 1 + .../MergeTree/MergeTreeReaderWide.cpp | 2 + src/Storages/MergeTree/MergeTreeReaderWide.h | 1 + .../MergeTree/MergeTreeSelectProcessor.cpp | 203 ++++-------------- .../MergeTree/MergeTreeSelectProcessor.h | 6 +- .../MergeTree/MergeTreeSequentialSource.cpp | 13 +- .../MergeTreeSplitPrewhereIntoReadSteps.cpp | 1 + 40 files changed, 337 insertions(+), 289 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index d02e387afc3..473969b3e74 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -353,6 +353,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( reader_settings, required_columns, virt_column_names, + data.getPartitionValueType(), pool_settings, context); @@ -434,6 +435,7 @@ Pipe ReadFromMergeTree::readFromPool( reader_settings, required_columns, virt_column_names, + data.getPartitionValueType(), pool_settings, context); } @@ -447,6 +449,7 @@ Pipe ReadFromMergeTree::readFromPool( reader_settings, required_columns, virt_column_names, + data.getPartitionValueType(), pool_settings, context); } @@ -523,6 +526,7 @@ Pipe ReadFromMergeTree::readInOrder( reader_settings, required_columns, virt_column_names, + data.getPartitionValueType(), pool_settings, context); } @@ -538,6 +542,7 @@ Pipe ReadFromMergeTree::readInOrder( reader_settings, required_columns, virt_column_names, + data.getPartitionValueType(), pool_settings, context); } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 1712b984596..c3b8c3742f7 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -465,6 +465,10 @@ 
NamesAndTypesList ColumnsDescription::get(const GetColumnsOptions & options) con NamesAndTypesList res; switch (options.kind) { + case GetColumnsOptions::None: + { + break; + } case GetColumnsOptions::All: { res = getAll(); @@ -555,7 +559,8 @@ static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind) case ColumnDefaultKind::Ephemeral: return GetColumnsOptions::Ephemeral; } - UNREACHABLE(); + + return GetColumnsOptions::None; } NamesAndTypesList ColumnsDescription::getByNames(const GetColumnsOptions & options, const Names & names) const diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 9a133f81d7a..b6cf58b5464 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -32,6 +32,7 @@ struct GetColumnsOptions { enum Kind : UInt8 { + None = 0, Ordinary = 1, Materialized = 2, Aliases = 4, diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 640a1f1d0a3..a5ec8744033 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -47,6 +47,8 @@ class MarkCache; class UncompressedCache; class MergeTreeTransaction; +struct MergeTreeReadTaskInfo; +using MergeTreeReadTaskInfoPtr = std::shared_ptr; enum class DataPartRemovalState { @@ -93,6 +95,7 @@ public: const NamesAndTypesList & columns_, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const MergeTreeReadTaskInfoPtr & read_task_info_, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 63ed8021f58..7ff69f7cc4b 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -1,7 +1,10 @@ #include +#include +#include #include #include #include +#include #include #include #include @@ -25,6 +28,7 @@ namespace ErrorCodes IMergeTreeReader::IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, + const MergeTreeReadTaskInfoPtr & read_task_info_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -47,14 +51,21 @@ IMergeTreeReader::IMergeTreeReader( , part_columns(data_part_info_for_read->isWidePart() ? 
data_part_info_for_read->getColumnsDescriptionWithCollectedNested() : data_part_info_for_read->getColumnsDescription()) + , read_task_info(read_task_info_) { columns_to_read.reserve(requested_columns.size()); serializations.reserve(requested_columns.size()); + size_t pos = 0; for (const auto & column : requested_columns) { columns_to_read.emplace_back(getColumnInPart(column)); serializations.emplace_back(getSerializationInPart(column)); + + if (read_task_info && read_task_info->virt_column_names.contains(column.name)) + virt_column_pos_to_name.emplace(pos, column.name); + + ++pos; } } @@ -63,6 +74,73 @@ const IMergeTreeReader::ValueSizeMap & IMergeTreeReader::getAvgValueSizeHints() return avg_value_size_hints; } +void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const +{ + if (std::all_of( + virt_column_pos_to_name.begin(), + virt_column_pos_to_name.end(), + [&columns](auto & elem) + { + chassert(elem.first < columns.size()); + return columns[elem.first] != nullptr; + })) + return; + + chassert(read_task_info != nullptr); + + const IMergeTreeDataPart * part = read_task_info->data_part.get(); + if (part->isProjectionPart()) + part = part->getParentPart(); + + for (auto [pos, name] : virt_column_pos_to_name) + { + auto & column = columns[pos]; + + if (column != nullptr) + continue; + + if (name == "_part_offset") + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column {} must have been filled by part reader", name); + } + else if (name == LightweightDeleteDescription::FILTER_COLUMN.name) + { + /// If _row_exists column isn't present in the part then fill it here with 1s + column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumnConst(rows, 1)->convertToFullColumnIfConst(); + } + else if (name == BlockNumberColumn::name) + { + column = BlockNumberColumn::type->createColumnConst(rows, part->info.min_block)->convertToFullColumnIfConst(); + } + else if (name == "_part") + { + column = DataTypeLowCardinality{std::make_shared()} + .createColumnConst(rows, part->name) + ->convertToFullColumnIfConst(); + } + else if (name == "_part_index") + { + column = DataTypeUInt64().createColumnConst(rows, read_task_info->part_index_in_query)->convertToFullColumnIfConst(); + } + else if (name == "_part_uuid") + { + column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst(); + } + else if (name == "_partition_id") + { + column = DataTypeLowCardinality{std::make_shared()} + .createColumnConst(rows, part->info.partition_id) + ->convertToFullColumnIfConst(); + } + else if (name == "_partition_value") + { + column = read_task_info->partition_value_type + ->createColumnConst(rows, Tuple(part->partition.value.begin(), part->partition.value.end())) + ->convertToFullColumnIfConst(); + } + } +} + void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number) const { try diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 997be064f28..cd4417265fa 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -23,6 +23,7 @@ public: IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, + const MergeTreeReadTaskInfoPtr & read_task_info_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -42,6 +43,9 @@ public: const ValueSizeMap & getAvgValueSizeHints() const; 
+ /// Add virtual columns that are not present in the block. + void fillVirtualColumns(Columns & columns, size_t rows) const; + /// Add columns from ordered_names that are not present in the block. /// Missing columns are added in the order specified by ordered_names. /// num_rows is needed in case if all res_columns are nullptr. @@ -113,6 +117,12 @@ private: /// Actual columns description in part. const ColumnsDescription & part_columns; + + /// Shared information required for reading. + MergeTreeReadTaskInfoPtr read_task_info; + + /// Map of positions in requested_columns which are virtual columns to their names. + std::map virt_column_pos_to_name; }; } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index f5f0fa6f726..fc6599b2851 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -111,11 +111,18 @@ NameSet injectRequiredColumns( if (with_subcolumns) options.withSubcolumns(); + auto virtuals_options = GetColumnsOptions(GetColumnsOptions::None).withVirtuals(); + for (size_t i = 0; i < columns.size(); ++i) { - /// We are going to fetch only physical columns and system columns + /// We are going to fetch physical columns and system columns first if (!storage_snapshot->tryGetColumn(options, columns[i])) - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no physical column or subcolumn {} in table", columns[i]); + { + if (storage_snapshot->tryGetColumn(virtuals_options, columns[i])) + continue; + else + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column or subcolumn {} in table", columns[i]); + } have_at_least_one_physical_column |= injectRequiredColumnsRecursively( columns[i], storage_snapshot, alter_conversions, @@ -258,11 +265,10 @@ void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Colum } -MergeTreeReadTask::Columns getReadTaskColumns( +MergeTreeReadTaskColumns getReadTaskColumns( const IMergeTreeDataPartInfoForReader & data_part_info_for_reader, const StorageSnapshotPtr & storage_snapshot, const Names & required_columns, - const Names & system_columns, const PrewhereInfoPtr & prewhere_info, const ExpressionActionsSettings & actions_settings, const MergeTreeReaderSettings & reader_settings, @@ -270,16 +276,11 @@ MergeTreeReadTask::Columns getReadTaskColumns( { Names column_to_read_after_prewhere = required_columns; - /// Read system columns such as lightweight delete mask "_row_exists" if it is persisted in the part - for (const auto & name : system_columns) - if (data_part_info_for_reader.getColumns().contains(name)) - column_to_read_after_prewhere.push_back(name); - /// Inject columns required for defaults evaluation injectRequiredColumns( data_part_info_for_reader, storage_snapshot, with_subcolumns, column_to_read_after_prewhere); - MergeTreeReadTask::Columns result; + MergeTreeReadTaskColumns result; auto options = GetColumnsOptions(GetColumnsOptions::All) .withExtendedObjects() .withSystemColumns(); @@ -287,6 +288,9 @@ MergeTreeReadTask::Columns getReadTaskColumns( if (with_subcolumns) options.withSubcolumns(); + options.withVirtuals(); + + bool has_part_offset = std::find(required_columns.begin(), required_columns.end(), "_part_offset") != required_columns.end(); NameSet columns_from_previous_steps; auto add_step = [&](const PrewhereExprStep & step) { @@ -302,6 +306,13 @@ MergeTreeReadTask::Columns getReadTaskColumns( if (!columns_from_previous_steps.contains(name)) 
step_column_names.push_back(name); + /// Make sure _part_offset is read in STEP 0 + if (columns_from_previous_steps.empty() && has_part_offset) + { + if (std::find(step_column_names.begin(), step_column_names.end(), "_part_offset") == step_column_names.end()) + step_column_names.push_back("_part_offset"); + } + if (!step_column_names.empty()) injectRequiredColumns( data_part_info_for_reader, storage_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 9417d47814a..b19c42c8db8 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -22,11 +21,10 @@ NameSet injectRequiredColumns( bool with_subcolumns, Names & columns); -MergeTreeReadTask::Columns getReadTaskColumns( +MergeTreeReadTaskColumns getReadTaskColumns( const IMergeTreeDataPartInfoForReader & data_part_info_for_reader, const StorageSnapshotPtr & storage_snapshot, const Names & required_columns, - const Names & system_columns, const PrewhereInfoPtr & prewhere_info, const ExpressionActionsSettings & actions_settings, const MergeTreeReaderSettings & reader_settings, diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 0ecd7abe183..7baba26d15c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -33,6 +33,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const MergeTreeReadTaskInfoPtr & read_task_info_, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -41,12 +42,21 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const ReadBufferFromFileBase::ProfileCallback & profile_callback) const { auto read_info = std::make_shared(shared_from_this(), alter_conversions); - auto * load_marks_threadpool = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr; + auto * load_marks_threadpool + = reader_settings.read_settings.load_marks_asynchronously ? 
&read_info->getContext()->getLoadMarksThreadpool() : nullptr; return std::make_unique( - read_info, columns_to_read, storage_snapshot, uncompressed_cache, - mark_cache, mark_ranges, reader_settings, load_marks_threadpool, - avg_value_size_hints, profile_callback); + read_info, + columns_to_read, + read_task_info_, + storage_snapshot, + uncompressed_cache, + mark_cache, + mark_ranges, + reader_settings, + load_marks_threadpool, + avg_value_size_hints, + profile_callback); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 35a358b3720..479cb23c3e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -32,6 +32,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const MergeTreeReadTaskInfoPtr & read_task_info_, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 2f01dbfe04b..d4d04bd55c2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -33,6 +33,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const MergeTreeReadTaskInfoPtr & read_task_info_, UncompressedCache * /* uncompressed_cache */, MarkCache * /* mark_cache */, const AlterConversionsPtr & alter_conversions, @@ -44,7 +45,13 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( auto ptr = std::static_pointer_cast(shared_from_this()); return std::make_unique( - read_info, ptr, columns_to_read, storage_snapshot, mark_ranges, reader_settings); + read_info, + ptr, + columns_to_read, + read_task_info_, + storage_snapshot, + mark_ranges, + reader_settings); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 27f8ba4bccb..d3e6a9a5b27 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -21,6 +21,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const MergeTreeReadTaskInfoPtr & read_task_info_, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index dc6c1f0019d..5c8b3f9e357 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -31,6 +31,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const MergeTreeReadTaskInfoPtr & read_task_info_, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -40,10 +41,16 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( { auto 
read_info = std::make_shared(shared_from_this(), alter_conversions); return std::make_unique( - read_info, columns_to_read, - storage_snapshot, uncompressed_cache, - mark_cache, mark_ranges, reader_settings, - avg_value_size_hints, profile_callback); + read_info, + columns_to_read, + read_task_info_, + storage_snapshot, + uncompressed_cache, + mark_cache, + mark_ranges, + reader_settings, + avg_value_size_hints, + profile_callback); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 14147c4ad56..7bc4fe8c777 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -27,6 +27,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const MergeTreeReadTaskInfoPtr & read_task_info_, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 3f9632637b6..da50a39d5b1 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -114,6 +114,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -124,6 +125,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( reader_settings_, column_names_, virtual_column_names_, + partition_value_type_, settings_, context_) , WithContext(context_) diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 9925d4e2fa4..5845e72a873 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -24,6 +24,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); @@ -67,7 +68,7 @@ private: struct ThreadTask { - using InfoPtr = MergeTreeReadTask::InfoPtr; + using InfoPtr = MergeTreeReadTaskInfoPtr; ThreadTask(InfoPtr read_info_, MarkRanges ranges_, Priority priority_) : read_info(std::move(read_info_)), ranges(std::move(ranges_)), priority(priority_) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index cce7e56dda9..81263085f75 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -808,8 +808,7 @@ MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, const PrewhereExprStep * prewhere_info_, - bool last_reader_in_chain_, - const Names & non_const_virtual_column_names_) + bool last_reader_in_chain_) : merge_tree_reader(merge_tree_reader_) , index_granularity(&(merge_tree_reader->data_part_info_for_read->getIndexGranularity())) , prev_reader(prev_reader_) @@ -826,21 +825,6 @@ MergeTreeRangeReader::MergeTreeRangeReader( result_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, 
name_and_type.name}); } - for (const auto & column_name : non_const_virtual_column_names_) - { - if (result_sample_block.has(column_name)) - continue; - - non_const_virtual_column_names.push_back(column_name); - - if (column_name == "_part_offset" && !prev_reader) - { - /// _part_offset column is filled by the first reader. - read_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); - result_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); - } - } - if (prewhere_info) { const auto & step = *prewhere_info; @@ -1006,6 +990,8 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (num_read_rows == 0) num_read_rows = read_result.num_rows; + merge_tree_reader->fillVirtualColumns(columns, num_read_rows); + /// fillMissingColumns() must be called after reading but befoe any filterings because /// some columns (e.g. arrays) might be only partially filled and thus not be valid and /// fillMissingColumns() fixes this. @@ -1056,22 +1042,23 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar { /// Physical columns go first and then some virtual columns follow - size_t physical_columns_count = merge_tree_reader->getColumns().size(); - Columns physical_columns(read_result.columns.begin(), read_result.columns.begin() + physical_columns_count); + size_t columns_count = merge_tree_reader->getColumns().size(); + Columns columns(read_result.columns.begin(), read_result.columns.begin() + columns_count); + merge_tree_reader->fillVirtualColumns(columns, read_result.num_rows); bool should_evaluate_missing_defaults; - merge_tree_reader->fillMissingColumns(physical_columns, should_evaluate_missing_defaults, read_result.num_rows); + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, read_result.num_rows); /// If some columns absent in part, then evaluate default values if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults({}, physical_columns); + merge_tree_reader->evaluateMissingDefaults({}, columns); /// If result not empty, then apply on-fly alter conversions if any required if (!prewhere_info || prewhere_info->perform_alter_conversions) - merge_tree_reader->performRequiredConversions(physical_columns); + merge_tree_reader->performRequiredConversions(columns); - for (size_t i = 0; i < physical_columns.size(); ++i) - read_result.columns[i] = std::move(physical_columns[i]); + for (size_t i = 0; i < columns.size(); ++i) + read_result.columns[i] = std::move(columns[i]); } size_t total_bytes = 0; @@ -1163,12 +1150,17 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t result.adjustLastGranule(); if (read_sample_block.has("_part_offset")) - fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); + { + size_t pos = read_sample_block.getPositionByName("_part_offset"); + chassert(pos < result.columns.size()); + chassert(result.columns[pos] == nullptr); + result.columns[pos] = fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); + } return result; } -void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) +ColumnPtr MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) { size_t num_rows = result.numReadRows(); @@ -1194,7 +1186,7 @@ void 
MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead *pos++ = start_part_offset++; } - result.columns.emplace_back(std::move(column)); + return column; } Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 04d42138963..77dcc8853bb 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -101,8 +101,7 @@ public: IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, const PrewhereExprStep * prewhere_info_, - bool last_reader_in_chain_, - const Names & non_const_virtual_column_names); + bool last_reader_in_chain_); MergeTreeRangeReader() = default; @@ -309,7 +308,7 @@ private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(const ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result) const; - void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); + ColumnPtr fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); IMergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; @@ -323,7 +322,6 @@ private: bool last_reader_in_chain = false; bool is_initialized = false; - Names non_const_virtual_column_names; Poco::Logger * log = &Poco::Logger::get("MergeTreeRangeReader"); }; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 8ed7a9d8707..dbd27aebc21 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -40,6 +40,7 @@ MergeTreeReadPool::MergeTreeReadPool( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -50,6 +51,7 @@ MergeTreeReadPool::MergeTreeReadPool( reader_settings_, column_names_, virtual_column_names_, + partition_value_type_, settings_, context_) , min_marks_for_concurrent_read(pool_settings.min_marks_for_concurrent_read) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 3a1af947cae..769d1bfdd28 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -32,6 +32,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 446baccd961..d5811c33f00 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -13,6 +13,7 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & pool_settings_, const ContextPtr & context_) : parts_ranges(std::move(parts_)) @@ -22,6 +23,7 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( , 
reader_settings(reader_settings_) , column_names(column_names_) , virtual_column_names(virtual_column_names_) + , partition_value_type(partition_value_type_) , pool_settings(pool_settings_) , owned_mark_cache(context_->getGlobalContext()->getMarkCache()) , owned_uncompressed_cache(pool_settings_.use_uncompressed_cache ? context_->getGlobalContext()->getUncompressedCache() : nullptr) @@ -44,7 +46,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos() assertSortedAndNonIntersecting(part_with_ranges.ranges); #endif - MergeTreeReadTask::Info read_task_info; + MergeTreeReadTaskInfo read_task_info; read_task_info.data_part = part_with_ranges.data_part; read_task_info.part_index_in_query = part_with_ranges.part_index_in_query; @@ -52,10 +54,22 @@ void MergeTreeReadPoolBase::fillPerPartInfos() LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, part_with_ranges.alter_conversions); + Names column_and_virtual_column_names; + column_and_virtual_column_names.reserve(column_names.size() + virtual_column_names.size()); + column_and_virtual_column_names.insert(column_and_virtual_column_names.end(), column_names.begin(), column_names.end()); + column_and_virtual_column_names.insert( + column_and_virtual_column_names.end(), virtual_column_names.begin(), virtual_column_names.end()); read_task_info.task_columns = getReadTaskColumns( - part_info, storage_snapshot, column_names, virtual_column_names, - prewhere_info, actions_settings, - reader_settings, /*with_subcolumns=*/ true); + part_info, + storage_snapshot, + column_and_virtual_column_names, + prewhere_info, + actions_settings, + reader_settings, + /*with_subcolumns=*/true); + + read_task_info.virt_column_names = {virtual_column_names.begin(), virtual_column_names.end()}; + read_task_info.partition_value_type = partition_value_type; if (pool_settings.preferred_block_size_bytes > 0) { @@ -75,7 +89,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos() } is_part_on_remote_disk.push_back(part_with_ranges.data_part->isStoredOnRemoteDisk()); - per_part_infos.push_back(std::make_shared(std::move(read_task_info))); + per_part_infos.push_back(std::make_shared(std::move(read_task_info))); } } @@ -97,7 +111,7 @@ std::vector MergeTreeReadPoolBase::getPerPartSumMarks() const } MergeTreeReadTaskPtr MergeTreeReadPoolBase::createTask( - MergeTreeReadTask::InfoPtr read_info, + MergeTreeReadTaskInfoPtr read_info, MarkRanges ranges, MergeTreeReadTask * previous_task) const { diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index 0081063cd37..3aa9eb8670e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -29,6 +29,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); @@ -43,6 +44,7 @@ protected: const MergeTreeReaderSettings reader_settings; const Names column_names; const Names virtual_column_names; + const DataTypePtr partition_value_type; const PoolSettings pool_settings; const MarkCachePtr owned_mark_cache; const UncompressedCachePtr owned_uncompressed_cache; @@ -52,13 +54,13 @@ protected: std::vector getPerPartSumMarks() const; MergeTreeReadTaskPtr createTask( - MergeTreeReadTask::InfoPtr read_info, + MergeTreeReadTaskInfoPtr read_info, MarkRanges ranges, MergeTreeReadTask * previous_task) const; MergeTreeReadTask::Extras getExtras() const; - 
std::vector per_part_infos; + std::vector per_part_infos; std::vector is_part_on_remote_disk; ReadBufferFromFileBase::ProfileCallback profile_callback; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp index 1b621ad5055..692e45993c7 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp @@ -18,6 +18,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -28,6 +29,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( reader_settings_, column_names_, virtual_column_names_, + partition_value_type_, settings_, context_) , has_limit_below_one_block(has_limit_below_one_block_) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h index d9cc1ba4984..de7457dfab8 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h @@ -17,6 +17,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 47436ed1407..fb14dfe9a6e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -19,6 +19,7 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -29,6 +30,7 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( reader_settings_, column_names_, virtual_column_names_, + partition_value_type_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index 7579a892b67..0b17bcee52b 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -17,6 +17,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index a822a517933..d242d1e81fe 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -18,6 +18,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & 
context_) : MergeTreeReadPoolBase( @@ -28,6 +29,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd reader_settings_, column_names_, virtual_column_names_, + partition_value_type_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h index 3e5f8f5dfba..09935e1be2f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -18,6 +18,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, + const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index dcfed700fac..f08ab55adbd 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -10,7 +10,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -String MergeTreeReadTask::Columns::dump() const +String MergeTreeReadTaskColumns::dump() const { WriteBufferFromOwnString s; for (size_t i = 0; i < pre_columns.size(); ++i) @@ -22,7 +22,7 @@ String MergeTreeReadTask::Columns::dump() const } MergeTreeReadTask::MergeTreeReadTask( - InfoPtr info_, + MergeTreeReadTaskInfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_) @@ -34,16 +34,23 @@ MergeTreeReadTask::MergeTreeReadTask( } MergeTreeReadTask::Readers MergeTreeReadTask::createReaders( - const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges) + const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges) { Readers new_readers; auto create_reader = [&](const NamesAndTypesList & columns_to_read) { return read_info->data_part->getReader( - columns_to_read, extras.storage_snapshot, ranges, - extras.uncompressed_cache, extras.mark_cache, - read_info->alter_conversions, extras.reader_settings, extras.value_size_map, extras.profile_callback); + columns_to_read, + extras.storage_snapshot, + ranges, + read_info, + extras.uncompressed_cache, + extras.mark_cache, + read_info->alter_conversions, + extras.reader_settings, + extras.value_size_map, + extras.profile_callback); }; new_readers.main = create_reader(read_info->task_columns.columns); @@ -58,10 +65,8 @@ MergeTreeReadTask::Readers MergeTreeReadTask::createReaders( return new_readers; } -MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders( - const Readers & task_readers, - const PrewhereExprInfo & prewhere_actions, - const Names & non_const_virtual_column_names) +MergeTreeReadTask::RangeReaders +MergeTreeReadTask::createRangeReaders(const Readers & task_readers, const PrewhereExprInfo & prewhere_actions) { MergeTreeReadTask::RangeReaders new_range_readers; if (prewhere_actions.steps.size() != task_readers.prewhere.size()) @@ -77,10 +82,7 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders( { last_reader = task_readers.main->getColumns().empty() && (i + 1 == prewhere_actions.steps.size()); - MergeTreeRangeReader current_reader( - task_readers.prewhere[i].get(), - prev_reader, prewhere_actions.steps[i].get(), - last_reader, non_const_virtual_column_names); + MergeTreeRangeReader current_reader(task_readers.prewhere[i].get(), prev_reader, 
prewhere_actions.steps[i].get(), last_reader); new_range_readers.prewhere.push_back(std::move(current_reader)); prev_reader = &new_range_readers.prewhere.back(); @@ -88,11 +90,11 @@ if (!last_reader) { - new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true, non_const_virtual_column_names); + new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true); } else { - /// If all columns are read by prewhere range readers than move last prewhere range reader to main. + /// If all columns are read by prewhere range readers, move last prewhere range reader to main. new_range_readers.main = std::move(new_range_readers.prewhere.back()); new_range_readers.prewhere.pop_back(); } @@ -100,14 +102,12 @@ return new_range_readers; } -void MergeTreeReadTask::initializeRangeReaders( - const PrewhereExprInfo & prewhere_actions, - const Names & non_const_virtual_column_names) +void MergeTreeReadTask::initializeRangeReaders(const PrewhereExprInfo & prewhere_actions) { if (range_readers.main.isInitialized()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Range reader is already initialized"); - range_readers = createRangeReaders(readers, prewhere_actions, non_const_virtual_column_names); + range_readers = createRangeReaders(readers, prewhere_actions); } UInt64 MergeTreeReadTask::estimateNumRows(const BlockSizeParams & params) const diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h index 8d2f0657fd1..7e935f5d28d 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.h +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -40,36 +40,40 @@ enum class MergeTreeReadType ParallelReplicas, }; +struct MergeTreeReadTaskColumns +{ + /// Column names to read during WHERE + NamesAndTypesList columns; + /// Column names to read during each PREWHERE step + std::vector pre_columns; + + String dump() const; +}; + +struct MergeTreeReadTaskInfo +{ + /// Data part which should be read while performing this task + DataPartPtr data_part; + /// For `part_index` virtual column + size_t part_index_in_query; + /// Alter conversions that should be applied on-the-fly for the part. + AlterConversionsPtr alter_conversions; + /// Column names to read during PREWHERE and WHERE + MergeTreeReadTaskColumns task_columns; + /// Virtual column names to read + NameSet virt_column_names; + /// For `partition_value` virtual column + DataTypePtr partition_value_type; + /// Shared initialized size predictor. It is copied for each new task. + MergeTreeBlockSizePredictorPtr shared_size_predictor; +}; + +using MergeTreeReadTaskInfoPtr = std::shared_ptr; + /// A batch of work for MergeTreeSelectProcessor struct MergeTreeReadTask : private boost::noncopyable { public: - struct Columns - { - /// Column names to read during WHERE - NamesAndTypesList columns; - /// Column names to read during each PREWHERE step - std::vector pre_columns; - - String dump() const; - }; - - struct Info - { - /// Data part which should be read while performing this task - DataPartPtr data_part; - /// For virtual `part_index` virtual column - size_t part_index_in_query; - /// Alter converversionss that should be applied on-fly for part. - AlterConversionsPtr alter_conversions; - /// Column names to read during PREWHERE and WHERE - Columns task_columns; - /// Shared initialized size predictor.
It is copied for each new task. - MergeTreeBlockSizePredictorPtr shared_size_predictor; - }; - - using InfoPtr = std::shared_ptr; - /// Extra params that required for creation of reader. struct Extras { @@ -115,27 +119,28 @@ public: size_t num_read_bytes = 0; }; - MergeTreeReadTask(InfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_); + MergeTreeReadTask( + MergeTreeReadTaskInfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_); - void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names); + void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions); BlockAndProgress read(const BlockSizeParams & params); bool isFinished() const { return mark_ranges.empty() && range_readers.main.isCurrentRangeFinished(); } - const Info & getInfo() const { return *info; } + const MergeTreeReadTaskInfo & getInfo() const { return *info; } const MergeTreeRangeReader & getMainRangeReader() const { return range_readers.main; } const IMergeTreeReader & getMainReader() const { return *readers.main; } Readers releaseReaders() { return std::move(readers); } - static Readers createReaders(const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges); - static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names); + static Readers createReaders(const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges); + static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions); private: UInt64 estimateNumRows(const BlockSizeParams & params) const; /// Shared information required for reading. - InfoPtr info; + MergeTreeReadTaskInfoPtr info; /// Readers for data_part of this task. /// May be reused and released to the next task. 
diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 02048009296..65b578e065d 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -17,6 +17,7 @@ namespace ErrorCodes MergeTreeReaderCompact::MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, + const MergeTreeReadTaskInfoPtr & read_task_info_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -29,6 +30,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( : IMergeTreeReader( data_part_info_for_read_, columns_, + read_task_info_, storage_snapshot_, uncompressed_cache_, mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index dace4ec468e..c87e4889d26 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -21,6 +21,7 @@ public: MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, + const MergeTreeReadTaskInfoPtr & read_task_info_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index bacd86511f5..e628bd76cb4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -19,12 +19,14 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, + const MergeTreeReadTaskInfoPtr & read_task_info_, const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_) : IMergeTreeReader( data_part_info_for_read_, columns_, + read_task_info_, storage_snapshot_, nullptr, nullptr, diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.h b/src/Storages/MergeTree/MergeTreeReaderInMemory.h index e26a98f0916..161b615a511 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.h +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.h @@ -18,6 +18,7 @@ public: MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, + const MergeTreeReadTaskInfoPtr & read_task_info_, const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_); diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 640432ef755..8270f2452c2 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -24,6 +24,7 @@ namespace MergeTreeReaderWide::MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_, NamesAndTypesList columns_, + const MergeTreeReadTaskInfoPtr & read_task_info_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -35,6 +36,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( : IMergeTreeReader( data_part_info_, columns_, + read_task_info_, storage_snapshot_, uncompressed_cache_, mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 2a850cc2814..ecfaa43a3f8 100644 --- 
a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -17,6 +17,7 @@ public: MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, + const MergeTreeReadTaskInfoPtr & read_task_info_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index aeff438f509..264ceae3655 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -24,19 +24,6 @@ namespace ErrorCodes extern const int QUERY_WAS_CANCELLED; } -static void injectNonConstVirtualColumns( - size_t rows, - Block & block, - const Names & virtual_columns, - MergeTreeReadTask * task = nullptr); - -static void injectPartConstVirtualColumns( - size_t rows, - Block & block, - MergeTreeReadTask * task, - const DataTypePtr & partition_value_type, - const Names & virtual_columns); - MergeTreeSelectProcessor::MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, @@ -71,15 +58,9 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( lightweight_delete_filter_step = std::make_shared(std::move(step)); } - header_without_const_virtual_columns = applyPrewhereActions(pool->getHeader(), prewhere_info); - size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); - injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); - - for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num) - non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name); - - result_header = header_without_const_virtual_columns; - injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); + result_header = pool->getHeader(); + injectVirtualColumns(result_header, partition_value_type, virt_column_names); + result_header = applyPrewhereActions(result_header, prewhere_info); if (!prewhere_actions.steps.empty()) LOG_TRACE(log, "PREWHERE condition was split into {} steps: {}", prewhere_actions.steps.size(), prewhere_actions.dumpConditions()); @@ -163,8 +144,6 @@ ChunkAndProgress MergeTreeSelectProcessor::read() if (res.row_count) { - injectVirtualColumns(res.block, res.row_count, task.get(), partition_value_type, virt_column_names); - /// Reorder the columns according to result_header Columns ordered_columns; ordered_columns.reserve(result_header.columns()); @@ -198,7 +177,7 @@ void MergeTreeSelectProcessor::initializeRangeReaders() for (const auto & step : prewhere_actions.steps) all_prewhere_actions.steps.push_back(step); - task->initializeRangeReaders(all_prewhere_actions, non_const_virtual_column_names); + task->initializeRangeReaders(all_prewhere_actions); } @@ -208,8 +187,6 @@ namespace { explicit VirtualColumnsInserter(Block & block_) : block(block_) {} - bool columnExists(const String & name) const { return block.has(name); } - void insertUInt8Column(const ColumnPtr & column, const String & name) { block.insert({column, std::make_shared(), name}); @@ -230,16 +207,9 @@ namespace block.insert({column, std::make_shared(std::make_shared()), name}); } - void insertPartitionValueColumn( - size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name) + void 
insertPartitionValueColumn(const DataTypePtr & partition_value_type, const String & name) { - ColumnPtr column; - if (rows) - column = partition_value_type->createColumnConst(rows, Tuple(partition_value.begin(), partition_value.end())) - ->convertToFullColumnIfConst(); - else - column = partition_value_type->createColumn(); - + ColumnPtr column = partition_value_type->createColumn(); block.insert({column, partition_value_type, name}); } @@ -247,154 +217,55 @@ namespace }; } -/// Adds virtual columns that are not const for all rows -static void injectNonConstVirtualColumns( - size_t rows, - Block & block, - const Names & virtual_columns, - MergeTreeReadTask * task) +void MergeTreeSelectProcessor::injectVirtualColumns(Block & block, const DataTypePtr & partition_value_type, const Names & virtual_columns) { VirtualColumnsInserter inserter(block); + + /// add virtual columns + /// Except _sample_factor, which is added from the outside. for (const auto & virtual_column_name : virtual_columns) { if (virtual_column_name == "_part_offset") { - if (!rows) - { - inserter.insertUInt64Column(DataTypeUInt64().createColumn(), virtual_column_name); - } - else - { - if (!inserter.columnExists(virtual_column_name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column {} must have been filled part reader", - virtual_column_name); - } + inserter.insertUInt64Column(DataTypeUInt64().createColumn(), virtual_column_name); } - - if (virtual_column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + else if (virtual_column_name == LightweightDeleteDescription::FILTER_COLUMN.name) { - /// If _row_exists column isn't present in the part then fill it here with 1s - ColumnPtr column; - if (rows) - column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumnConst(rows, 1)->convertToFullColumnIfConst(); - else - column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumn(); - - inserter.insertUInt8Column(column, virtual_column_name); + ColumnPtr column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumn(); + inserter.insertUInt8Column(column, virtual_column_name); } - - if (virtual_column_name == BlockNumberColumn::name) + else if (virtual_column_name == BlockNumberColumn::name) { - ColumnPtr column; - if (rows) - { - size_t value = 0; - if (task) - { - value = task->getInfo().data_part ? task->getInfo().data_part->info.min_block : 0; - } - column = BlockNumberColumn::type->createColumnConst(rows, value)->convertToFullColumnIfConst(); - } - else - column = BlockNumberColumn::type->createColumn(); - + ColumnPtr column = BlockNumberColumn::type->createColumn(); inserter.insertUInt64Column(column, virtual_column_name); } - } -} - -/// Adds virtual columns that are const for the whole part -static void injectPartConstVirtualColumns( - size_t rows, - Block & block, - MergeTreeReadTask * task, - const DataTypePtr & partition_value_type, - const Names & virtual_columns) -{ - VirtualColumnsInserter inserter(block); - /// add virtual columns - /// Except _sample_factor, which is added from the outside. 
- if (!virtual_columns.empty()) - { - if (unlikely(rows && !task)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert virtual columns to non-empty chunk without specified task."); - - const IMergeTreeDataPart * part = nullptr; - - if (rows) + else if (virtual_column_name == "_part") { - part = task->getInfo().data_part.get(); - if (part->isProjectionPart()) - part = part->getParentPart(); + ColumnPtr column = DataTypeLowCardinality{std::make_shared()}.createColumn(); + inserter.insertLowCardinalityColumn(column, virtual_column_name); } - - for (const auto & virtual_column_name : virtual_columns) + else if (virtual_column_name == "_part_index") { - if (virtual_column_name == "_part") - { - ColumnPtr column; - if (rows) - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->name) - ->convertToFullColumnIfConst(); - else - column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_part_index") - { - ColumnPtr column; - if (rows) - column = DataTypeUInt64().createColumnConst(rows, task->getInfo().part_index_in_query)->convertToFullColumnIfConst(); - else - column = DataTypeUInt64().createColumn(); - - inserter.insertUInt64Column(column, virtual_column_name); - } - else if (virtual_column_name == "_part_uuid") - { - ColumnPtr column; - if (rows) - column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst(); - else - column = DataTypeUUID().createColumn(); - - inserter.insertUUIDColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_id") - { - ColumnPtr column; - if (rows) - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->info.partition_id) - ->convertToFullColumnIfConst(); - else - column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_value") - { - if (rows) - inserter.insertPartitionValueColumn(rows, part->partition.value, partition_value_type, virtual_column_name); - else - inserter.insertPartitionValueColumn(rows, {}, partition_value_type, virtual_column_name); - } + ColumnPtr column = DataTypeUInt64().createColumn(); + inserter.insertUInt64Column(column, virtual_column_name); + } + else if (virtual_column_name == "_part_uuid") + { + ColumnPtr column = DataTypeUUID().createColumn(); + inserter.insertUUIDColumn(column, virtual_column_name); + } + else if (virtual_column_name == "_partition_id") + { + ColumnPtr column = DataTypeLowCardinality{std::make_shared()}.createColumn(); + inserter.insertLowCardinalityColumn(column, virtual_column_name); + } + else if (virtual_column_name == "_partition_value") + { + inserter.insertPartitionValueColumn(partition_value_type, virtual_column_name); } } } -void MergeTreeSelectProcessor::injectVirtualColumns( - Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns) -{ - /// First add non-const columns that are filled by the range reader and then const columns that we will fill ourselves. 
- /// Note that the order is important: virtual columns filled by the range reader must go first - injectNonConstVirtualColumns(row_count, block, virtual_columns,task); - injectPartConstVirtualColumns(row_count, block, task, partition_value_type, virtual_columns); -} - Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info) { if (prewhere_info) @@ -449,8 +320,8 @@ Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const Prewhere Block MergeTreeSelectProcessor::transformHeader( Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns) { + injectVirtualColumns(block, partition_value_type, virtual_columns); auto transformed = applyPrewhereActions(std::move(block), prewhere_info); - injectVirtualColumns(transformed, 0, nullptr, partition_value_type, virtual_columns); return transformed; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index cf1a6313b51..1d3618e94c2 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -82,7 +82,7 @@ private: }; /// Used for filling header with no rows as well as block with data - static void injectVirtualColumns(Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns); + static void injectVirtualColumns(Block & block, const DataTypePtr & partition_value_type, const Names & virtual_columns); static Block applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info); /// Sets up range readers corresponding to data readers @@ -104,10 +104,6 @@ private: MergeTreeReadTaskPtr task; /// This step is added when the part has lightweight delete mask PrewhereExprStepPtr lightweight_delete_filter_step; - /// These columns will be filled by the merge tree range reader - Names non_const_virtual_column_names; - /// This header is used for chunks from readFromPart(). - Block header_without_const_virtual_columns; /// A result of getHeader(). A chunk which this header is returned from read(). Block result_header; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 82e9f8fd2db..7281c0ddc82 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -179,9 +179,16 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())}); reader = data_part->getReader( - columns_for_reader, storage_snapshot, - *mark_ranges, /* uncompressed_cache = */ nullptr, - mark_cache.get(), alter_conversions, reader_settings, {}, {}); + columns_for_reader, + storage_snapshot, + *mark_ranges, + /* read_task_info = */ nullptr, + /* uncompressed_cache = */ nullptr, + mark_cache.get(), + alter_conversions, + reader_settings, + {}, + {}); } Chunk MergeTreeSequentialSource::generate() diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 43e3b0c505a..0621966a813 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -232,6 +232,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction /// 3. 
Sort condition nodes by the number of columns used in them and the overall size of those columns /// TODO: not sorting for now because the conditions are already sorted by Where Optimizer + /// TODO(amos): _part_offset must come first /// 4. Group conditions with the same set of columns into a single read/compute step std::vector> condition_groups; From b2e6a0d69a6abb5d9a11a0c5311c09324042af87 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 22 Jan 2024 12:48:07 +0800 Subject: [PATCH 002/356] Style fix --- src/Storages/MergeTree/MergeTreeSelectProcessor.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 264ceae3655..ad8b280a7e6 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -20,7 +20,6 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; - extern const int LOGICAL_ERROR; extern const int QUERY_WAS_CANCELLED; } From 1ea48239b211bd080bec47444c4e505176ea8e90 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 22 Jan 2024 16:06:24 +0800 Subject: [PATCH 003/356] Fix test --- .../02235_add_part_offset_virtual_column.reference | 2 ++ .../0_stateless/02235_add_part_offset_virtual_column.sql | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference index 2455f50b7f2..e88abb35ab4 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference @@ -50,6 +50,8 @@ SOME GRANULES FILTERED OUT 100002 foo PREWHERE 301408 164953047376 164953047376 +335872 166463369216 166463369216 +301407 164952947376 164952947376 42 10042 20042 diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index dc8fceddc52..5af6565c03d 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -52,7 +52,7 @@ SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000 SELECT 'PREWHERE'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000; -SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10 } -SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10 } +SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; +SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; From e0e2e35b9cdeda91f5757f3592d764104bd9ff3f Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 22 Jan 2024 20:02:22 +0800 Subject: [PATCH 004/356] Fix some comments --- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 81263085f75..968738b4341 
100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -362,7 +362,7 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns, const NumRo } } -/// The main invariant of the data in the read result is that he number of rows is +/// The main invariant of the data in the read result is that the number of rows is /// either equal to total_rows_per_granule (if filter has not been applied) or to the number of /// 1s in the filter (if filter has been applied). void MergeTreeRangeReader::ReadResult::checkInternalConsistency() const @@ -1200,7 +1200,7 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si if (result.rows_per_granule.empty()) { - /// If zero rows were read on prev step, than there is no more rows to read. + /// If zero rows were read on prev step, there is no more rows to read. /// Last granule may have less rows than index_granularity, so finish reading manually. stream.finish(); return columns; From c3bb97dfdf62b6954d334a0f957343c9e7a7fae5 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 22 Jan 2024 20:02:36 +0800 Subject: [PATCH 005/356] Add tests --- .../03000_virtual_columns_in_prewhere.reference | 1 + .../0_stateless/03000_virtual_columns_in_prewhere.sql | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference create mode 100644 tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql diff --git a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference new file mode 100644 index 00000000000..06b63ea6c2f --- /dev/null +++ b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference @@ -0,0 +1 @@ +0 0 0 diff --git a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql new file mode 100644 index 00000000000..9a24f797c13 --- /dev/null +++ b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql @@ -0,0 +1,10 @@ +drop table if exists x; + +create table x (i int, j int, k int) engine MergeTree order by tuple() settings index_granularity=8192, min_bytes_for_wide_part=0, min_rows_for_wide_part=0; + +insert into x select number, number * 2, number * 3 from numbers(100000); + +-- One granule, (_part_offset (8 bytes) + (4 bytes)) * 8192 + (8 bytes) * 1 = 98312 +select * from x prewhere _part_offset = 0 settings max_bytes_to_read = 98312; + +drop table x; From ce3242170bf492d4f4e61cdd6e4498b0f6cdef57 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 22 Jan 2024 20:08:23 +0800 Subject: [PATCH 006/356] Remove comment --- src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 0621966a813..43e3b0c505a 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -232,7 +232,6 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction /// 3. Sort condition nodes by the number of columns used in them and the overall size of those columns /// TODO: not sorting for now because the conditions are already sorted by Where Optimizer - /// TODO(amos): _part_offset must come first /// 4. 
Group conditions with the same set of columns into a single read/compute step std::vector> condition_groups; From 18d84696675699b60f4dba6545df892a92f86f78 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 22 Jan 2024 20:09:20 +0800 Subject: [PATCH 007/356] Remove another comment --- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 968738b4341..ff86ec01efa 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1041,7 +1041,6 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar return read_result; { - /// Physical columns go first and then some virtual columns follow size_t columns_count = merge_tree_reader->getColumns().size(); Columns columns(read_result.columns.begin(), read_result.columns.begin() + columns_count); merge_tree_reader->fillVirtualColumns(columns, read_result.num_rows); From c0eeeb26a97543e1147b848e2534acb36d4e90d2 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 23 Jan 2024 00:27:01 +0800 Subject: [PATCH 008/356] Disable sparse column in test --- tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql index 9a24f797c13..d57db9151b9 100644 --- a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql +++ b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql @@ -1,6 +1,6 @@ drop table if exists x; -create table x (i int, j int, k int) engine MergeTree order by tuple() settings index_granularity=8192, min_bytes_for_wide_part=0, min_rows_for_wide_part=0; +create table x (i int, j int, k int) engine MergeTree order by tuple() settings index_granularity=8192, index_granularity_bytes = '10Mi', min_bytes_for_wide_part=0, min_rows_for_wide_part=0, ratio_of_defaults_for_sparse_serialization=1; insert into x select number, number * 2, number * 3 from numbers(100000); From c4d84ff9fd0e2b6d41225d2b8fac658f64729013 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 7 Feb 2024 12:08:17 +0800 Subject: [PATCH 009/356] trivial opt on filter --- src/Processors/Transforms/FilterTransform.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index ea4dcff4808..94281301704 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -265,6 +265,7 @@ void FilterTransform::doTransform(Chunk & chunk) { size_t num_rows_before_filtration = chunk.getNumRows(); auto columns = chunk.detachColumns(); + DataTypes types; auto select_final_indices_info = getSelectByFinalIndices(chunk); { @@ -275,6 +276,7 @@ void FilterTransform::doTransform(Chunk & chunk) expression->execute(block, num_rows_before_filtration); columns = block.getColumns(); + types = block.getDataTypes(); } if (constant_filter_description.always_true || on_totals) @@ -325,7 +327,8 @@ void FilterTransform::doTransform(Chunk & chunk) size_t first_non_constant_column = num_columns; for (size_t i = 0; i < num_columns; ++i) { - if (i != filter_column_position && !isColumnConst(*columns[i])) + if (i != filter_column_position && !isColumnConst(*columns[i]) + && 
removeNullableOrLowCardinalityNullable(types[i])->isValueRepresentedByNumber()) { first_non_constant_column = i; break; From dd8680018ff6df4e940b84b0e2a21f05ef0c1756 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 9 Feb 2024 16:43:20 +0100 Subject: [PATCH 010/356] Analyzer: WIP on test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed --- src/Interpreters/getHeaderForProcessingStage.cpp | 2 +- src/Storages/StorageDistributed.cpp | 1 - src/Storages/StorageSnapshot.cpp | 8 ++++++++ src/Storages/StorageSnapshot.h | 3 +++ tests/analyzer_integration_broken_tests.txt | 1 - 5 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index d16e01ef2d2..1daf776b8bc 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -153,7 +153,7 @@ Block getHeaderForProcessingStage( if (context->getSettingsRef().allow_experimental_analyzer) { - auto storage = std::make_shared(storage_snapshot->storage.getStorageID(), storage_snapshot->metadata->getColumns()); + auto storage = std::make_shared(storage_snapshot->storage.getStorageID(), storage_snapshot->getAllColumnsDescription()); InterpreterSelectQueryAnalyzer interpreter(query, context, storage, SelectQueryOptions(processed_stage).analyze()); result = interpreter.getSampleBlock(); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 5fb404da1cf..6922261a823 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -295,7 +295,6 @@ NamesAndTypesList StorageDistributed::getVirtuals() const /// NOTE This is weird. Most of these virtual columns are part of MergeTree /// tables info. But Distributed is general-purpose engine. return NamesAndTypesList{ - NameAndTypePair("_table", std::make_shared(std::make_shared())), NameAndTypePair("_part", std::make_shared(std::make_shared())), NameAndTypePair("_part_index", std::make_shared()), NameAndTypePair("_part_uuid", std::make_shared()), diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 34c092c7208..8f5f4209efc 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -39,6 +39,14 @@ void StorageSnapshot::init() system_columns[BlockNumberColumn::name] = BlockNumberColumn::type; } +ColumnsDescription StorageSnapshot::getAllColumnsDescription() const +{ + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); + auto column_names_and_types = getColumns(get_column_options); + + return ColumnsDescription{column_names_and_types}; +} + NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) const { auto all_columns = getMetadataForQuery()->getColumns().get(options); diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index d62e118e1f2..a5724f04967 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -62,6 +62,9 @@ struct StorageSnapshot std::shared_ptr clone(DataPtr data_) const; + /// Get columns description + ColumnsDescription getAllColumnsDescription() const; + /// Get all available columns with types according to options. 
NamesAndTypesList getColumns(const GetColumnsOptions & options) const; diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index c04ed440c18..31f626a23f7 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -6,7 +6,6 @@ test_distributed_type_object/test.py::test_distributed_type_object test_executable_table_function/test.py::test_executable_function_input_python test_mask_sensitive_info/test.py::test_encryption_functions test_merge_table_over_distributed/test.py::test_global_in -test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed test_mutations_with_merge_tree/test.py::test_mutations_with_merge_background_task test_mysql_database_engine/test.py::test_mysql_ddl_for_mysql_database test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster From 093f629e4ed7b650efe6571bb7c7c36112b0d9f5 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 14 Feb 2024 14:12:44 +0000 Subject: [PATCH 011/356] Update reference file --- .../0_stateless/02890_describe_table_options.reference | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index 5d99df36bb4..8842eca3311 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -65,7 +65,6 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom │ t │ Tuple( a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 1 │ │ _part │ LowCardinality(String) │ │ │ │ │ │ 1 │ │ _part_index │ UInt64 │ │ │ │ │ │ 1 │ │ _part_uuid │ UUID │ │ │ │ │ │ 1 │ @@ -104,7 +103,6 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom │ t │ Tuple( a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ │ _part │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ │ _part_index │ UInt64 │ │ │ │ │ │ 0 │ 1 │ │ _part_uuid │ UUID │ │ │ │ │ │ 0 │ 1 │ @@ -183,7 +181,6 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom │ t │ Tuple( a String, b UInt64) │ 0 │ -│ _table │ LowCardinality(String) │ 1 │ │ _part │ LowCardinality(String) │ 1 │ │ _part_index │ UInt64 │ 1 │ │ _part_uuid │ UUID │ 1 │ @@ -222,7 +219,6 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom │ t │ Tuple( a String, b UInt64) │ 0 │ 0 │ -│ _table │ LowCardinality(String) │ 0 │ 1 │ │ _part │ LowCardinality(String) │ 0 │ 1 │ │ _part_index │ UInt64 │ 0 │ 1 │ │ _part_uuid │ UUID │ 0 │ 1 │ From 8d2ad5383bea6cab2e2107e991f17359cb36ac4f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 14 Feb 2024 15:19:08 +0000 Subject: [PATCH 012/356] Fix execution name for constants --- src/Planner/PlannerActionsVisitor.cpp | 9 +++-- src/Storages/StorageMerge.cpp | 47 ++++++++++++++------------- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 511e9396a35..8fc200e7d38 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -88,7 +88,10 @@ public: case QueryTreeNodeType::CONSTANT: { const auto & constant_node = node->as(); - result = 
calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + if (constant_node.hasSourceExpression()) + result = calculateActionNodeName(constant_node.getSourceExpression()); + else + result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); break; } case QueryTreeNodeType::FUNCTION: @@ -527,7 +530,9 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi const auto & constant_literal = constant_node.getValue(); const auto & constant_type = constant_node.getResultType(); - auto constant_node_name = calculateConstantActionNodeName(constant_literal, constant_type); + auto constant_node_name = constant_node.hasSourceExpression() + ? action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()) + : calculateConstantActionNodeName(constant_literal, constant_type); ColumnWithTypeAndName column; column.name = constant_node_name; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 79d7b83cada..029ab4d4e4c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1001,34 +1001,37 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (has_database_virtual_column && !pipe_header.has("_database")) + if (!allow_experimental_analyzer) { - ColumnWithTypeAndName column; - column.name = "_database"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(database_name)); + if (has_database_virtual_column && !pipe_header.has("_database")) + { + ColumnWithTypeAndName column; + column.name = "_database"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(database_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } - if (has_table_virtual_column && !pipe_header.has("_table")) - { - ColumnWithTypeAndName column; - column.name = "_table"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(table_name)); + if (has_table_virtual_column && !pipe_header.has("_table")) + { + ColumnWithTypeAndName column; + column.name = "_table"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(table_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), 
ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } } /// Subordinary tables could have different but convertible types, like numeric types of different width. From 4921776cc5ba4b342f357deb80d4789d7ddc759f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 19 Feb 2024 11:43:49 +0800 Subject: [PATCH 013/356] change as request --- src/Processors/Transforms/FilterTransform.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index 94281301704..4591177850b 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -325,12 +325,18 @@ void FilterTransform::doTransform(Chunk & chunk) * or calculate number of set bytes in the filter. */ size_t first_non_constant_column = num_columns; + size_t min_size_in_memory = std::numeric_limits::max(); for (size_t i = 0; i < num_columns; ++i) { - if (i != filter_column_position && !isColumnConst(*columns[i]) - && removeNullableOrLowCardinalityNullable(types[i])->isValueRepresentedByNumber()) + DataTypePtr type_not_null = removeNullableOrLowCardinalityNullable(types[i]); + if (i != filter_column_position && !isColumnConst(*columns[i]) && type_not_null->isValueRepresentedByNumber()) { - first_non_constant_column = i; + size_t size_in_memory = type_not_null->getSizeOfValueInMemory() + (isNullableOrLowCardinalityNullable(types[i]) ? 
1 : 0); + if (size_in_memory < min_size_in_memory) + { + min_size_in_memory = size_in_memory; + first_non_constant_column = i; + } break; } } From 212245457662ebfbc7a6e865ec9518b028506bef Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 19 Feb 2024 12:41:29 +0000 Subject: [PATCH 014/356] Temporary progress --- src/Analyzer/ConstantNode.cpp | 96 ++++++++++++++------------- src/Analyzer/ConstantNode.h | 2 + src/Planner/Planner.cpp | 2 +- src/Planner/PlannerActionsVisitor.cpp | 46 +++++++++++-- src/Planner/PlannerContext.cpp | 10 ++- src/Planner/PlannerContext.h | 12 +++- src/Storages/StorageMerge.cpp | 55 ++++++++------- 7 files changed, 140 insertions(+), 83 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 69bed3dbe90..ce6da693f93 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -38,52 +38,9 @@ ConstantNode::ConstantNode(Field value_) : ConstantNode(value_, applyVisitor(FieldToDataType(), value_)) {} -void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const -{ - buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this); - - if (hasAlias()) - buffer << ", alias: " << getAlias(); - - buffer << ", constant_value: " << constant_value->getValue().dump(); - buffer << ", constant_value_type: " << constant_value->getType()->getName(); - - if (getSourceExpression()) - { - buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n'; - getSourceExpression()->dumpTreeImpl(buffer, format_state, indent + 4); - } -} - -bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const -{ - const auto & rhs_typed = assert_cast(rhs); - return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string; -} - -void ConstantNode::updateTreeHashImpl(HashState & hash_state) const -{ - auto type_name = constant_value->getType()->getName(); - hash_state.update(type_name.size()); - hash_state.update(type_name); - - hash_state.update(value_string.size()); - hash_state.update(value_string); -} - -QueryTreeNodePtr ConstantNode::cloneImpl() const -{ - return std::make_shared(constant_value, source_expression); -} - -ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const +bool ConstantNode::requiresCastCall() const { const auto & constant_value_literal = constant_value->getValue(); - auto constant_value_ast = std::make_shared(constant_value_literal); - - if (!options.add_cast_for_constants) - return constant_value_ast; - bool need_to_add_cast_function = false; auto constant_value_literal_type = constant_value_literal.getType(); WhichDataType constant_value_type(constant_value->getType()); @@ -131,7 +88,56 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const // Add cast if constant was created as a result of constant folding. // Constant folding may lead to type transformation and literal on shard // may have a different type. 
- if (need_to_add_cast_function || source_expression != nullptr) + return need_to_add_cast_function || source_expression != nullptr; +} + +void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + buffer << ", constant_value: " << constant_value->getValue().dump(); + buffer << ", constant_value_type: " << constant_value->getType()->getName(); + + if (getSourceExpression()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n'; + getSourceExpression()->dumpTreeImpl(buffer, format_state, indent + 4); + } +} + +bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string; +} + +void ConstantNode::updateTreeHashImpl(HashState & hash_state) const +{ + auto type_name = constant_value->getType()->getName(); + hash_state.update(type_name.size()); + hash_state.update(type_name); + + hash_state.update(value_string.size()); + hash_state.update(value_string); +} + +QueryTreeNodePtr ConstantNode::cloneImpl() const +{ + return std::make_shared(constant_value, source_expression); +} + +ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const +{ + const auto & constant_value_literal = constant_value->getValue(); + auto constant_value_ast = std::make_shared(constant_value_literal); + + if (!options.add_cast_for_constants) + return constant_value_ast; + + if (requiresCastCall()) { auto constant_type_name_ast = std::make_shared(constant_value->getType()->getName()); return makeASTFunction("_CAST", std::move(constant_value_ast), std::move(constant_type_name_ast)); diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 51c98a4a3b3..c0df092293d 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -75,6 +75,8 @@ public: return constant_value->getType(); } + bool requiresCastCall() const; + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index bcc42dbae7f..65033c6f66b 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1153,7 +1153,7 @@ PlannerContextPtr buildPlannerContext(const QueryTreeNodePtr & query_tree_node, if (select_query_options.is_subquery) updateContextForSubqueryExecution(mutable_context); - return std::make_shared(mutable_context, std::move(global_planner_context)); + return std::make_shared(mutable_context, std::move(global_planner_context), select_query_options); } Planner::Planner(const QueryTreeNodePtr & query_tree_, diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 8fc200e7d38..b33e1a3509c 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -44,6 +44,22 @@ namespace ErrorCodes namespace { +String calculateActionNodeNameForConstant(const ConstantNode & constant_node) +{ + WriteBufferFromOwnString buffer; + if (constant_node.requiresCastCall()) + buffer << "_CAST("; + + buffer << calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + + if (constant_node.requiresCastCall()) + { + buffer << ", '" << constant_node.getResultType()->getName() << "'_String)"; + } + + return buffer.str(); +} + class 
ActionNodeNameHelper { public: @@ -88,10 +104,17 @@ public: case QueryTreeNodeType::CONSTANT: { const auto & constant_node = node->as(); - if (constant_node.hasSourceExpression()) - result = calculateActionNodeName(constant_node.getSourceExpression()); + if (planner_context.isASTLevelOptimizationAllowed()) + { + result = calculateActionNodeNameForConstant(constant_node); + } else - result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + { + if (constant_node.hasSourceExpression()) + result = calculateActionNodeName(constant_node.getSourceExpression()); + else + result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + } break; } case QueryTreeNodeType::FUNCTION: @@ -530,9 +553,20 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi const auto & constant_literal = constant_node.getValue(); const auto & constant_type = constant_node.getResultType(); - auto constant_node_name = constant_node.hasSourceExpression() - ? action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()) - : calculateConstantActionNodeName(constant_literal, constant_type); + auto constant_node_name = [&]() + { + if (planner_context->isASTLevelOptimizationAllowed()) + { + return calculateActionNodeNameForConstant(constant_node); + } + else + { + if (constant_node.hasSourceExpression()) + return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); + else + return calculateConstantActionNodeName(constant_literal, constant_type); + } + }(); ColumnWithTypeAndName column; column.name = constant_node_name; diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 422c8c1d01f..57db84d5031 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include "Interpreters/SelectQueryOptions.h" namespace DB { @@ -41,9 +43,10 @@ bool GlobalPlannerContext::hasColumnIdentifier(const ColumnIdentifier & column_i return column_identifiers.contains(column_identifier); } -PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_) +PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_) : query_context(std::move(query_context_)) , global_planner_context(std::move(global_planner_context_)) + , select_query_options(select_query_options_) {} TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) @@ -116,6 +119,11 @@ const ColumnIdentifier * PlannerContext::getColumnNodeIdentifierOrNull(const Que return table_expression_data->getColumnIdentifierOrNull(column_name); } +bool PlannerContext::isASTLevelOptimizationAllowed() const +{ + return !(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options.ignore_ast_optimizations); +} + PlannerContext::SetKey PlannerContext::createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node) { const auto set_source_hash = set_source_node->getTreeHash(); diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index d7ea4fd95dd..49272429b43 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -10,6 +10,7 @@ #include #include +#include namespace DB { @@ -48,7 +49,7 @@ class PlannerContext { 
public: /// Create planner context with query context and global planner context - PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_); + PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_); /// Get planner context query context ContextPtr getQueryContext() const @@ -80,6 +81,11 @@ public: return global_planner_context; } + const SelectQueryOptions & getSelectQueryOptions() const + { + return select_query_options; + } + /// Get or create table expression data for table expression node. TableExpressionData & getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node); @@ -135,6 +141,8 @@ public: static SetKey createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node); PreparedSets & getPreparedSets() { return prepared_sets; } + + bool isASTLevelOptimizationAllowed() const; private: /// Query context ContextMutablePtr query_context; @@ -142,6 +150,8 @@ private: /// Global planner context GlobalPlannerContextPtr global_planner_context; + SelectQueryOptions select_query_options; + /// Column node to column identifier std::unordered_map column_node_to_column_identifier; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 029ab4d4e4c..591a0ae375e 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1001,37 +1001,34 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (!allow_experimental_analyzer) + if (has_database_virtual_column && !pipe_header.has("_database")) { - if (has_database_virtual_column && !pipe_header.has("_database")) - { - ColumnWithTypeAndName column; - column.name = "_database"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(database_name)); + ColumnWithTypeAndName column; + column.name = "_database"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(database_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } - if (has_table_virtual_column && !pipe_header.has("_table")) - { - ColumnWithTypeAndName column; - column.name = "_table"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(table_name)); + if (has_table_virtual_column && !pipe_header.has("_table")) + { + ColumnWithTypeAndName column; + column.name = "_table"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(table_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - 
std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); } /// Subordinary tables could have different but convertible types, like numeric types of different width. @@ -1393,7 +1390,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const RowPolicyDataOpt & row_policy_data_opt, ContextMutablePtr local_context, QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage) + QueryProcessingStage::Enum processed_stage [[maybe_unused]]) { Block before_block_header = builder.getHeader(); @@ -1452,9 +1449,9 @@ void ReadFromMerge::convertAndFilterSourceStream( ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; - if (local_context->getSettingsRef().allow_experimental_analyzer - && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) - convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; + // if (local_context->getSettingsRef().allow_experimental_analyzer + // && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) + // convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; if (row_policy_data_opt) { From a2fa67c2804049f7e82d81bb72733aaa753e84ec Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 20 Feb 2024 10:54:33 +0100 Subject: [PATCH 015/356] Allow local type for local_blob_storage --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 7 +++++-- tests/config/config.d/storage_conf.xml | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index b3626135177..1e8eee1d29c 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -254,7 +254,7 @@ void registerWebObjectStorage(ObjectStorageFactory & factory) void registerLocalObjectStorage(ObjectStorageFactory & factory) { - factory.registerObjectStorageType("local_blob_storage", []( + auto creator = []( const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, @@ -267,7 +267,10 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) /// keys are mapped to the fs, object_key_prefix is a directory also fs::create_directories(object_key_prefix); return std::make_shared(object_key_prefix); - }); + }; + + factory.registerObjectStorageType("local_blob_storage", creator); + factory.registerObjectStorageType("local", creator); } #endif diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 1429dfff724..00d8cb3aea5 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -37,11 +37,13 @@ - local_blob_storage + object_storage + local local_disk/ - local_blob_storage + object_storage + local_blob_storage local_disk_2/ From 
2a30d6b9d4371439a6b0f9b440e1def439fa74be Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 20 Feb 2024 10:57:32 +0100 Subject: [PATCH 016/356] Update ObjectStorageFactory.cpp --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 1e8eee1d29c..28e75053b1e 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -206,7 +206,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) #if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) void registerAzureObjectStorage(ObjectStorageFactory & factory) { - factory.registerObjectStorageType("azure_blob_storage", []( + auto creator = []( const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, @@ -220,7 +220,9 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) getAzureBlobStorageSettings(config, config_prefix, context), container_name); - }); + }; + factory.registerObjectStorageType("azure_blob_storage", creator); + factory.registerObjectStorageType("azure", creator); } #endif From dd1a25fa35496b145eac43bfc2cc9733e224d4d9 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 20 Feb 2024 10:59:18 +0100 Subject: [PATCH 017/356] Update azure config --- tests/config/config.d/azure_storage_conf.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/config/config.d/azure_storage_conf.xml b/tests/config/config.d/azure_storage_conf.xml index f42bb8e3cf8..412d40111a7 100644 --- a/tests/config/config.d/azure_storage_conf.xml +++ b/tests/config/config.d/azure_storage_conf.xml @@ -2,7 +2,8 @@ - azure_blob_storage + object_storage + azure http://localhost:10000/devstoreaccount1 cont false From 2405115484dcce032e6984e9d63f1ad8075eca5b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 20 Feb 2024 18:05:48 +0100 Subject: [PATCH 018/356] Fix upgrade check --- docker/test/upgrade/run.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index aaba5cc6a8c..d8ba48909b6 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -103,6 +103,16 @@ sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ > /etc/clickhouse-server/config.d/keeper_port.xml.tmp sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml +sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ + | sed "s|azure>|>azure_blob_storage>|" \ + > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + +sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|local>|>local_blob_storage>|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + # async_replication setting doesn't exist on some older versions remove_keeper_config "async_replication" "1" From 23f300b581bb904931d4445c562fd0e4c0540ed2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 21 Feb 2024 00:57:35 +0000 Subject: [PATCH 
019/356] refactoring of virtual columns --- src/Interpreters/InterpreterCreateQuery.cpp | 54 ++++--- src/Interpreters/InterpreterDeleteQuery.cpp | 1 - src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Interpreters/MutationsInterpreter.cpp | 6 +- src/Interpreters/inplaceBlockConversions.cpp | 15 +- src/Interpreters/inplaceBlockConversions.h | 2 +- .../Algorithms/SummingSortedAlgorithm.cpp | 2 +- .../optimizeUseAggregateProjection.cpp | 2 +- .../QueryPlan/ReadFromMergeTree.cpp | 15 +- .../Transforms/buildPushingToViewsChain.cpp | 2 +- src/Storages/AlterCommands.cpp | 2 +- src/Storages/BlockNumberColumn.cpp | 23 --- src/Storages/BlockNumberColumn.h | 16 -- src/Storages/ColumnsDescription.cpp | 38 +---- src/Storages/ColumnsDescription.h | 34 ++-- src/Storages/IStorage.cpp | 12 +- src/Storages/IStorage.h | 5 +- src/Storages/MergeTree/IMergeTreeReader.cpp | 85 +++------- src/Storages/MergeTree/IMergeTreeReader.h | 6 +- src/Storages/MergeTree/MergeTask.h | 2 +- .../MergeTree/MergeTreeBlockReadUtils.cpp | 15 +- src/Storages/MergeTree/MergeTreeData.cpp | 128 ++++++--------- src/Storages/MergeTree/MergeTreeData.h | 10 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 2 +- .../MergeTreeDataPartWriterCompact.cpp | 10 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 15 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 54 ++----- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 2 - .../MergeTree/MergeTreePrefetchedReadPool.h | 1 - src/Storages/MergeTree/MergeTreeReadPool.cpp | 2 - src/Storages/MergeTree/MergeTreeReadPool.h | 1 - .../MergeTree/MergeTreeReadPoolBase.cpp | 3 - .../MergeTree/MergeTreeReadPoolBase.h | 2 - .../MergeTree/MergeTreeReadPoolInOrder.cpp | 2 - .../MergeTree/MergeTreeReadPoolInOrder.h | 1 - .../MergeTreeReadPoolParallelReplicas.cpp | 2 - .../MergeTreeReadPoolParallelReplicas.h | 1 - ...rgeTreeReadPoolParallelReplicasInOrder.cpp | 2 - ...MergeTreeReadPoolParallelReplicasInOrder.h | 1 - src/Storages/MergeTree/MergeTreeReadTask.h | 2 - .../MergeTree/MergeTreeSelectProcessor.cpp | 108 +++---------- .../MergeTree/MergeTreeSelectProcessor.h | 9 +- .../MergeTree/MergeTreeSequentialSource.cpp | 48 ++++-- src/Storages/MergeTree/MutateTask.cpp | 2 +- src/Storages/MergeTreeVirtualColumns.cpp | 62 ++++++++ src/Storages/MergeTreeVirtualColumns.h | 33 ++++ src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageInMemoryMetadata.cpp | 2 +- src/Storages/StorageInMemoryMetadata.h | 2 +- src/Storages/StorageLog.cpp | 15 +- src/Storages/StorageSnapshot.cpp | 147 +++++++++++------- src/Storages/StorageSnapshot.h | 38 ++--- src/Storages/System/IStorageSystemOneBlock.h | 2 +- src/Storages/System/StorageSystemJemalloc.cpp | 2 +- .../System/StorageSystemZooKeeper.cpp | 2 +- src/Storages/VirtualColumnsDescription.cpp | 96 ++++++++++++ src/Storages/VirtualColumnsDescription.h | 68 ++++++++ 57 files changed, 632 insertions(+), 586 deletions(-) create mode 100644 src/Storages/MergeTreeVirtualColumns.cpp create mode 100644 src/Storages/MergeTreeVirtualColumns.h create mode 100644 src/Storages/VirtualColumnsDescription.cpp create mode 100644 src/Storages/VirtualColumnsDescription.h diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index c491ee30321..c48d025a78f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -890,24 +889,6 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat throw 
Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} already exists", backQuoteIfNeed(column.name)); } - /// Check if _row_exists for lightweight delete column in column_lists for merge tree family. - if (create.storage && create.storage->engine && endsWith(create.storage->engine->name, "MergeTree")) - { - auto search = all_columns.find(LightweightDeleteDescription::FILTER_COLUMN.name); - if (search != all_columns.end()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' for *MergeTree engines because it " - "is reserved for lightweight delete feature", - LightweightDeleteDescription::FILTER_COLUMN.name); - - auto search_block_number = all_columns.find(BlockNumberColumn::name); - if (search_block_number != all_columns.end()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' for *MergeTree engines because it " - "is reserved for storing block number", - BlockNumberColumn::name); - } - const auto & settings = getContext()->getSettingsRef(); /// Check low cardinality types in creating table if it was not allowed in setting @@ -973,9 +954,24 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } +void validateVirtualColumns(const IStorage & storage) +{ + const auto & virtual_columns = storage.getVirtualsDescription(); + for (const auto & storage_column : storage.getInMemoryMetadataPtr()->getColumns()) + { + auto virtual_desc = virtual_columns.tryGetDescription(storage_column.name); + if (virtual_desc && virtual_desc->kind == VirtualsKind::Persistent) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot create table with column '{}' for {} engines because it is reserved for persistent virtual column", + storage_column.name, storage.getName()); + } + } +} + namespace { - void checkTemporaryTableEngineName(const String& name) + void checkTemporaryTableEngineName(const String & name) { if (name.starts_with("Replicated") || name.starts_with("Shared") || name == "KeeperMap") throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines"); @@ -1549,6 +1545,16 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, addColumnsDescriptionToCreateQueryIfNecessary(query_ptr->as(), res); } + validateVirtualColumns(*res); + + if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot create table with column of type Object, " + "because storage {} doesn't support dynamic subcolumns", + res->getName()); + } + if (!create.attach && getContext()->getSettingsRef().database_replicated_allow_only_replicated_engine) { bool is_replicated_storage = typeid_cast(res.get()) != nullptr; @@ -1598,14 +1604,6 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, /// we can safely destroy the object without a call to "shutdown", because there is guarantee /// that no background threads/similar resources remain after exception from "startup". 
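
A minimal sketch of the behaviour the new validateVirtualColumns check above is meant to preserve (the table name here is illustrative and not taken from the patch): a user-declared column that collides with a persistent virtual column is still rejected for *MergeTree engines, for example

create table t_reserved (x Int32, _row_exists UInt8) engine MergeTree order by x; -- expected to fail: _row_exists is reserved for the lightweight delete mask
create table t_reserved (x Int32, _block_number UInt64) engine MergeTree order by x; -- expected to fail: _block_number is reserved for storing the block number
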
- if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column of type Object, " - "because storage {} doesn't support dynamic subcolumns", - res->getName()); - } - res->startup(); return true; } diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 5c13a1145d1..97ae9649ae8 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -15,7 +15,6 @@ #include #include #include -#include namespace DB diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 724cfca6a80..3acb8e9e16f 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -133,7 +133,7 @@ Block InterpreterInsertQuery::getSampleBlock( if (auto * window_view = dynamic_cast(table.get())) return window_view->getInputHeader(); else if (no_destination) - return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals()); + return metadata_snapshot->getSampleBlockwithVirtuals(table->getVirtuals()); else return metadata_snapshot->getSampleBlockNonMaterialized(); } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 502b961ced8..4063dadea5e 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -352,7 +352,7 @@ bool MutationsInterpreter::Source::isCompactPart() const return part && part->getType() == MergeTreeDataPartType::Compact; } -static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) +static Names getAvailableColumnswithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) { auto all_columns = metadata_snapshot->getColumns().getNamesOfPhysical(); for (const auto & column : storage.getVirtuals()) @@ -369,7 +369,7 @@ MutationsInterpreter::MutationsInterpreter( : MutationsInterpreter( Source(storage_), metadata_snapshot_, std::move(commands_), - getAvailableColumnsWithVirtuals(metadata_snapshot_, *storage_), + getAvailableColumnswithVirtuals(metadata_snapshot_, *storage_), std::move(context_), std::move(settings_)) { if (settings.can_execute && dynamic_cast(source.getStorage().get())) diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index fd8f5b154c4..061156c56db 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include namespace DB @@ -280,7 +280,7 @@ void fillMissingColumns( const NamesAndTypesList & requested_columns, const NamesAndTypesList & available_columns, const NameSet & partially_read_columns, - StorageMetadataPtr metadata_snapshot, size_t block_number) + StorageMetadataPtr metadata_snapshot) { size_t num_columns = requested_columns.size(); if (num_columns != res_columns.size()) @@ -359,14 +359,9 @@ void fillMissingColumns( } else { - if (requested_column->name == BlockNumberColumn::name) - res_columns[i] = type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst(); - else - /// We must turn a constant column into a full column because the interpreter could infer - /// that it is constant everywhere but in some blocks (from other 
parts) it can be a full column. - res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst(); - - + /// We must turn a constant column into a full column because the interpreter could infer + /// that it is constant everywhere but in some blocks (from other parts) it can be a full column. + res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst(); } } } diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h index 7a13a75ec8b..bea44bf6db9 100644 --- a/src/Interpreters/inplaceBlockConversions.h +++ b/src/Interpreters/inplaceBlockConversions.h @@ -46,6 +46,6 @@ void fillMissingColumns( const NamesAndTypesList & requested_columns, const NamesAndTypesList & available_columns, const NameSet & partially_read_columns, - StorageMetadataPtr metadata_snapshot, size_t block_number = 0); + StorageMetadataPtr metadata_snapshot); } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 845cf561968..6253d3058aa 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 534716cc60e..75d8bddcf11 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -431,7 +431,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( { const auto & keys = aggregating.getParams().keys; const auto & aggregates = aggregating.getParams().aggregates; - Block key_virtual_columns = reading.getMergeTreeData().getSampleBlockWithVirtualColumns(); + Block key_virtual_columns = reading.getMergeTreeData().getHeaderWithVirtualsForFilter(); AggregateProjectionCandidates candidates; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index aed053909d7..e2ce36264dd 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -280,8 +280,8 @@ ReadFromMergeTree::ReadFromMergeTree( bool enable_parallel_reading) : SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader( storage_snapshot_->getSampleBlockForColumns(real_column_names_), + storage_snapshot_, query_info_.prewhere_info, - data_.getPartitionValueType(), virt_column_names_)}) , reader_settings(getMergeTreeReaderSettings(context_, query_info_)) , prepared_parts(std::move(parts_)) @@ -381,7 +381,6 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( reader_settings, required_columns, virt_column_names, - data.getPartitionValueType(), pool_settings, context); @@ -395,7 +394,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), data, prewhere_info, + pool, std::move(algorithm), storage_snapshot, prewhere_info, actions_settings, block_size_copy, reader_settings, virt_column_names); auto source = std::make_shared(std::move(processor)); @@ -463,7 +462,6 @@ Pipe ReadFromMergeTree::readFromPool( reader_settings, required_columns, virt_column_names, - data.getPartitionValueType(), pool_settings, 
context); } @@ -477,7 +475,6 @@ Pipe ReadFromMergeTree::readFromPool( reader_settings, required_columns, virt_column_names, - data.getPartitionValueType(), pool_settings, context); } @@ -496,7 +493,7 @@ Pipe ReadFromMergeTree::readFromPool( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), data, prewhere_info, + pool, std::move(algorithm), storage_snapshot, prewhere_info, actions_settings, block_size_copy, reader_settings, virt_column_names); auto source = std::make_shared(std::move(processor)); @@ -554,7 +551,6 @@ Pipe ReadFromMergeTree::readInOrder( reader_settings, required_columns, virt_column_names, - data.getPartitionValueType(), pool_settings, context); } @@ -570,7 +566,6 @@ Pipe ReadFromMergeTree::readInOrder( reader_settings, required_columns, virt_column_names, - data.getPartitionValueType(), pool_settings, context); } @@ -604,7 +599,7 @@ Pipe ReadFromMergeTree::readInOrder( algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), data, prewhere_info, + pool, std::move(algorithm), storage_snapshot, prewhere_info, actions_settings, block_size, reader_settings, virt_column_names); processor->addPartLevelToChunk(isQueryWithFinal()); @@ -1730,8 +1725,8 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info output_stream = DataStream{.header = MergeTreeSelectProcessor::transformHeader( storage_snapshot->getSampleBlockForColumns(real_column_names), + storage_snapshot, prewhere_info_value, - data.getPartitionValueType(), virt_column_names)}; updateSortDescriptionForOutputStream( diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 91bbf04f327..c10dc4378e8 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -214,7 +214,7 @@ Chain buildPushingToViewsChain( /// If we don't write directly to the destination /// then expect that we're inserting with precalculated virtual columns - auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals()) + auto storage_header = no_destination ? metadata_snapshot->getSampleBlockwithVirtuals(storage->getVirtuals()) : metadata_snapshot->getSampleBlock(); /** TODO This is a very important line. At any insertion into the table one of chains should own lock. 
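Note on the hunks above: the read pools and the select processor stop receiving a separately threaded partition_value_type and instead take the storage snapshot, so the type of every requested virtual column is resolved from a single registry when the header is built. Below is a minimal, self-contained sketch of that lookup pattern; it is an illustrative model only, not the ClickHouse classes, and names such as VirtualRegistry and buildHeader are invented for the example.

// Illustrative sketch: a self-contained model of "one registry instead of threading
// a partition type through every pool constructor". Not ClickHouse code.
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

struct VirtualRegistry            // stands in for the snapshot's virtual columns
{
    std::map<std::string, std::string> name_to_type;

    const std::string * tryGet(const std::string & name) const
    {
        auto it = name_to_type.find(name);
        return it == name_to_type.end() ? nullptr : &it->second;
    }
};

// Model of injecting virtual columns into a header: every requested virtual
// must be known to the registry, otherwise it is an error.
std::vector<std::pair<std::string, std::string>> buildHeader(
    const std::vector<std::pair<std::string, std::string>> & physical,
    const std::vector<std::string> & requested_virtuals,
    const VirtualRegistry & registry)
{
    auto header = physical;
    for (const auto & name : requested_virtuals)
    {
        const auto * type = registry.tryGet(name);
        if (!type)
            throw std::runtime_error("There is no virtual column " + name);
        header.emplace_back(name, *type);
    }
    return header;
}

int main()
{
    VirtualRegistry registry{{{"_part", "LowCardinality(String)"}, {"_partition_id", "LowCardinality(String)"}}};
    auto header = buildHeader({{"x", "UInt64"}}, {"_part"}, registry);
    for (const auto & [name, type] : header)
        std::cout << name << ' ' << type << '\n';
}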
diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 766863ed9f9..792f942fcf1 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/BlockNumberColumn.cpp b/src/Storages/BlockNumberColumn.cpp index 8c9e1fd902a..e69de29bb2d 100644 --- a/src/Storages/BlockNumberColumn.cpp +++ b/src/Storages/BlockNumberColumn.cpp @@ -1,23 +0,0 @@ -#include -#include - -namespace DB -{ - -CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - -CompressionCodecPtr getCompressionCodecForBlockNumberColumn() -{ - std::vector codecs; - codecs.reserve(2); - auto data_bytes_size = BlockNumberColumn::type->getSizeOfValueInMemory(); - codecs.emplace_back(getCompressionCodecDelta(data_bytes_size)); - codecs.emplace_back(CompressionCodecFactory::instance().get("LZ4", {})); - return std::make_shared(codecs); -} - -const String BlockNumberColumn::name = "_block_number"; -const DataTypePtr BlockNumberColumn::type = std::make_shared(); -const CompressionCodecPtr BlockNumberColumn::compression_codec = getCompressionCodecForBlockNumberColumn(); - -} diff --git a/src/Storages/BlockNumberColumn.h b/src/Storages/BlockNumberColumn.h index fffa68bfd49..e69de29bb2d 100644 --- a/src/Storages/BlockNumberColumn.h +++ b/src/Storages/BlockNumberColumn.h @@ -1,16 +0,0 @@ -#pragma once -#include -#include -#include - -namespace DB -{ - -struct BlockNumberColumn -{ - static const String name; - static const DataTypePtr type; - static const CompressionCodecPtr compression_codec; -}; - -} diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 40d657f3deb..6ee87297cab 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -31,15 +31,12 @@ #include #include #include -#include +#include namespace DB { -CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - - namespace ErrorCodes { extern const int NO_SUCH_COLUMN_IN_TABLE; @@ -563,6 +560,12 @@ const ColumnDescription & ColumnsDescription::get(const String & column_name) co return *it; } +const ColumnDescription * ColumnsDescription::tryGet(const String & column_name) const +{ + auto it = columns.get<1>().find(column_name); + return it == columns.get<1>().end() ? 
nullptr : &(*it); +} + static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind) { switch (kind) @@ -789,33 +792,6 @@ bool ColumnsDescription::hasCompressionCodec(const String & column_name) const return it != columns.get<1>().end() && it->codec != nullptr; } -CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const -{ - const auto it = columns.get<1>().find(column_name); - - if (it == columns.get<1>().end() || !it->codec) - return default_codec; - - return CompressionCodecFactory::instance().get(it->codec, it->type, default_codec); -} - -CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name) const -{ - assert (column_name != BlockNumberColumn::name); - return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec()); -} - -ASTPtr ColumnsDescription::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const -{ - assert (column_name != BlockNumberColumn::name); - const auto it = columns.get<1>().find(column_name); - - if (it == columns.get<1>().end() || !it->codec) - return default_codec->getFullCodecDesc(); - - return it->codec; -} - ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const { ColumnTTLs ret; diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 5f3e75dc38e..cc6c7d0e1b9 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -29,6 +29,14 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +enum class VirtualsKind : UInt8 +{ + None = 0, + Ephemeral = 1, + Persistent = 2, + All = Ephemeral | Persistent, +}; + struct GetColumnsOptions { enum Kind : UInt8 @@ -52,9 +60,15 @@ struct GetColumnsOptions return *this; } - GetColumnsOptions & withVirtuals(bool value = true) + GetColumnsOptions & withVirtuals(VirtualsKind value = VirtualsKind::All) { - with_virtuals = value; + virtuals_kind = value; + return *this; + } + + GetColumnsOptions & withPersistentVirtuals(bool value = true) + { + with_persistent_virtuals = value; return *this; } @@ -64,17 +78,12 @@ struct GetColumnsOptions return *this; } - GetColumnsOptions & withSystemColumns(bool value = true) - { - with_system_columns = value; - return *this; - } - Kind kind; + VirtualsKind virtuals_kind = VirtualsKind::None; + bool with_subcolumns = false; - bool with_virtuals = false; + bool with_persistent_virtuals = false; bool with_extended_objects = false; - bool with_system_columns = false; }; /// Description of a single table column (in CREATE TABLE for example). 
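The GetColumnsOptions hunk above replaces the boolean with_virtuals / with_system_columns flags with a VirtualsKind bitmask while keeping the fluent builder style. The standalone sketch below models that shape: the enum values mirror the hunk, but ColumnsOptions and hasKind are simplified stand-ins, not the real type.

// Illustrative sketch of the VirtualsKind bitmask plus fluent options builder.
// Standalone model, not the ClickHouse GetColumnsOptions type.
#include <cstdint>
#include <iostream>

enum class VirtualsKind : uint8_t
{
    None = 0,
    Ephemeral = 1,
    Persistent = 2,
    All = Ephemeral | Persistent,
};

constexpr bool hasKind(VirtualsKind mask, VirtualsKind kind)
{
    return (static_cast<uint8_t>(mask) & static_cast<uint8_t>(kind)) != 0;
}

struct ColumnsOptions
{
    VirtualsKind virtuals_kind = VirtualsKind::None;
    bool with_subcolumns = false;

    // Fluent setters, mirroring withVirtuals()/withSubcolumns() in the hunk above.
    ColumnsOptions & withVirtuals(VirtualsKind value = VirtualsKind::All)
    {
        virtuals_kind = value;
        return *this;
    }

    ColumnsOptions & withSubcolumns(bool value = true)
    {
        with_subcolumns = value;
        return *this;
    }
};

int main()
{
    // E.g. a reader that wants persistent virtuals (like _row_exists) but not ephemeral ones.
    auto options = ColumnsOptions{}.withVirtuals(VirtualsKind::Persistent).withSubcolumns(true);
    std::cout << "persistent: " << hasKind(options.virtuals_kind, VirtualsKind::Persistent)
              << ", ephemeral: " << hasKind(options.virtuals_kind, VirtualsKind::Ephemeral) << '\n';
}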
@@ -161,6 +170,7 @@ public: bool hasNested(const String & column_name) const; bool hasSubcolumn(const String & column_name) const; const ColumnDescription & get(const String & column_name) const; + const ColumnDescription * tryGet(const String & column_name) const; template void modify(const String & column_name, F && f) @@ -214,9 +224,6 @@ public: /// Does column has non default specified compression codec bool hasCompressionCodec(const String & column_name) const; - CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; - CompressionCodecPtr getCodecOrDefault(const String & column_name) const; - ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; String toString() const; static ColumnsDescription parse(const String & str); @@ -270,4 +277,5 @@ private: /// don't have strange constructions in default expression like SELECT query or /// arrayJoin function. Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, ContextPtr context); + } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 85ef6a0bb35..957051cd409 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -237,11 +237,21 @@ void IStorage::renameInMemory(const StorageID & new_table_id) storage_id = new_table_id; } -NamesAndTypesList IStorage::getVirtuals() const +VirtualColumnsDescription IStorage::getVirtualsDescription() const { return {}; } +NamesAndTypesList IStorage::getVirtuals() const +{ + return getVirtualsDescription().getNamesAndTypesList(); +} + +Block IStorage::getVirtualsHeader() const +{ + return getVirtualsDescription().getSampleBlock(); +} + Names IStorage::getAllRegisteredNames() const { Names result; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4fa6bfdd617..94b089ef613 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -17,6 +17,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include #include @@ -226,7 +227,9 @@ public: /// virtual column will be overridden and inaccessible. /// /// By default return empty list of columns. - virtual NamesAndTypesList getVirtuals() const; + virtual VirtualColumnsDescription getVirtualsDescription() const; + virtual NamesAndTypesList getVirtuals() const; /// TODO: make non virtual. 
+ Block getVirtualsHeader() const; Names getAllRegisteredNames() const override; diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 7ff69f7cc4b..f774d3a387a 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -1,6 +1,7 @@ #include #include -#include +#include +#include #include #include #include @@ -22,9 +23,9 @@ namespace namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int NO_SUCH_COLUMN_IN_TABLE; } - IMergeTreeReader::IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, @@ -56,16 +57,10 @@ IMergeTreeReader::IMergeTreeReader( columns_to_read.reserve(requested_columns.size()); serializations.reserve(requested_columns.size()); - size_t pos = 0; for (const auto & column : requested_columns) { columns_to_read.emplace_back(getColumnInPart(column)); serializations.emplace_back(getSerializationInPart(column)); - - if (read_task_info && read_task_info->virt_column_names.contains(column.name)) - virt_column_pos_to_name.emplace(pos, column.name); - - ++pos; } } @@ -76,72 +71,36 @@ const IMergeTreeReader::ValueSizeMap & IMergeTreeReader::getAvgValueSizeHints() void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const { - if (std::all_of( - virt_column_pos_to_name.begin(), - virt_column_pos_to_name.end(), - [&columns](auto & elem) - { - chassert(elem.first < columns.size()); - return columns[elem.first] != nullptr; - })) - return; - chassert(read_task_info != nullptr); const IMergeTreeDataPart * part = read_task_info->data_part.get(); if (part->isProjectionPart()) part = part->getParentPart(); - for (auto [pos, name] : virt_column_pos_to_name) - { - auto & column = columns[pos]; + const auto & storage_columns = storage_snapshot->getMetadataForQuery()->getColumns(); + const auto & virtual_columns = storage_snapshot->virtual_columns; - if (column != nullptr) + auto it = requested_columns.begin(); + for (size_t pos = 0; pos < columns.size(); ++pos, ++it) + { + if (columns[pos] || storage_columns.has(it->name)) continue; - if (name == "_part_offset") - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Column {} must have been filled by part reader", name); - } - else if (name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - /// If _row_exists column isn't present in the part then fill it here with 1s - column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumnConst(rows, 1)->convertToFullColumnIfConst(); - } - else if (name == BlockNumberColumn::name) - { - column = BlockNumberColumn::type->createColumnConst(rows, part->info.min_block)->convertToFullColumnIfConst(); - } - else if (name == "_part") - { - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->name) - ->convertToFullColumnIfConst(); - } - else if (name == "_part_index") - { - column = DataTypeUInt64().createColumnConst(rows, read_task_info->part_index_in_query)->convertToFullColumnIfConst(); - } - else if (name == "_part_uuid") - { - column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst(); - } - else if (name == "_partition_id") - { - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->info.partition_id) - ->convertToFullColumnIfConst(); - } - else if (name == "_partition_value") - { - column = read_task_info->partition_value_type - ->createColumnConst(rows, Tuple(part->partition.value.begin(), 
part->partition.value.end())) - ->convertToFullColumnIfConst(); - } + auto virtual_column = virtual_columns.tryGet(it->name); + if (!virtual_column) + continue; + + if (!it->type->equals(*virtual_column->type)) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Data type for virtual column {} mismatched. Requested type: {}, Virtual column type: {}", + it->name, it->type->getName(), virtual_column->type->getName()); + + auto field = getFieldForConstVirtualColumn(it->name, *part, read_task_info->part_index_in_query); + columns[pos] = virtual_column->type->createColumnConst(rows, field)->convertToFullColumnIfConst(); } } -void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number) const +void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const { try { @@ -150,7 +109,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e res_columns, num_rows, Nested::convertToSubcolumns(requested_columns), Nested::convertToSubcolumns(available_columns), - partially_read_columns, storage_snapshot->metadata, block_number); + partially_read_columns, storage_snapshot->metadata); should_evaluate_missing_defaults = std::any_of( res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; }); diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index cd4417265fa..3fe9853fced 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -2,6 +2,7 @@ #include #include +#include "DataTypes/Serializations/ISerialization.h" #include #include #include @@ -49,7 +50,7 @@ public: /// Add columns from ordered_names that are not present in the block. /// Missing columns are added in the order specified by ordered_names. /// num_rows is needed in case if all res_columns are nullptr. - void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number = 0) const; + void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const; /// Evaluate defaulted columns if necessary. void evaluateMissingDefaults(Block additional_columns, Columns & res_columns) const; @@ -120,9 +121,6 @@ private: /// Shared information required for reading. MergeTreeReadTaskInfoPtr read_task_info; - - /// Map of positions in requested_columns which are virtual columns to their names. 
- std::map virt_column_pos_to_name; }; } diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 7fb4797e482..28a3c671914 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -15,7 +15,7 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index fc6599b2851..8eb714c7e24 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "Storages/ColumnsDescription.h" #include #include #include @@ -106,10 +107,8 @@ NameSet injectRequiredColumns( auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() - .withSystemColumns(); - - if (with_subcolumns) - options.withSubcolumns(); + .withVirtuals(VirtualsKind::Persistent) + .withSubcolumns(with_subcolumns); auto virtuals_options = GetColumnsOptions(GetColumnsOptions::None).withVirtuals(); @@ -283,12 +282,8 @@ MergeTreeReadTaskColumns getReadTaskColumns( MergeTreeReadTaskColumns result; auto options = GetColumnsOptions(GetColumnsOptions::All) .withExtendedObjects() - .withSystemColumns(); - - if (with_subcolumns) - options.withSubcolumns(); - - options.withVirtuals(); + .withVirtuals() + .withSubcolumns(with_subcolumns); bool has_part_offset = std::find(required_columns.begin(), required_columns.end(), "_part_offset") != required_columns.end(); NameSet columns_from_previous_steps; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c8262914702..a3a4cb9619f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "Common/logger_useful.h" #include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include "Storages/ProjectionsDescription.h" #include #include #include @@ -67,7 +69,7 @@ #include #include #include -#include +#include #include #include #include @@ -1001,73 +1003,38 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat /// TODO Checks for Graphite mode. 
} +const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value"}; -DataTypePtr MergeTreeData::getPartitionValueType() const +Block MergeTreeData::getHeaderWithVirtualsForFilter() const { - DataTypePtr partition_value_type; - auto partition_types = getInMemoryMetadataPtr()->partition_key.sample_block.getDataTypes(); - if (partition_types.empty()) - partition_value_type = std::make_shared(); - else - partition_value_type = std::make_shared(std::move(partition_types)); - return partition_value_type; + Block header; + const auto & virtuals_desc = getVirtualsDescription(); + for (const auto & name : virtuals_useful_for_filter) + if (auto column = virtuals_desc.tryGet(name)) + header.insert({column->type->createColumn(), column->type, name}); + return header; } - -Block MergeTreeData::getSampleBlockWithVirtualColumns() const +Block MergeTreeData::getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const { - DataTypePtr partition_value_type = getPartitionValueType(); - return { - ColumnWithTypeAndName( - DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_part"), - ColumnWithTypeAndName( - DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_partition_id"), - ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared(), "_part_uuid"), - ColumnWithTypeAndName(partition_value_type->createColumn(), partition_value_type, "_partition_value")}; -} + auto block = getHeaderWithVirtualsForFilter(); - -Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty) const -{ - auto block = getSampleBlockWithVirtualColumns(); - MutableColumns columns = block.mutateColumns(); - - auto & part_column = columns[0]; - auto & partition_id_column = columns[1]; - auto & part_uuid_column = columns[2]; - auto & partition_value_column = columns[3]; - - bool has_partition_value = typeid_cast(partition_value_column.get()); for (const auto & part_or_projection : parts) { if (ignore_empty && part_or_projection->isEmpty()) continue; - const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get(); - part_column->insert(part->name); - partition_id_column->insert(part->info.partition_id); - part_uuid_column->insert(part->uuid); - Tuple tuple(part->partition.value.begin(), part->partition.value.end()); - if (has_partition_value) - partition_value_column->insert(tuple); - if (one_part) + const auto * part = part_or_projection->isProjectionPart() + ? 
part_or_projection->getParentPart() + : part_or_projection.get(); + + for (auto & column : block) { - part_column = ColumnConst::create(std::move(part_column), 1); - partition_id_column = ColumnConst::create(std::move(partition_id_column), 1); - part_uuid_column = ColumnConst::create(std::move(part_uuid_column), 1); - if (has_partition_value) - partition_value_column = ColumnConst::create(std::move(partition_value_column), 1); - break; + auto field = getFieldForConstVirtualColumn(column.name, *part, 0); + column.column->assumeMutableRef().insert(field); } } - block.setColumns(std::move(columns)); - if (!has_partition_value) - block.erase("_partition_value"); return block; } @@ -1076,13 +1043,14 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const { if (parts.empty()) - return 0u; + return 0; + auto metadata_snapshot = getInMemoryMetadataPtr(); - Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */); + auto virtual_columns_block = getBlockWithVirtualsForFilter({parts[0]}); auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); - // Generate valid expressions for filtering + /// Generate valid expressions for filtering bool valid = true; for (const auto * input : filter_dag->getInputs()) if (!virtual_columns_block.has(input->result_name)) @@ -1095,7 +1063,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( std::unordered_set part_values; if (valid) { - virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */); + virtual_columns_block = getBlockWithVirtualsForFilter(parts); VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) @@ -6653,14 +6621,6 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( const auto & primary_key_max_column_name = metadata_snapshot->minmax_count_projection->primary_key_max_column_name; NameSet required_columns_set(required_columns.begin(), required_columns.end()); - if (required_columns_set.contains("_partition_value") && !typeid_cast(getPartitionValueType().get())) - { - throw Exception( - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, - "Missing column `_partition_value` because there is no partition column in table {}", - getStorageID().getTableName()); - } - if (!primary_key_max_column_name.empty()) need_primary_key_max_column = required_columns_set.contains(primary_key_max_column_name); @@ -6686,11 +6646,11 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( }; Block virtual_columns_block; - auto virtual_block = getSampleBlockWithVirtualColumns(); + auto virtual_block = getHeaderWithVirtualsForFilter(); bool has_virtual_column = std::any_of(required_columns.begin(), required_columns.end(), [&](const auto & name) { return virtual_block.has(name); }); if (has_virtual_column || filter_dag) { - virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */, true /* ignore_empty */); + virtual_columns_block = getBlockWithVirtualsForFilter(parts, /*ignore_empty=*/ true); if (virtual_columns_block.rows() == 0) return {}; } @@ -7960,19 +7920,29 @@ AlterConversionsPtr MergeTreeData::getAlterConversionsForPart(MergeTreeDataPartP return result; } -NamesAndTypesList MergeTreeData::getVirtuals() const +VirtualColumnsDescription 
MergeTreeData::getVirtualsDescription() const { - return NamesAndTypesList{ - NameAndTypePair("_part", std::make_shared(std::make_shared())), - NameAndTypePair("_part_index", std::make_shared()), - NameAndTypePair("_part_uuid", std::make_shared()), - NameAndTypePair("_partition_id", std::make_shared(std::make_shared())), - NameAndTypePair("_partition_value", getPartitionValueType()), - NameAndTypePair("_sample_factor", std::make_shared()), - NameAndTypePair("_part_offset", std::make_shared()), - LightweightDeleteDescription::FILTER_COLUMN, - NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type), - }; + VirtualColumnsDescription desc; + auto low_cardinality_type = std::make_shared(std::make_shared()); + auto metadata_snapshot = getInMemoryMetadataPtr(); + + desc.addEphemeral("_part", low_cardinality_type, ""); + desc.addEphemeral("_part_index", std::make_shared(), ""); + desc.addEphemeral("_part_uuid", std::make_shared(), ""); + desc.addEphemeral("_partition_id", low_cardinality_type, ""); + desc.addEphemeral("_sample_factor", std::make_shared(), ""); + desc.addEphemeral("_part_offset", std::make_shared(), ""); + + if (metadata_snapshot->hasPartitionKey()) + { + auto partition_types = getInMemoryMetadataPtr()->partition_key.sample_block.getDataTypes(); + desc.addEphemeral("_partition_value", std::make_shared(std::move(partition_types)), ""); + } + + desc.addPersistent(LightweightDeleteDescription::FILTER_COLUMN.name, LightweightDeleteDescription::FILTER_COLUMN.type, nullptr, ""); + desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, ""); + + return desc; } size_t MergeTreeData::getTotalMergesWithTTLInMergeList() const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4ad440dae00..c3c1312d429 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -444,7 +444,7 @@ public: bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); } - NamesAndTypesList getVirtuals() const override; + VirtualColumnsDescription getVirtualsDescription() const override; /// Snapshot for MergeTree contains the current set of data parts /// at the moment of the start of query. @@ -993,15 +993,13 @@ public: void removeQueryId(const String & query_id) const; void removeQueryIdNoLock(const String & query_id) const TSA_REQUIRES(query_id_set_mutex); - /// Return the partition expression types as a Tuple type. Return DataTypeUInt8 if partition expression is empty. - DataTypePtr getPartitionValueType() const; + static const Names virtuals_useful_for_filter; /// Construct a sample block of virtual columns. - Block getSampleBlockWithVirtualColumns() const; + Block getHeaderWithVirtualsForFilter() const; /// Construct a block consisting only of possible virtual columns for part pruning. - /// If one_part is true, fill in at most one part. - Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty = false) const; + Block getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const; /// In merge tree we do inserts with several steps. One of them: /// X. 
write part to temporary directory with some temp name diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 7baba26d15c..ee9fa30d98d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index b05b4584259..46c4338ab90 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -5,8 +5,6 @@ namespace DB { - CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -55,14 +53,10 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique(*marks_compressor); } - const auto & storage_columns = metadata_snapshot->getColumns(); + auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - ASTPtr compression; - if (column.name == BlockNumberColumn::name) - compression = BlockNumberColumn::compression_codec->getFullCodecDesc(); - else - compression = storage_columns.getCodecDescOrDefault(column.name, default_codec); + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); addStreams(column, compression); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 9d373504473..3a646e0b85d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -6,12 +6,11 @@ #include #include #include -#include +#include #include namespace DB { - CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); namespace ErrorCodes { @@ -91,15 +90,11 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( indices_to_recalc_, stats_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { - const auto & columns = metadata_snapshot->getColumns(); - for (const auto & it : columns_list) + auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); + for (const auto & column : columns_list) { - ASTPtr compression; - if (it.name == BlockNumberColumn::name) - compression = BlockNumberColumn::compression_codec->getFullCodecDesc(); - else - compression = columns.getCodecDescOrDefault(it.name, default_codec); - addStreams(it, compression); + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + addStreams(column, compression); } } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a76d370d057..df1176a9ea5 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -46,7 +46,7 @@ #include #include -#include +#include #include namespace CurrentMetrics @@ -483,12 +483,13 @@ std::optional> MergeTreeDataSelectExecutor::filterPar { if (!filter_dag) return {}; - auto sample = data.getSampleBlockWithVirtualColumns(); + + auto sample = data.getHeaderWithVirtualsForFilter(); auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample); if (!dag) return {}; - auto 
virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, false /* one_part */); + auto virtual_columns_block = data.getBlockWithVirtualsForFilter(parts); VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context); return VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); } @@ -876,54 +877,17 @@ static void selectColumnNames( bool & sample_factor_column_queried) { sample_factor_column_queried = false; + const auto & virtual_columns = data.getVirtualsDescription(); - for (const String & name : column_names_to_return) + for (const auto & name : column_names_to_return) { - if (name == "_part") + if (virtual_columns.has(name)) { - virt_column_names.push_back(name); - } - else if (name == "_part_index") - { - virt_column_names.push_back(name); - } - else if (name == "_partition_id") - { - virt_column_names.push_back(name); - } - else if (name == "_part_offset") - { - virt_column_names.push_back(name); - } - else if (name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - virt_column_names.push_back(name); - } - else if (name == BlockNumberColumn::name) - { - virt_column_names.push_back(name); - } - else if (name == "_part_uuid") - { - virt_column_names.push_back(name); - } - else if (name == "_partition_value") - { - if (!typeid_cast(data.getPartitionValueType().get())) - { - throw Exception( - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, - "Missing column `_partition_value` because there is no partition column in table {}", - data.getStorageID().getTableName()); - } + if (name == "_sample_factor") + sample_factor_column_queried = true; virt_column_names.push_back(name); } - else if (name == "_sample_factor") - { - sample_factor_column_queried = true; - virt_column_names.push_back(name); - } else { real_column_names.push_back(name); diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 701b8fbf528..47c2fe07bb4 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -114,7 +114,6 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -125,7 +124,6 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( reader_settings_, column_names_, virtual_column_names_, - partition_value_type_, settings_, context_) , WithContext(context_) diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 1a700abb009..b1335fd2774 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -24,7 +24,6 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index dbd27aebc21..8ed7a9d8707 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -40,7 +40,6 @@ MergeTreeReadPool::MergeTreeReadPool( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & 
virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -51,7 +50,6 @@ MergeTreeReadPool::MergeTreeReadPool( reader_settings_, column_names_, virtual_column_names_, - partition_value_type_, settings_, context_) , min_marks_for_concurrent_read(pool_settings.min_marks_for_concurrent_read) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 9da75648386..e45ccad912f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -32,7 +32,6 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index d5811c33f00..95e42d39cf7 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -13,7 +13,6 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & pool_settings_, const ContextPtr & context_) : parts_ranges(std::move(parts_)) @@ -23,7 +22,6 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( , reader_settings(reader_settings_) , column_names(column_names_) , virtual_column_names(virtual_column_names_) - , partition_value_type(partition_value_type_) , pool_settings(pool_settings_) , owned_mark_cache(context_->getGlobalContext()->getMarkCache()) , owned_uncompressed_cache(pool_settings_.use_uncompressed_cache ? 
context_->getGlobalContext()->getUncompressedCache() : nullptr) @@ -69,7 +67,6 @@ void MergeTreeReadPoolBase::fillPerPartInfos() /*with_subcolumns=*/true); read_task_info.virt_column_names = {virtual_column_names.begin(), virtual_column_names.end()}; - read_task_info.partition_value_type = partition_value_type; if (pool_settings.preferred_block_size_bytes > 0) { diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index 3aa9eb8670e..a0c6a30fb63 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -29,7 +29,6 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); @@ -44,7 +43,6 @@ protected: const MergeTreeReaderSettings reader_settings; const Names column_names; const Names virtual_column_names; - const DataTypePtr partition_value_type; const PoolSettings pool_settings; const MarkCachePtr owned_mark_cache; const UncompressedCachePtr owned_uncompressed_cache; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp index 692e45993c7..1b621ad5055 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp @@ -18,7 +18,6 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -29,7 +28,6 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( reader_settings_, column_names_, virtual_column_names_, - partition_value_type_, settings_, context_) , has_limit_below_one_block(has_limit_below_one_block_) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h index de7457dfab8..d9cc1ba4984 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h @@ -17,7 +17,6 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index fb14dfe9a6e..47436ed1407 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -19,7 +19,6 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -30,7 +29,6 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( reader_settings_, column_names_, virtual_column_names_, - partition_value_type_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index 9a8b0e50c04..6a548dffe37 100644 --- 
a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -17,7 +17,6 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index d242d1e81fe..a822a517933 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -18,7 +18,6 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -29,7 +28,6 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd reader_settings_, column_names_, virtual_column_names_, - partition_value_type_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h index 09935e1be2f..3e5f8f5dfba 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -18,7 +18,6 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - const DataTypePtr & partition_value_type_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h index 7e935f5d28d..21ec19ee033 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.h +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -62,8 +62,6 @@ struct MergeTreeReadTaskInfo MergeTreeReadTaskColumns task_columns; /// Virtual column names to read NameSet virt_column_names; - /// For `partition_value` virtual column - DataTypePtr partition_value_type; /// Shared initialized size predictor. It is copied for each new task. 
MergeTreeBlockSizePredictorPtr shared_size_predictor; }; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index ad8b280a7e6..8e089741cab 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -6,12 +6,13 @@ #include #include #include +#include "Storages/StorageSnapshot.h" #include #include #include #include #include -#include +#include #include namespace DB @@ -21,12 +22,13 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; extern const int QUERY_WAS_CANCELLED; + extern const int NO_SUCH_COLUMN_IN_TABLE; } MergeTreeSelectProcessor::MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const MergeTreeData & storage_, + const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, @@ -34,13 +36,13 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( const Names & virt_column_names_) : pool(std::move(pool_)) , algorithm(std::move(algorithm_)) + , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) , reader_settings(reader_settings_) , block_size_params(block_size_params_) , virt_column_names(virt_column_names_) - , partition_value_type(storage_.getPartitionValueType()) { if (reader_settings.apply_deleted_mask) { @@ -58,7 +60,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( } result_header = pool->getHeader(); - injectVirtualColumns(result_header, partition_value_type, virt_column_names); + injectVirtualColumns(result_header, storage_snapshot, virt_column_names); result_header = applyPrewhereActions(result_header, prewhere_info); if (!prewhere_actions.steps.empty()) @@ -179,89 +181,20 @@ void MergeTreeSelectProcessor::initializeRangeReaders() task->initializeRangeReaders(all_prewhere_actions); } - -namespace +void MergeTreeSelectProcessor::injectVirtualColumns( + Block & block, + const StorageSnapshotPtr & storage_snapshot, + const Names & virtual_columns) { - struct VirtualColumnsInserter - { - explicit VirtualColumnsInserter(Block & block_) : block(block_) {} - - void insertUInt8Column(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertUInt64Column(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertUUIDColumn(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertLowCardinalityColumn(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(std::make_shared()), name}); - } - - void insertPartitionValueColumn(const DataTypePtr & partition_value_type, const String & name) - { - ColumnPtr column = partition_value_type->createColumn(); - block.insert({column, partition_value_type, name}); - } - - Block & block; - }; -} - -void MergeTreeSelectProcessor::injectVirtualColumns(Block & block, const DataTypePtr & partition_value_type, const Names & virtual_columns) -{ - VirtualColumnsInserter inserter(block); - - /// add virtual columns - /// Except _sample_factor, which is added from the outside. 
for (const auto & virtual_column_name : virtual_columns) { - if (virtual_column_name == "_part_offset") - { - inserter.insertUInt64Column(DataTypeUInt64().createColumn(), virtual_column_name); - } - else if (virtual_column_name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - ColumnPtr column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumn(); - inserter.insertUInt8Column(column, virtual_column_name); - } - else if (virtual_column_name == BlockNumberColumn::name) - { - ColumnPtr column = BlockNumberColumn::type->createColumn(); - inserter.insertUInt64Column(column, virtual_column_name); - } - else if (virtual_column_name == "_part") - { - ColumnPtr column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_part_index") - { - ColumnPtr column = DataTypeUInt64().createColumn(); - inserter.insertUInt64Column(column, virtual_column_name); - } - else if (virtual_column_name == "_part_uuid") - { - ColumnPtr column = DataTypeUUID().createColumn(); - inserter.insertUUIDColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_id") - { - ColumnPtr column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_value") - { - inserter.insertPartitionValueColumn(partition_value_type, virtual_column_name); - } + auto column = storage_snapshot->virtual_columns.tryGet(virtual_column_name); + if (!column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no virtual column {} in table {}", + virtual_column_name, storage_snapshot->storage.getStorageID().getNameForLogs()); + + block.insert({column->type->createColumn(), column->type, column->name}); } } @@ -317,9 +250,12 @@ Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const Prewhere } Block MergeTreeSelectProcessor::transformHeader( - Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns) + Block block, + const StorageSnapshotPtr & storage_snapshot, + const PrewhereInfoPtr & prewhere_info, + const Names & virtual_columns) { - injectVirtualColumns(block, partition_value_type, virtual_columns); + injectVirtualColumns(block, storage_snapshot, virtual_columns); auto transformed = applyPrewhereActions(std::move(block), prewhere_info); return transformed; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index a3cfec1774d..9b399003909 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -5,6 +5,7 @@ #include #include #include +#include "Storages/StorageSnapshot.h" namespace DB @@ -41,7 +42,7 @@ public: MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const MergeTreeData & storage_, + const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, @@ -52,8 +53,8 @@ public: static Block transformHeader( Block block, + const StorageSnapshotPtr & storage_snapshot, const PrewhereInfoPtr & prewhere_info, - const DataTypePtr & partition_value_type, const Names & virtual_columns); Block getHeader() const { return result_header; } @@ -82,7 +83,7 @@ private: }; /// 
Used for filling header with no rows as well as block with data - static void injectVirtualColumns(Block & block, const DataTypePtr & partition_value_type, const Names & virtual_columns); + static void injectVirtualColumns(Block & block, const StorageSnapshotPtr & storage_snapshot, const Names & virtual_columns); static Block applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info); /// Sets up range readers corresponding to data readers @@ -90,6 +91,7 @@ private: const MergeTreeReadPoolPtr pool; const MergeTreeSelectAlgorithmPtr algorithm; + const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; @@ -98,7 +100,6 @@ private: const MergeTreeReaderSettings reader_settings; const MergeTreeReadTask::BlockSizeParams block_size_params; const Names virt_column_names; - const DataTypePtr partition_value_type; /// Current task to read from. MergeTreeReadTaskPtr task; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 8a326d64fc7..73689e15cd4 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ namespace DB { + namespace ErrorCodes { extern const int MEMORY_LIMIT_EXCEEDED; @@ -55,7 +57,6 @@ protected: Chunk generate() override; private: - const MergeTreeData & storage; StorageSnapshotPtr storage_snapshot; @@ -86,7 +87,6 @@ private: void finish(); }; - MergeTreeSequentialSource::MergeTreeSequentialSource( MergeTreeSequentialSourceType type, const MergeTreeData & storage_, @@ -136,10 +136,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( { auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() - .withSystemColumns(); - - if (storage.supportsSubcolumns()) - options.withSubcolumns(); + .withVirtuals(VirtualsKind::Persistent) + .withSubcolumns(storage.supportsSubcolumns()); columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); } @@ -193,6 +191,38 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( {}); } +static void fillBlockNumberColumns( + Columns & res_columns, + const NamesAndTypesList & columns_list, + UInt64 block_number, + UInt64 block_offset, + UInt64 num_rows) +{ + chassert(res_columns.size() == columns_list.size()); + + auto it = columns_list.begin(); + for (size_t i = 0; i < res_columns.size(); ++i, ++it) + { + if (res_columns[i]) + continue; + + if (it->name == BlockNumberColumn::name) + { + res_columns[i] = BlockNumberColumn::type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst(); + } + else if (it->name == BlockOffsetColumn::name) + { + auto column = BlockOffsetColumn::type->createColumn(); + auto & block_offset_data = assert_cast(*column).getData(); + + block_offset_data.resize(num_rows); + std::iota(block_offset_data.begin(), block_offset_data.end(), block_offset); + + res_columns[i] = std::move(column); + } + } +} + Chunk MergeTreeSequentialSource::generate() try { @@ -211,16 +241,16 @@ try if (rows_read) { + fillBlockNumberColumns(columns, sample, data_part->info.min_block, current_row, rows_read); + current_row += rows_read; current_mark += (rows_to_read == rows_read); bool should_evaluate_missing_defaults = false; - reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read, data_part->info.min_block); + 
reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read); if (should_evaluate_missing_defaults) - { reader->evaluateMissingDefaults({}, columns); - } reader->performRequiredConversions(columns); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9959688d889..0ec8fb5c9ad 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTreeVirtualColumns.cpp new file mode 100644 index 00000000000..0936c933f9e --- /dev/null +++ b/src/Storages/MergeTreeVirtualColumns.cpp @@ -0,0 +1,62 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + +static ASTPtr getCompressionCodecDeltaLZ4() +{ + return makeASTFunction("CODEC", + std::make_shared("Delta"), + std::make_shared("LZ4")); +} + +const String RowExistsColumn::name = "_row_exists"; +const DataTypePtr RowExistsColumn::type = std::make_shared(); + +const String BlockNumberColumn::name = "_block_number"; +const DataTypePtr BlockNumberColumn::type = std::make_shared(); +const ASTPtr BlockNumberColumn::codec = getCompressionCodecDeltaLZ4(); + +const String BlockOffsetColumn::name = "_block_offset"; +const DataTypePtr BlockOffsetColumn::type = std::make_shared(); +const ASTPtr BlockOffsetColumn::codec = getCompressionCodecDeltaLZ4(); + +Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part, UInt64 part_index) +{ + if (column_name == "_part_offset" || column_name == BlockOffsetColumn::name) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column {} is not const and must be filled by range reader", column_name); + + if (column_name == RowExistsColumn::name) + return 1ULL; + + if (column_name == BlockNumberColumn::name) + return part.info.min_block; + + if (column_name == "_part") + return part.name; + + if (column_name == "_part_index") + return part_index; + + if (column_name == "_part_uuid") + return part.uuid; + + if (column_name == "_partition_id") + return part.info.partition_id; + + if (column_name == "_partition_value") + return Tuple(part.partition.value.begin(), part.partition.value.end()); + + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Unexpected virtual column name: {}", column_name); +} + +} diff --git a/src/Storages/MergeTreeVirtualColumns.h b/src/Storages/MergeTreeVirtualColumns.h new file mode 100644 index 00000000000..3ee22028d1f --- /dev/null +++ b/src/Storages/MergeTreeVirtualColumns.h @@ -0,0 +1,33 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class IMergeTreeDataPart; + +struct RowExistsColumn +{ + static const String name; + static const DataTypePtr type; +}; + +struct BlockNumberColumn +{ + static const String name; + static const DataTypePtr type; + static const ASTPtr codec; +}; + +struct BlockOffsetColumn +{ + static const String name; + static const DataTypePtr type; + static const ASTPtr codec; +}; + +Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part, UInt64 part_index); + +} diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 86ed1d03b94..8c13031de3f 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -104,7 +104,7 @@ #include #include -#include +#include #include #include 
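The new MergeTreeVirtualColumns.{h,cpp} above centralizes the per-part constant values (_part, _partition_id, _block_number, _row_exists, ...) behind getFieldForConstVirtualColumn, and readers expand one such value into a full column of num_rows. The sketch below models that fill step in isolation; PartInfo and the string-valued columns are invented stand-ins, not the real reader API.

// Illustrative sketch: filling missing virtual columns from per-part constants,
// following the "compute one value per part, then replicate it num_rows times"
// pattern above. Standalone model, not the ClickHouse reader.
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

struct PartInfo                       // invented stand-in for a data part
{
    std::string name;
    std::string partition_id;
    long long min_block = 0;
};

// Model of getFieldForConstVirtualColumn: one value per (column, part).
std::optional<std::string> fieldForConstVirtual(const std::string & column, const PartInfo & part)
{
    if (column == "_part")
        return part.name;
    if (column == "_partition_id")
        return part.partition_id;
    if (column == "_block_number")
        return std::to_string(part.min_block);
    if (column == "_row_exists")
        return "1";                   // parts without a mask behave as "all rows exist"
    return std::nullopt;              // not a const virtual (e.g. _part_offset is per-row)
}

// Model of fillVirtualColumns: only columns the reader left empty are filled.
void fillVirtualColumns(std::vector<std::vector<std::string>> & columns,
                        const std::vector<std::string> & names,
                        const PartInfo & part,
                        size_t num_rows)
{
    for (size_t i = 0; i < columns.size(); ++i)
    {
        if (!columns[i].empty())
            continue;                 // already read from disk or filled by a previous step
        auto field = fieldForConstVirtual(names[i], part);
        if (!field)
            throw std::runtime_error("column " + names[i] + " must be filled by the part reader");
        columns[i].assign(num_rows, *field);   // "createColumnConst + convertToFull"
    }
}

int main()
{
    PartInfo part{"all_1_1_0", "all", 1};
    std::vector<std::string> names{"x", "_part", "_block_number"};
    std::vector<std::vector<std::string>> columns{{"42", "43"}, {}, {}};
    fillVirtualColumns(columns, names, part, 2);
    std::cout << columns[1][0] << ' ' << columns[2][1] << '\n';   // all_1_1_0 1
}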
diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 64ff224fc10..bda058c5635 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -349,7 +349,7 @@ Block StorageInMemoryMetadata::getSampleBlockNonMaterialized() const return res; } -Block StorageInMemoryMetadata::getSampleBlockWithVirtuals(const NamesAndTypesList & virtuals) const +Block StorageInMemoryMetadata::getSampleBlockwithVirtuals(const NamesAndTypesList & virtuals) const { auto res = getSampleBlock(); diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index ecc30f7b756..1fa30084fad 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -174,7 +174,7 @@ struct StorageInMemoryMetadata /// Block with ordinary + materialized + virtuals. Virtuals have to be /// explicitly specified, because they are part of Storage type, not /// Storage metadata. - Block getSampleBlockWithVirtuals(const NamesAndTypesList & virtuals) const; + Block getSampleBlockwithVirtuals(const NamesAndTypesList & virtuals) const; /// Returns structure with partition key. const KeyDescription & getPartitionKey() const; diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index c7b0a9d0644..a31e95025b6 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -22,6 +22,7 @@ #include #include "StorageLogSettings.h" +#include "Storages/StorageSnapshot.h" #include #include #include @@ -35,7 +36,7 @@ #include #include #include -#include +#include #include #include @@ -48,8 +49,6 @@ namespace DB { - CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - namespace ErrorCodes { extern const int TIMEOUT_EXCEEDED; @@ -299,6 +298,7 @@ public: : SinkToStorage(metadata_snapshot_->getSampleBlock()) , storage(storage_) , metadata_snapshot(metadata_snapshot_) + , storage_snapshot(std::make_shared(storage, metadata_snapshot)) , lock(std::move(lock_)) { if (!lock) @@ -343,6 +343,7 @@ public: private: StorageLog & storage; StorageMetadataPtr metadata_snapshot; + StorageSnapshotPtr storage_snapshot; WriteLock lock; bool done = false; @@ -476,13 +477,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no information about file {} in StorageLog", data_file_name); const auto & data_file = *data_file_it->second; - const auto & columns = metadata_snapshot->getColumns(); - - CompressionCodecPtr compression; - if (name_and_type.name == BlockNumberColumn::name) - compression = BlockNumberColumn::compression_codec; - else - compression = columns.getCodecOrDefault(name_and_type.name); + auto compression = storage_snapshot->getCodecOrDefault(name_and_type.name); it = streams.try_emplace(data_file.name, storage.disk, data_file.path, storage.file_checker.getFileSize(data_file.path), diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 34c092c7208..d1ccbaf02dc 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -1,11 +1,12 @@ #include #include -#include +#include #include #include #include #include #include +#include "Storages/ColumnsDescription.h" namespace DB { @@ -18,6 +19,39 @@ namespace ErrorCodes extern const int COLUMN_QUERIED_MORE_THAN_ONCE; } +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_) + : storage(storage_) + , metadata(std::move(metadata_)) + 
, virtual_columns(storage_.getVirtualsDescription()) +{ +} + +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + ColumnsDescription object_columns_) + : storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(storage_.getVirtualsDescription()) + , object_columns(std::move(object_columns_)) +{ +} + +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + ColumnsDescription object_columns_, + DataPtr data_) + : storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(storage_.getVirtualsDescription()) + , object_columns(std::move(object_columns_)) + , data(std::move(data_)) +{ +} + std::shared_ptr StorageSnapshot::clone(DataPtr data_) const { auto res = std::make_shared(storage, metadata, object_columns); @@ -28,17 +62,6 @@ std::shared_ptr StorageSnapshot::clone(DataPtr data_) const return res; } -void StorageSnapshot::init() -{ - for (const auto & [name, type] : storage.getVirtuals()) - virtual_columns[name] = type; - - if (storage.hasLightweightDeletedMask()) - system_columns[LightweightDeleteDescription::FILTER_COLUMN.name] = LightweightDeleteDescription::FILTER_COLUMN.type; - - system_columns[BlockNumberColumn::name] = BlockNumberColumn::type; -} - NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) const { auto all_columns = getMetadataForQuery()->getColumns().get(options); @@ -46,35 +69,22 @@ NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) if (options.with_extended_objects) extendObjectColumns(all_columns, object_columns, options.with_subcolumns); - NameSet column_names; - if (options.with_virtuals) + if (options.virtuals_kind != VirtualsKind::None && !virtual_columns.empty()) { - /// Virtual columns must be appended after ordinary, - /// because user can override them. 
- if (!virtual_columns.empty()) + NameSet column_names; + for (const auto & column : all_columns) + column_names.insert(column.name); + + auto virtuals_list = virtual_columns.get(options.virtuals_kind); + for (const auto & column : virtuals_list) { - for (const auto & column : all_columns) - column_names.insert(column.name); + if (column_names.contains(column.name)) + continue; - for (const auto & [name, type] : virtual_columns) - if (!column_names.contains(name)) - all_columns.emplace_back(name, type); + all_columns.emplace_back(column.name, column.type); } } - if (options.with_system_columns) - { - if (!system_columns.empty() && column_names.empty()) - { - for (const auto & column : all_columns) - column_names.insert(column.name); - } - - for (const auto & [name, type] : system_columns) - if (!column_names.contains(name)) - all_columns.emplace_back(name, type); - } - return all_columns; } @@ -100,18 +110,11 @@ std::optional StorageSnapshot::tryGetColumn(const GetColumnsOpt return object_column; } - if (options.with_virtuals) + if (options.virtuals_kind != VirtualsKind::None) { - auto it = virtual_columns.find(column_name); - if (it != virtual_columns.end()) - return NameAndTypePair(column_name, it->second); - } - - if (options.with_system_columns) - { - auto it = system_columns.find(column_name); - if (it != system_columns.end()) - return NameAndTypePair(column_name, it->second); + auto virtual_column = virtual_columns.tryGet(column_name, options.virtuals_kind); + if (virtual_column) + return NameAndTypePair{virtual_column->name, virtual_column->type}; } return {}; @@ -126,6 +129,47 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } +CompressionCodecPtr StorageSnapshot::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec + ? CompressionCodecFactory::instance().get(column_desc.codec, column_desc.type, default_codec) + : default_codec; + }; + + const auto & columns = metadata->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + + if (const auto virtual_desc = virtual_columns.tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + + return default_codec; +} + +CompressionCodecPtr StorageSnapshot::getCodecOrDefault(const String & column_name) const +{ + return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec()); +} + +ASTPtr StorageSnapshot::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec ? 
column_desc.codec : default_codec->getFullCodecDesc(); + }; + + const auto & columns = metadata->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + + if (const auto virtual_desc = virtual_columns.tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + + return default_codec->getFullCodecDesc(); +} + Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const { Block res; @@ -143,11 +187,11 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) cons { res.insert({object_column->type->createColumn(), object_column->type, column_name}); } - else if (auto it = virtual_columns.find(column_name); it != virtual_columns.end()) + else if (auto virtual_column = virtual_columns.tryGet(column_name)) { /// Virtual columns must be appended after ordinary, because user can /// override them. - const auto & type = it->second; + const auto & type = virtual_column->type; res.insert({type->createColumn(), type, column_name}); } else @@ -175,12 +219,11 @@ ColumnsDescription StorageSnapshot::getDescriptionForColumns(const Names & colum { res.add(*object_column, "", false, false); } - else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) + else if (auto virtual_column = virtual_columns.tryGet(name)) { /// Virtual columns must be appended after ordinary, because user can /// override them. - const auto & type = it->second; - res.add({name, type}); + res.add({name, virtual_column->type}); } else { @@ -216,7 +259,7 @@ void StorageSnapshot::check(const Names & column_names) const { bool has_column = columns.hasColumnOrSubcolumn(GetColumnsOptions::AllPhysical, name) || object_columns.hasColumnOrSubcolumn(GetColumnsOptions::AllPhysical, name) - || virtual_columns.contains(name); + || virtual_columns.has(name); if (!has_column) { diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index d62e118e1f2..cabc4f6e2b4 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -1,10 +1,14 @@ #pragma once #include +#include namespace DB { class IStorage; +class ICompressionCodec; + +using CompressionCodecPtr = std::shared_ptr; /// Snapshot of storage that fixes set columns that can be read in query. /// There are 3 sources of columns: regular columns from metadata, @@ -13,6 +17,7 @@ struct StorageSnapshot { const IStorage & storage; const StorageMetadataPtr metadata; + const VirtualColumnsDescription virtual_columns; const ColumnsDescription object_columns; /// Additional data, on which set of columns may depend. 
@@ -30,35 +35,18 @@ struct StorageSnapshot StorageSnapshot( const IStorage & storage_, - StorageMetadataPtr metadata_) - : storage(storage_), metadata(std::move(metadata_)) - { - init(); - } + StorageMetadataPtr metadata_); StorageSnapshot( const IStorage & storage_, StorageMetadataPtr metadata_, - ColumnsDescription object_columns_) - : storage(storage_) - , metadata(std::move(metadata_)) - , object_columns(std::move(object_columns_)) - { - init(); - } + ColumnsDescription object_columns_); StorageSnapshot( const IStorage & storage_, StorageMetadataPtr metadata_, ColumnsDescription object_columns_, - DataPtr data_) - : storage(storage_) - , metadata(std::move(metadata_)) - , object_columns(std::move(object_columns_)) - , data(std::move(data_)) - { - init(); - } + DataPtr data_); std::shared_ptr clone(DataPtr data_) const; @@ -72,6 +60,10 @@ struct StorageSnapshot std::optional tryGetColumn(const GetColumnsOptions & options, const String & column_name) const; NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const; + CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + CompressionCodecPtr getCodecOrDefault(const String & column_name) const; + ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + /// Block with ordinary + materialized + aliases + virtuals + subcolumns. Block getSampleBlockForColumns(const Names & column_names) const; @@ -90,12 +82,6 @@ struct StorageSnapshot private: void init(); - - std::unordered_map virtual_columns; - - /// System columns are not visible in the schema but might be persisted in the data. - /// One example of such column is lightweight delete mask '_row_exists'. - std::unordered_map system_columns; }; using StorageSnapshotPtr = std::shared_ptr; diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 3b2807965a4..540cd6b9a6b 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -52,7 +52,7 @@ public: size_t /*num_streams*/) override { storage_snapshot->check(column_names); - Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + Block sample_block = storage_snapshot->metadata->getSampleBlockwithVirtuals(getVirtuals()); if (supportsColumnsMask()) { diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp index 15543208dd9..bc868b93e9c 100644 --- a/src/Storages/System/StorageSystemJemalloc.cpp +++ b/src/Storages/System/StorageSystemJemalloc.cpp @@ -115,7 +115,7 @@ Pipe StorageSystemJemallocBins::read( { storage_snapshot->check(column_names); - auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + auto header = storage_snapshot->metadata->getSampleBlockwithVirtuals(getVirtuals()); MutableColumns res_columns = header.cloneEmptyColumns(); fillJemallocBins(res_columns); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index abf93bf1ac0..42ed1e3deb9 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -243,7 +243,7 @@ void StorageSystemZooKeeper::read( size_t max_block_size, size_t /*num_streams*/) { - auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + auto header = storage_snapshot->metadata->getSampleBlockwithVirtuals(getVirtuals()); 
auto read_step = std::make_unique(header, query_info, max_block_size, context); query_plan.addStep(std::move(read_step)); } diff --git a/src/Storages/VirtualColumnsDescription.cpp b/src/Storages/VirtualColumnsDescription.cpp new file mode 100644 index 00000000000..65223071dfe --- /dev/null +++ b/src/Storages/VirtualColumnsDescription.cpp @@ -0,0 +1,96 @@ +#include "Core/NamesAndTypes.h" +#include "DataTypes/Serializations/ISerialization.h" +#include "base/types.h" +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DUPLICATE_COLUMN; + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + +VirtualColumnDescription::VirtualColumnDescription( + String name_, DataTypePtr type_, ASTPtr codec_, String comment_, VirtualsKind kind_) + : ColumnDescription(std::move(name_), std::move(type_), std::move(codec_), std::move(comment_)) + , kind(kind_) +{ +} + +void VirtualColumnsDescription::add(VirtualColumnDescription desc_) +{ + auto [it, inserted] = container.emplace(std::move(desc_)); + if (!inserted) + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Virtual column {} already exists", it->name); +} + +void VirtualColumnsDescription::addEphemeral(String name, DataTypePtr type, String comment) +{ + add({std::move(name), std::move(type), nullptr, std::move(comment), VirtualsKind::Ephemeral}); +} + +void VirtualColumnsDescription::addPersistent(String name, DataTypePtr type, ASTPtr codec, String comment) +{ + add({std::move(name), std::move(type), std::move(codec), std::move(comment), VirtualsKind::Persistent}); +} + +NamesAndTypesList VirtualColumnsDescription::get(VirtualsKind kind) const +{ + NamesAndTypesList result; + for (const auto & column : container) + if (static_cast(column.kind) & static_cast(kind)) + result.emplace_back(column.name, column.type); + return result; +} + +std::optional VirtualColumnsDescription::tryGet(const String & name, VirtualsKind kind) const +{ + auto it = container.find(name); + if (it != container.end() && (static_cast(it->kind) & static_cast(kind))) + return NameAndTypePair{it->name, it->type}; + return {}; +} + +NameAndTypePair VirtualColumnsDescription::get(const String & name, VirtualsKind kind) const +{ + auto column = tryGet(name, kind); + if (!column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no virtual column {}", name); + return *column; +} + +std::optional VirtualColumnsDescription::tryGetDescription(const String & name, VirtualsKind kind) const +{ + auto it = container.find(name); + if (it != container.end() && (static_cast(it->kind) & static_cast(kind))) + return *it; + return {}; +} + +VirtualColumnDescription VirtualColumnsDescription::getDescription(const String & name, VirtualsKind kind) const +{ + auto column = tryGetDescription(name, kind); + if (!column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no virtual column {}", name); + return *column; +} + +NamesAndTypesList VirtualColumnsDescription::getNamesAndTypesList() const +{ + NamesAndTypesList result; + for (const auto & desc : container) + result.emplace_back(desc.name, desc.type); + return result; +} + +Block VirtualColumnsDescription::getSampleBlock() const +{ + Block result; + for (const auto & desc : container) + result.insert({desc.type->createColumn(), desc.type, desc.name}); + return result; +} + +} diff --git a/src/Storages/VirtualColumnsDescription.h b/src/Storages/VirtualColumnsDescription.h new file mode 100644 index 00000000000..b93bf29635a --- /dev/null +++ b/src/Storages/VirtualColumnsDescription.h @@ -0,0 +1,68 @@ +#pragma 
once +#include + +namespace DB +{ + +struct VirtualColumnDescription : public ColumnDescription +{ +public: + using Self = VirtualColumnDescription; + VirtualsKind kind; + + VirtualColumnDescription() = default; + VirtualColumnDescription(String name_, DataTypePtr type_, ASTPtr codec_, String comment_, VirtualsKind kind_); + + bool isEphemeral() const { return kind == VirtualsKind::Ephemeral; } + bool isPersistent() const { return kind == VirtualsKind::Persistent; } + + struct Comparator + { + using is_transparent = void; + bool operator()(const Self & lhs, const Self & rhs) const { return lhs.name < rhs.name; } + bool operator()(const Self & lhs, const String & rhs) const { return lhs.name < rhs; } + bool operator()(const String & lhs, const Self & rhs) const { return lhs < rhs.name; } + }; +}; + +class VirtualColumnsDescription +{ +public: + using Container = std::set; + using const_iterator = Container::const_iterator; + + const_iterator begin() const { return container.begin(); } + const_iterator end() const { return container.end(); } + + VirtualColumnsDescription() = default; + + void add(VirtualColumnDescription desc); + void addEphemeral(String name, DataTypePtr type, String comment); + void addPersistent(String name, DataTypePtr type, ASTPtr codec, String comment); + + bool empty() const { return container.empty(); } + bool has(const String & name) const { return container.contains(name); } + + NameAndTypePair get(const String & name, VirtualsKind kind) const; + std::optional tryGet(const String & name, VirtualsKind kind) const; + + NameAndTypePair get(const String & name) const { return get(name, VirtualsKind::All); } + std::optional tryGet(const String & name) const { return tryGet(name, VirtualsKind::All); } + + std::optional tryGetDescription(const String & name, VirtualsKind kind) const; + VirtualColumnDescription getDescription(const String & name, VirtualsKind kind) const; + + std::optional tryGetDescription(const String & name) const { return tryGetDescription(name, VirtualsKind::All); } + VirtualColumnDescription getDescription(const String & name) const { return getDescription(name, VirtualsKind::All); } + + NamesAndTypesList get(VirtualsKind kind) const; + NamesAndTypesList getNamesAndTypesList() const; + + Block getSampleBlock() const; + Block getSampleBlock(const Names & names) const; + +private: + Container container; +}; + +} From 74399253d9a45402218a8d54835e3cbc6be9bd4e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 21 Feb 2024 02:28:30 +0000 Subject: [PATCH 020/356] refactoring of virtual columns --- .../QueryPlan/ReadFromMergeTree.cpp | 47 ++++++---------- src/Processors/QueryPlan/ReadFromMergeTree.h | 16 ++---- src/Storages/BlockNumberColumn.cpp | 0 src/Storages/BlockNumberColumn.h | 0 src/Storages/MergeTree/IMergeTreeReader.cpp | 1 - .../MergeTreeDataPartWriterCompact.cpp | 1 - .../MergeTree/MergeTreeDataSelectExecutor.cpp | 53 ++----------------- .../MergeTree/MergeTreeDataSelectExecutor.h | 1 - .../MergeTree/MergeTreePrefetchedReadPool.cpp | 2 - .../MergeTree/MergeTreePrefetchedReadPool.h | 1 - src/Storages/MergeTree/MergeTreeReadPool.cpp | 2 - src/Storages/MergeTree/MergeTreeReadPool.h | 1 - .../MergeTree/MergeTreeReadPoolBase.cpp | 11 +--- .../MergeTree/MergeTreeReadPoolBase.h | 2 - .../MergeTree/MergeTreeReadPoolInOrder.cpp | 2 - .../MergeTree/MergeTreeReadPoolInOrder.h | 1 - .../MergeTreeReadPoolParallelReplicas.cpp | 2 - .../MergeTreeReadPoolParallelReplicas.h | 1 - ...rgeTreeReadPoolParallelReplicasInOrder.cpp | 2 - 
...MergeTreeReadPoolParallelReplicasInOrder.h | 1 - src/Storages/MergeTree/MergeTreeReadTask.h | 2 - .../MergeTree/MergeTreeSelectProcessor.cpp | 36 ++----------- .../MergeTree/MergeTreeSelectProcessor.h | 12 +---- src/Storages/MergeTreeVirtualColumns.cpp | 1 + 24 files changed, 31 insertions(+), 167 deletions(-) delete mode 100644 src/Storages/BlockNumberColumn.cpp delete mode 100644 src/Storages/BlockNumberColumn.h diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e2ce36264dd..c095dc06c3a 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -265,29 +265,24 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c ReadFromMergeTree::ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, std::vector alter_conversions_, - Names real_column_names_, - Names virt_column_names_, + Names all_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, StorageSnapshotPtr storage_snapshot_, ContextPtr context_, size_t max_block_size_, size_t num_streams_, - bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading) : SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader( - storage_snapshot_->getSampleBlockForColumns(real_column_names_), - storage_snapshot_, - query_info_.prewhere_info, - virt_column_names_)}) + storage_snapshot_->getSampleBlockForColumns(all_column_names_), + query_info_.prewhere_info)}) , reader_settings(getMergeTreeReaderSettings(context_, query_info_)) , prepared_parts(std::move(parts_)) , alter_conversions_for_parts(std::move(alter_conversions_)) - , real_column_names(std::move(real_column_names_)) - , virt_column_names(std::move(virt_column_names_)) + , all_column_names(std::move(all_column_names_)) , data(data_) , query_info(query_info_) , prewhere_info(query_info_.prewhere_info) @@ -300,7 +295,7 @@ ReadFromMergeTree::ReadFromMergeTree( .preferred_block_size_bytes = context->getSettingsRef().preferred_block_size_bytes, .preferred_max_column_in_block_size_bytes = context->getSettingsRef().preferred_max_column_in_block_size_bytes} , requested_num_streams(num_streams_) - , sample_factor_column_queried(sample_factor_column_queried_) + , sample_factor_column_queried(false) /// TODO: kek , max_block_numbers_to_read(std::move(max_block_numbers_to_read_)) , log(std::move(log_)) , analyzed_result_ptr(analyzed_result_ptr_) @@ -380,7 +375,6 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); @@ -395,7 +389,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto processor = std::make_unique( pool, std::move(algorithm), storage_snapshot, prewhere_info, - actions_settings, block_size_copy, reader_settings, virt_column_names); + actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); pipes.emplace_back(std::move(source)); @@ -461,7 +455,6 @@ Pipe ReadFromMergeTree::readFromPool( actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -474,7 +467,6 @@ Pipe ReadFromMergeTree::readFromPool( actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -494,7 +486,7 @@ Pipe ReadFromMergeTree::readFromPool( auto processor = 
std::make_unique( pool, std::move(algorithm), storage_snapshot, prewhere_info, - actions_settings, block_size_copy, reader_settings, virt_column_names); + actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -550,7 +542,6 @@ Pipe ReadFromMergeTree::readInOrder( actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -565,7 +556,6 @@ Pipe ReadFromMergeTree::readInOrder( actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -600,7 +590,7 @@ Pipe ReadFromMergeTree::readInOrder( auto processor = std::make_unique( pool, std::move(algorithm), storage_snapshot, prewhere_info, - actions_settings, block_size, reader_settings, virt_column_names); + actions_settings, block_size, reader_settings); processor->addPartLevelToChunk(isQueryWithFinal()); @@ -1311,8 +1301,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( requested_num_streams, max_block_numbers_to_read, data, - real_column_names, - sample_factor_column_queried, + all_column_names, log, indexes); } @@ -1506,8 +1495,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes) { @@ -1523,8 +1511,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( num_streams, max_block_numbers_to_read, data, - real_column_names, - sample_factor_column_queried, + all_column_names, log, indexes); } @@ -1538,8 +1525,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes) { @@ -1548,7 +1534,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( size_t total_parts = parts.size(); - result.column_names_to_read = real_column_names; + result.column_names_to_read = all_column_names; /// If there are only virtual columns in the query, you must request at least one non-virtual one. 
if (result.column_names_to_read.empty()) @@ -1607,7 +1593,6 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( data, metadata_snapshot, context, - sample_factor_column_queried, log); if (result.sampling.read_nothing) @@ -1724,10 +1709,8 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info prewhere_info = prewhere_info_value; output_stream = DataStream{.header = MergeTreeSelectProcessor::transformHeader( - storage_snapshot->getSampleBlockForColumns(real_column_names), - storage_snapshot, - prewhere_info_value, - virt_column_names)}; + storage_snapshot->getSampleBlockForColumns(all_column_names), + prewhere_info_value)}; updateSortDescriptionForOutputStream( *output_stream, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index fdeaff57279..4ecaa5e262f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -110,15 +110,13 @@ public: ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, std::vector alter_conversions_, - Names real_column_names_, - Names virt_column_names_, + Names all_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, StorageSnapshotPtr storage_snapshot, ContextPtr context_, size_t max_block_size_, size_t num_streams_, - bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, @@ -135,8 +133,7 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeIndexes(JSONBuilder::JSONMap & map) const override; - const Names & getRealColumnNames() const { return real_column_names; } - const Names & getVirtualColumnNames() const { return virt_column_names; } + const Names & getRealColumnNames() const { return all_column_names; } StorageID getStorageID() const { return data.getStorageID(); } const StorageSnapshotPtr & getStorageSnapshot() const { return storage_snapshot; } @@ -166,8 +163,7 @@ public: size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes); @@ -215,8 +211,7 @@ private: size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes); @@ -233,8 +228,7 @@ private: MergeTreeData::DataPartsVector prepared_parts; std::vector alter_conversions_for_parts; - Names real_column_names; - Names virt_column_names; + Names all_column_names; const MergeTreeData & data; SelectQueryInfo query_info; diff --git a/src/Storages/BlockNumberColumn.cpp b/src/Storages/BlockNumberColumn.cpp deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/Storages/BlockNumberColumn.h b/src/Storages/BlockNumberColumn.h deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index f774d3a387a..7e5bb0aaea4 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -23,7 +23,6 @@ namespace namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int NO_SUCH_COLUMN_IN_TABLE; } IMergeTreeReader::IMergeTreeReader( diff --git 
a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 46c4338ab90..1721fd15b8d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index df1176a9ea5..6b09f85a74d 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -69,7 +69,6 @@ namespace ErrorCodes extern const int CANNOT_PARSE_TEXT; extern const int TOO_MANY_PARTITIONS; extern const int DUPLICATED_PART_UUIDS; - extern const int NO_SUCH_COLUMN_IN_TABLE; } @@ -166,7 +165,6 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( const MergeTreeData & data, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - bool sample_factor_column_queried, LoggerPtr log) { const Settings & settings = context->getSettingsRef(); @@ -296,7 +294,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (sampling.use_sampling) { - if (sample_factor_column_queried && relative_sample_size != RelativeSize(0)) + if (relative_sample_size != RelativeSize(0)) sampling.used_sample_factor = 1.0 / boost::rational_cast(relative_sample_size); RelativeSize size_of_universum = 0; @@ -869,32 +867,6 @@ std::shared_ptr MergeTreeDataSelectExecutor::checkLimits( return nullptr; } -static void selectColumnNames( - const Names & column_names_to_return, - const MergeTreeData & data, - Names & real_column_names, - Names & virt_column_names, - bool & sample_factor_column_queried) -{ - sample_factor_column_queried = false; - const auto & virtual_columns = data.getVirtualsDescription(); - - for (const auto & name : column_names_to_return) - { - if (virtual_columns.has(name)) - { - if (name == "_sample_factor") - sample_factor_column_queried = true; - - virt_column_names.push_back(name); - } - else - { - real_column_names.push_back(name); - } - } -} - ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, const PrewhereInfoPtr & prewhere_info, @@ -910,14 +882,6 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar if (total_parts == 0) return std::make_shared(); - Names real_column_names; - Names virt_column_names; - /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. - /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query. - bool sample_factor_column_queried = false; - - selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried); - std::optional indexes; /// NOTE: We don't need alter_conversions because the returned analysis_result is only used for: /// 1. estimate the number of rows to read; 2. projection reading, which doesn't have alter_conversions. 
@@ -932,8 +896,7 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar num_streams, max_block_numbers_to_read, data, - real_column_names, - sample_factor_column_queried, + column_names_to_return, log, indexes); } @@ -960,14 +923,6 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( else if (parts.empty()) return {}; - Names real_column_names; - Names virt_column_names; - /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. - /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query. - bool sample_factor_column_queried = false; - - selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried); - /// Do not keep data parts in snapshot. /// They are stored separately, and some could be released after PK analysis. auto storage_snapshot_copy = storage_snapshot->clone(std::make_unique()); @@ -975,15 +930,13 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( return std::make_unique( std::move(parts), std::move(alter_conversions), - real_column_names, - virt_column_names, + column_names_to_return, data, query_info, storage_snapshot_copy, context, max_block_size, num_streams, - sample_factor_column_queried, max_block_numbers_to_read, log, merge_tree_select_result_ptr, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 17975354187..79f936ae0a8 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -215,7 +215,6 @@ public: const MergeTreeData & data, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - bool sample_factor_column_queried, LoggerPtr log); /// Check query limits: max_partitions_to_read, max_concurrent_queries. 
diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 47c2fe07bb4..07c4b367b2c 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -113,7 +113,6 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -123,7 +122,6 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , WithContext(context_) diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index b1335fd2774..250631c6264 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -23,7 +23,6 @@ public: const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 8ed7a9d8707..e0f5eb22950 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -39,7 +39,6 @@ MergeTreeReadPool::MergeTreeReadPool( const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -49,7 +48,6 @@ MergeTreeReadPool::MergeTreeReadPool( actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , min_marks_for_concurrent_read(pool_settings.min_marks_for_concurrent_read) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index e45ccad912f..44726f3877f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -31,7 +31,6 @@ public: const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 95e42d39cf7..a1d5ddd8729 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -12,7 +12,6 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & pool_settings_, const ContextPtr & context_) : parts_ranges(std::move(parts_)) @@ -21,7 +20,6 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( , actions_settings(actions_settings_) , reader_settings(reader_settings_) , column_names(column_names_) - , virtual_column_names(virtual_column_names_) , pool_settings(pool_settings_) , owned_mark_cache(context_->getGlobalContext()->getMarkCache()) , 
owned_uncompressed_cache(pool_settings_.use_uncompressed_cache ? context_->getGlobalContext()->getUncompressedCache() : nullptr) @@ -52,22 +50,15 @@ void MergeTreeReadPoolBase::fillPerPartInfos() LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, part_with_ranges.alter_conversions); - Names column_and_virtual_column_names; - column_and_virtual_column_names.reserve(column_names.size() + virtual_column_names.size()); - column_and_virtual_column_names.insert(column_and_virtual_column_names.end(), column_names.begin(), column_names.end()); - column_and_virtual_column_names.insert( - column_and_virtual_column_names.end(), virtual_column_names.begin(), virtual_column_names.end()); read_task_info.task_columns = getReadTaskColumns( part_info, storage_snapshot, - column_and_virtual_column_names, + column_names, prewhere_info, actions_settings, reader_settings, /*with_subcolumns=*/true); - read_task_info.virt_column_names = {virtual_column_names.begin(), virtual_column_names.end()}; - if (pool_settings.preferred_block_size_bytes > 0) { const auto & result_column_names = read_task_info.task_columns.columns.getNames(); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index a0c6a30fb63..5ec2cf454c3 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -28,7 +28,6 @@ public: const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); @@ -42,7 +41,6 @@ protected: const ExpressionActionsSettings actions_settings; const MergeTreeReaderSettings reader_settings; const Names column_names; - const Names virtual_column_names; const PoolSettings pool_settings; const MarkCachePtr owned_mark_cache; const UncompressedCachePtr owned_uncompressed_cache; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp index 1b621ad5055..dd341f6f750 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp @@ -17,7 +17,6 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -27,7 +26,6 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , has_limit_below_one_block(has_limit_below_one_block_) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h index d9cc1ba4984..1b846fdb22a 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h @@ -16,7 +16,6 @@ public: const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 47436ed1407..24c0e2525ff 100644 --- 
a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -18,7 +18,6 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -28,7 +27,6 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index 6a548dffe37..d9895b3e14b 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -16,7 +16,6 @@ public: const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index a822a517933..a34bd05ae63 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -17,7 +17,6 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( @@ -27,7 +26,6 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h index 3e5f8f5dfba..207dfb7899a 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -17,7 +17,6 @@ public: const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h index 21ec19ee033..b69abac5451 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.h +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -60,8 +60,6 @@ struct MergeTreeReadTaskInfo AlterConversionsPtr alter_conversions; /// Column names to read during PREWHERE and WHERE MergeTreeReadTaskColumns task_columns; - /// Virtual column names to read - NameSet virt_column_names; /// Shared initialized size predictor. It is copied for each new task. 
MergeTreeBlockSizePredictorPtr shared_size_predictor; }; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 8e089741cab..d94d4506ee6 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -32,8 +32,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, - const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_) + const MergeTreeReaderSettings & reader_settings_) : pool(std::move(pool_)) , algorithm(std::move(algorithm_)) , storage_snapshot(storage_snapshot_) @@ -42,7 +41,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) , reader_settings(reader_settings_) , block_size_params(block_size_params_) - , virt_column_names(virt_column_names_) + , result_header(applyPrewhereActions(pool->getHeader(), prewhere_info)) { if (reader_settings.apply_deleted_mask) { @@ -59,10 +58,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( lightweight_delete_filter_step = std::make_shared(std::move(step)); } - result_header = pool->getHeader(); - injectVirtualColumns(result_header, storage_snapshot, virt_column_names); - result_header = applyPrewhereActions(result_header, prewhere_info); - if (!prewhere_actions.steps.empty()) LOG_TRACE(log, "PREWHERE condition was split into {} steps: {}", prewhere_actions.steps.size(), prewhere_actions.dumpConditions()); @@ -181,23 +176,6 @@ void MergeTreeSelectProcessor::initializeRangeReaders() task->initializeRangeReaders(all_prewhere_actions); } -void MergeTreeSelectProcessor::injectVirtualColumns( - Block & block, - const StorageSnapshotPtr & storage_snapshot, - const Names & virtual_columns) -{ - for (const auto & virtual_column_name : virtual_columns) - { - auto column = storage_snapshot->virtual_columns.tryGet(virtual_column_name); - if (!column) - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, - "There is no virtual column {} in table {}", - virtual_column_name, storage_snapshot->storage.getStorageID().getNameForLogs()); - - block.insert({column->type->createColumn(), column->type, column->name}); - } -} - Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info) { if (prewhere_info) @@ -249,15 +227,9 @@ Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const Prewhere return block; } -Block MergeTreeSelectProcessor::transformHeader( - Block block, - const StorageSnapshotPtr & storage_snapshot, - const PrewhereInfoPtr & prewhere_info, - const Names & virtual_columns) +Block MergeTreeSelectProcessor::transformHeader(Block block, const PrewhereInfoPtr & prewhere_info) { - injectVirtualColumns(block, storage_snapshot, virtual_columns); - auto transformed = applyPrewhereActions(std::move(block), prewhere_info); - return transformed; + return applyPrewhereActions(std::move(block), prewhere_info); } } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 9b399003909..3afb4f3fac0 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -46,17 +46,11 @@ public: const PrewhereInfoPtr & prewhere_info_, const 
ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, - const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_); + const MergeTreeReaderSettings & reader_settings_); String getName() const; - static Block transformHeader( - Block block, - const StorageSnapshotPtr & storage_snapshot, - const PrewhereInfoPtr & prewhere_info, - const Names & virtual_columns); - + static Block transformHeader(Block block, const PrewhereInfoPtr & prewhere_info); Block getHeader() const { return result_header; } ChunkAndProgress read(); @@ -83,7 +77,6 @@ private: }; /// Used for filling header with no rows as well as block with data - static void injectVirtualColumns(Block & block, const StorageSnapshotPtr & storage_snapshot, const Names & virtual_columns); static Block applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info); /// Sets up range readers corresponding to data readers @@ -99,7 +92,6 @@ private: const MergeTreeReaderSettings reader_settings; const MergeTreeReadTask::BlockSizeParams block_size_params; - const Names virt_column_names; /// Current task to read from. MergeTreeReadTaskPtr task; diff --git a/src/Storages/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTreeVirtualColumns.cpp index 0936c933f9e..8c41ab2d16e 100644 --- a/src/Storages/MergeTreeVirtualColumns.cpp +++ b/src/Storages/MergeTreeVirtualColumns.cpp @@ -10,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int NO_SUCH_COLUMN_IN_TABLE; + extern const int LOGICAL_ERROR; } static ASTPtr getCompressionCodecDeltaLZ4() From 462f597c6b2c5e015c934e47810e06e75dd9fa0c Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 21 Feb 2024 11:02:05 +0100 Subject: [PATCH 021/356] Update run.sh --- docker/test/upgrade/run.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index d8ba48909b6..262334cf7ee 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -77,6 +77,18 @@ remove_keeper_config "async_replication" "1" # create_if_not_exists feature flag doesn't exist on some older versions remove_keeper_config "create_if_not_exists" "[01]" +#todo: remove these after 24.3 released. +sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ + | sed "s|azure>|>azure_blob_storage>|" \ + > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + +#todo: remove these after 24.3 released. +sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|local>|>local_blob_storage>|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml @@ -103,11 +115,13 @@ sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ > /etc/clickhouse-server/config.d/keeper_port.xml.tmp sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml +#todo: remove these after 24.3 released. 
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ | sed "s|azure>|>azure_blob_storage>|" \ > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +#todo: remove these after 24.3 released. sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ | sed "s|local>|>local_blob_storage>|" \ > /etc/clickhouse-server/config.d/storage_conf.xml.tmp From 3d57237fe8c8b1c9379f62a78cf6efcec02d6727 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 21 Feb 2024 16:40:27 +0000 Subject: [PATCH 022/356] refactoring of virtual columns --- .../QueryPlan/ReadFromMergeTree.cpp | 27 ++++------------- src/Processors/QueryPlan/ReadFromMergeTree.h | 3 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 4 ++- src/Storages/MergeTree/IMergeTreeReader.cpp | 29 +++++++++++++------ src/Storages/MergeTree/IMergeTreeReader.h | 10 +++---- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 4 +-- .../MergeTree/MergeTreeDataPartCompact.h | 2 +- .../MergeTree/MergeTreeDataPartInMemory.cpp | 4 +-- .../MergeTree/MergeTreeDataPartInMemory.h | 2 +- .../MergeTree/MergeTreeDataPartWide.cpp | 4 +-- .../MergeTree/MergeTreeDataPartWide.h | 2 +- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 2 ++ .../MergeTree/MergeTreePrefetchedReadPool.h | 1 + src/Storages/MergeTree/MergeTreeReadPool.cpp | 2 ++ src/Storages/MergeTree/MergeTreeReadPool.h | 1 + .../MergeTree/MergeTreeReadPoolBase.cpp | 5 ++++ .../MergeTree/MergeTreeReadPoolBase.h | 2 ++ .../MergeTree/MergeTreeReadPoolInOrder.cpp | 2 ++ .../MergeTree/MergeTreeReadPoolInOrder.h | 1 + .../MergeTreeReadPoolParallelReplicas.cpp | 2 ++ .../MergeTreeReadPoolParallelReplicas.h | 1 + ...rgeTreeReadPoolParallelReplicasInOrder.cpp | 2 ++ ...MergeTreeReadPoolParallelReplicasInOrder.h | 1 + src/Storages/MergeTree/MergeTreeReadTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeReadTask.h | 10 ++++++- .../MergeTree/MergeTreeReaderCompact.cpp | 4 +-- .../MergeTree/MergeTreeReaderCompact.h | 2 +- .../MergeTree/MergeTreeReaderInMemory.cpp | 4 +-- .../MergeTree/MergeTreeReaderInMemory.h | 2 +- .../MergeTree/MergeTreeReaderWide.cpp | 4 +-- src/Storages/MergeTree/MergeTreeReaderWide.h | 2 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 1 - .../MergeTree/MergeTreeSequentialSource.cpp | 4 +-- src/Storages/MergeTreeVirtualColumns.cpp | 10 ++----- src/Storages/MergeTreeVirtualColumns.h | 2 +- 36 files changed, 91 insertions(+), 71 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index c095dc06c3a..22ccd623196 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -295,20 +295,11 @@ ReadFromMergeTree::ReadFromMergeTree( .preferred_block_size_bytes = context->getSettingsRef().preferred_block_size_bytes, .preferred_max_column_in_block_size_bytes = context->getSettingsRef().preferred_max_column_in_block_size_bytes} , requested_num_streams(num_streams_) - , sample_factor_column_queried(false) /// TODO: kek , max_block_numbers_to_read(std::move(max_block_numbers_to_read_)) , log(std::move(log_)) , analyzed_result_ptr(analyzed_result_ptr_) , is_parallel_reading_from_replicas(enable_parallel_reading) { - if (sample_factor_column_queried) - { - /// Only _sample_factor virtual column is added by ReadFromMergeTree - /// Other virtual columns are added by MergeTreeSelectProcessor. 
- auto type = std::make_shared(); - output_stream->header.insert({type->createColumn(), type, "_sample_factor"}); - } - if (is_parallel_reading_from_replicas) { all_ranges_callback = context->getMergeTreeAllRangesCallback(); @@ -370,6 +361,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto pool = std::make_shared( std::move(extension), std::move(parts_with_range), + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, @@ -450,6 +442,7 @@ Pipe ReadFromMergeTree::readFromPool( { pool = std::make_shared( std::move(parts_with_range), + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, @@ -462,6 +455,7 @@ Pipe ReadFromMergeTree::readFromPool( { pool = std::make_shared( std::move(parts_with_range), + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, @@ -537,6 +531,7 @@ Pipe ReadFromMergeTree::readInOrder( std::move(extension), mode, parts_with_ranges, + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, @@ -551,6 +546,7 @@ Pipe ReadFromMergeTree::readInOrder( has_limit_below_one_block, read_type, parts_with_ranges, + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, @@ -1904,6 +1900,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons { auto result = getAnalysisResult(); result.checkLimits(context->getSettingsRef(), query_info); + shared_virtual_fields.emplace("_sample_factor", result.sampling.used_sample_factor); LOG_DEBUG( log, @@ -1988,18 +1985,6 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions)); }; - /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. - if (sample_factor_column_queried) - { - ColumnWithTypeAndName column; - column.name = "_sample_factor"; - column.type = std::make_shared(); - column.column = column.type->createColumnConst(0, Field(result.sampling.used_sample_factor)); - - auto adding_column = ActionsDAG::makeAddingColumnActions(std::move(column)); - append_actions(std::move(adding_column)); - } - if (result_projection) cur_header = result_projection->updateHeader(cur_header); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 4ecaa5e262f..02c6cc3463b 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -243,7 +243,6 @@ private: size_t requested_num_streams; size_t output_streams_limit = 0; - const bool sample_factor_column_queried; /// Used for aggregation optimization (see DB::QueryPlanOptimizations::tryAggregateEachPartitionIndependently). 
bool output_each_partition_through_separate_port = false; @@ -284,7 +283,9 @@ private: RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); ReadFromMergeTree::AnalysisResult getAnalysisResult() const; + AnalysisResultPtr analyzed_result_ptr; + VirtualFields shared_virtual_fields; bool is_parallel_reading_from_replicas; std::optional all_ranges_callback; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index d5b024164a2..ab6df012195 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -70,6 +71,7 @@ public: using Checksums = MergeTreeDataPartChecksums; using Checksum = MergeTreeDataPartChecksums::Checksum; using ValueSizeMap = std::map; + using VirtualFields = std::unordered_map; using MergeTreeReaderPtr = std::unique_ptr; using MergeTreeWriterPtr = std::unique_ptr; @@ -95,7 +97,7 @@ public: const NamesAndTypesList & columns_, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 7e5bb0aaea4..73ccd71b217 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -23,12 +24,13 @@ namespace namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } IMergeTreeReader::IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -51,7 +53,7 @@ IMergeTreeReader::IMergeTreeReader( , part_columns(data_part_info_for_read->isWidePart() ? 
data_part_info_for_read->getColumnsDescriptionWithCollectedNested() : data_part_info_for_read->getColumnsDescription()) - , read_task_info(read_task_info_) + , virtual_fields(virtual_fields_) { columns_to_read.reserve(requested_columns.size()); serializations.reserve(requested_columns.size()); @@ -70,12 +72,11 @@ const IMergeTreeReader::ValueSizeMap & IMergeTreeReader::getAvgValueSizeHints() void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const { - chassert(read_task_info != nullptr); - - const IMergeTreeDataPart * part = read_task_info->data_part.get(); - if (part->isProjectionPart()) - part = part->getParentPart(); + const auto * loaded_part_info = typeid_cast(data_part_info_for_read.get()); + if (!loaded_part_info) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Filling of virtual columns is supported only for LoadedMergeTreeDataPartInfoForReader"); + const auto & data_part = loaded_part_info->getDataPart(); const auto & storage_columns = storage_snapshot->getMetadataForQuery()->getColumns(); const auto & virtual_columns = storage_snapshot->virtual_columns; @@ -90,11 +91,21 @@ void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const continue; if (!it->type->equals(*virtual_column->type)) + { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Data type for virtual column {} mismatched. Requested type: {}, Virtual column type: {}", + "Data type for virtual column {} mismatched. Requested type: {}, virtual column type: {}", it->name, it->type->getName(), virtual_column->type->getName()); + } + + if (it->name == "_part_offset" || it->name == BlockOffsetColumn::name) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Virtual column {} must be filled by range reader", it->name); + + Field field; + if (auto field_it = virtual_fields.find(it->name); field_it != virtual_fields.end()) + field = field_it->second; + else + field = getFieldForConstVirtualColumn(it->name, *data_part); - auto field = getFieldForConstVirtualColumn(it->name, *part, read_task_info->part_index_in_query); columns[pos] = virtual_column->type->createColumnConst(rows, field)->convertToFullColumnIfConst(); } } diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 3fe9853fced..32877cd88eb 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -2,7 +2,6 @@ #include #include -#include "DataTypes/Serializations/ISerialization.h" #include #include #include @@ -10,8 +9,6 @@ namespace DB { -class IDataType; - /// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks. /// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer. /// Avoids loading the marks file if it is not needed (e.g. when reading the whole part). @@ -19,12 +16,13 @@ class IMergeTreeReader : private boost::noncopyable { public: using ValueSizeMap = std::map; + using VirtualFields = std::unordered_map; using DeserializeBinaryBulkStateMap = std::map; IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -119,8 +117,8 @@ private: /// Actual columns description in part. const ColumnsDescription & part_columns; - /// Shared information required for reading. 
- MergeTreeReadTaskInfoPtr read_task_info; + /// TODO: + VirtualFields virtual_fields; }; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a3a4cb9619f..d5e2aa9f78b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1030,7 +1030,7 @@ Block MergeTreeData::getBlockWithVirtualsForFilter(const MergeTreeData::DataPart for (auto & column : block) { - auto field = getFieldForConstVirtualColumn(column.name, *part, 0); + auto field = getFieldForConstVirtualColumn(column.name, *part); column.column->assumeMutableRef().insert(field); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index ee9fa30d98d..8e3f2e07684 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -33,7 +33,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -48,7 +48,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( return std::make_unique( read_info, columns_to_read, - read_task_info_, + virtual_fields, storage_snapshot, uncompressed_cache, mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 479cb23c3e0..a97d15a08f3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -32,7 +32,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index d4d04bd55c2..e023ae9be0e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -33,7 +33,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields, UncompressedCache * /* uncompressed_cache */, MarkCache * /* mark_cache */, const AlterConversionsPtr & alter_conversions, @@ -48,7 +48,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( read_info, ptr, columns_to_read, - read_task_info_, + virtual_fields, storage_snapshot, mark_ranges, reader_settings); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index d3e6a9a5b27..90b4b0e3471 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -21,7 +21,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, - const MergeTreeReadTaskInfoPtr & read_task_info_, + 
const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 5c8b3f9e357..018b8a35534 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -31,7 +31,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -43,7 +43,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( return std::make_unique( read_info, columns_to_read, - read_task_info_, + virtual_fields, storage_snapshot, uncompressed_cache, mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 7bc4fe8c777..a8710dad679 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -27,7 +27,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 07c4b367b2c..63d8288eacf 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -108,6 +108,7 @@ MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetchedReaders::get() MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, @@ -117,6 +118,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 250631c6264..0c8a6716d40 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -18,6 +18,7 @@ class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithCo public: MergeTreePrefetchedReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index e0f5eb22950..ea75943adf9 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -34,6 +34,7 @@ size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & column MergeTreeReadPool::MergeTreeReadPool( 
RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, @@ -43,6 +44,7 @@ MergeTreeReadPool::MergeTreeReadPool( const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 44726f3877f..cb0e8a9657f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -26,6 +26,7 @@ public: MergeTreeReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index a1d5ddd8729..b63be82e4ca 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -7,6 +7,7 @@ namespace DB MergeTreeReadPoolBase::MergeTreeReadPoolBase( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, @@ -15,6 +16,7 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( const PoolSettings & pool_settings_, const ContextPtr & context_) : parts_ranges(std::move(parts_)) + , shared_virtual_fields(std::move(shared_virtual_fields_)) , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) @@ -59,6 +61,9 @@ void MergeTreeReadPoolBase::fillPerPartInfos() reader_settings, /*with_subcolumns=*/true); + read_task_info.const_virtual_fields = shared_virtual_fields; + read_task_info.const_virtual_fields.emplace("_part_index", read_task_info.part_index_in_query); + if (pool_settings.preferred_block_size_bytes > 0) { const auto & result_column_names = read_task_info.task_columns.columns.getNames(); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index 5ec2cf454c3..1b5bfec5898 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -23,6 +23,7 @@ public: MergeTreeReadPoolBase( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, @@ -36,6 +37,7 @@ public: protected: /// Initialized in constructor const RangesInDataParts parts_ranges; + const VirtualFields shared_virtual_fields; const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp index dd341f6f750..4c0391ffa57 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp @@ -12,6 +12,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( bool has_limit_below_one_block_, MergeTreeReadType read_type_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, 
const ExpressionActionsSettings & actions_settings_, @@ -21,6 +22,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h index 1b846fdb22a..9fedf396a6b 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h @@ -11,6 +11,7 @@ public: bool has_limit_below_one_block_, MergeTreeReadType read_type_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 24c0e2525ff..38035d97f56 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( ParallelReadingExtension extension_, RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, @@ -22,6 +23,7 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index d9895b3e14b..ca159edb91c 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -11,6 +11,7 @@ public: MergeTreeReadPoolParallelReplicas( ParallelReadingExtension extension_, RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index a34bd05ae63..01c0a9f91be 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -12,6 +12,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd ParallelReadingExtension extension_, CoordinationMode mode_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, @@ -21,6 +22,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h index 207dfb7899a..4fe3f7a699c 100644 --- 
a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -12,6 +12,7 @@ public: ParallelReadingExtension extension_, CoordinationMode mode_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index f70c8f973cf..7bbabf6a18d 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -44,7 +44,7 @@ MergeTreeReadTask::Readers MergeTreeReadTask::createReaders( columns_to_read, extras.storage_snapshot, ranges, - read_info, + read_info->const_virtual_fields, extras.uncompressed_cache, extras.mark_cache, read_info->alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h index b69abac5451..509cbccd83e 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.h +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -20,6 +20,8 @@ using MergeTreeBlockSizePredictorPtr = std::shared_ptr; using MergeTreeReaderPtr = std::unique_ptr; +using VirtualFields = std::unordered_map; + enum class MergeTreeReadType { @@ -62,6 +64,8 @@ struct MergeTreeReadTaskInfo MergeTreeReadTaskColumns task_columns; /// Shared initialized size predictor. It is copied for each new task. MergeTreeBlockSizePredictorPtr shared_size_predictor; + /// TODO: comment + VirtualFields const_virtual_fields; }; using MergeTreeReadTaskInfoPtr = std::shared_ptr; @@ -116,7 +120,11 @@ public: }; MergeTreeReadTask( - MergeTreeReadTaskInfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_); + MergeTreeReadTaskInfoPtr info_, + Readers readers_, + MarkRanges mark_ranges_, + + MergeTreeBlockSizePredictorPtr size_predictor_); void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions); diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 65b578e065d..63824366722 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes MergeTreeReaderCompact::MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -30,7 +30,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( : IMergeTreeReader( data_part_info_for_read_, columns_, - read_task_info_, + virtual_fields_, storage_snapshot_, uncompressed_cache_, mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index c87e4889d26..769e6a08be4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -21,7 +21,7 @@ public: MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, diff --git 
a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index e628bd76cb4..91fc8966a7a 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -19,14 +19,14 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_) : IMergeTreeReader( data_part_info_for_read_, columns_, - read_task_info_, + virtual_fields_, storage_snapshot_, nullptr, nullptr, diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.h b/src/Storages/MergeTree/MergeTreeReaderInMemory.h index 161b615a511..cc1e2e9e4e2 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.h +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.h @@ -18,7 +18,7 @@ public: MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_); diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 8270f2452c2..d34a58a25b0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -24,7 +24,7 @@ namespace MergeTreeReaderWide::MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_, NamesAndTypesList columns_, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -36,7 +36,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( : IMergeTreeReader( data_part_info_, columns_, - read_task_info_, + virtual_fields_, storage_snapshot_, uncompressed_cache_, mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index ecfaa43a3f8..a9a5526dd65 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -17,7 +17,7 @@ public: MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const MergeTreeReadTaskInfoPtr & read_task_info_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index d94d4506ee6..8d0264cd16e 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -22,7 +22,6 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; extern const int QUERY_WAS_CANCELLED; - extern const int NO_SUCH_COLUMN_IN_TABLE; } MergeTreeSelectProcessor::MergeTreeSelectProcessor( diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 73689e15cd4..35f5782b95a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp 
@@ -182,8 +182,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( columns_for_reader, storage_snapshot, *mark_ranges, - /* read_task_info = */ nullptr, - /* uncompressed_cache = */ nullptr, + /*virtual_fields=*/ {}, + /*uncompressed_cache=*/{}, mark_cache.get(), alter_conversions, reader_settings, diff --git a/src/Storages/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTreeVirtualColumns.cpp index 8c41ab2d16e..94168ccd95a 100644 --- a/src/Storages/MergeTreeVirtualColumns.cpp +++ b/src/Storages/MergeTreeVirtualColumns.cpp @@ -31,11 +31,8 @@ const String BlockOffsetColumn::name = "_block_offset"; const DataTypePtr BlockOffsetColumn::type = std::make_shared(); const ASTPtr BlockOffsetColumn::codec = getCompressionCodecDeltaLZ4(); -Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part, UInt64 part_index) +Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part) { - if (column_name == "_part_offset" || column_name == BlockOffsetColumn::name) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Column {} is not const and must be filled by range reader", column_name); - if (column_name == RowExistsColumn::name) return 1ULL; @@ -45,9 +42,6 @@ Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTree if (column_name == "_part") return part.name; - if (column_name == "_part_index") - return part_index; - if (column_name == "_part_uuid") return part.uuid; @@ -57,7 +51,7 @@ Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTree if (column_name == "_partition_value") return Tuple(part.partition.value.begin(), part.partition.value.end()); - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Unexpected virtual column name: {}", column_name); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Unexpected const virtual column: {}", column_name); } } diff --git a/src/Storages/MergeTreeVirtualColumns.h b/src/Storages/MergeTreeVirtualColumns.h index 3ee22028d1f..cd9fe544ed8 100644 --- a/src/Storages/MergeTreeVirtualColumns.h +++ b/src/Storages/MergeTreeVirtualColumns.h @@ -28,6 +28,6 @@ struct BlockOffsetColumn static const ASTPtr codec; }; -Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part, UInt64 part_index); +Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part); } From 6656cdb9fbd8aef2d7efaef501c61be37b9f1be4 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 21 Feb 2024 17:24:12 +0000 Subject: [PATCH 023/356] refactoring of virtual columns --- .../MergeTree/MergeTreeBlockReadUtils.cpp | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 8eb714c7e24..f97e07751e0 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -7,6 +7,7 @@ #include #include #include "Storages/ColumnsDescription.h" +#include "Storages/MergeTreeVirtualColumns.h" #include #include #include @@ -107,21 +108,14 @@ NameSet injectRequiredColumns( auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() - .withVirtuals(VirtualsKind::Persistent) + .withVirtuals() .withSubcolumns(with_subcolumns); - auto virtuals_options = GetColumnsOptions(GetColumnsOptions::None).withVirtuals(); - for (size_t i = 0; i < columns.size(); ++i) { /// We are going to fetch physical columns and 
system columns first if (!storage_snapshot->tryGetColumn(options, columns[i])) - { - if (storage_snapshot->tryGetColumn(virtuals_options, columns[i])) - continue; - else - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column or subcolumn {} in table", columns[i]); - } + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column or subcolumn {} in table", columns[i]); have_at_least_one_physical_column |= injectRequiredColumnsRecursively( columns[i], storage_snapshot, alter_conversions, @@ -285,12 +279,20 @@ MergeTreeReadTaskColumns getReadTaskColumns( .withVirtuals() .withSubcolumns(with_subcolumns); - bool has_part_offset = std::find(required_columns.begin(), required_columns.end(), "_part_offset") != required_columns.end(); + static const NameSet columns_to_read_at_first_step = {"_part_offset", BlockOffsetColumn::name}; + NameSet columns_from_previous_steps; auto add_step = [&](const PrewhereExprStep & step) { Names step_column_names; + if (columns_from_previous_steps.empty()) + { + for (const auto & required_column : required_columns) + if (columns_to_read_at_first_step.contains(required_column)) + step_column_names.push_back(required_column); + } + /// Computation results from previous steps might be used in the current step as well. In such a case these /// computed columns will be present in the current step inputs. They don't need to be read from the disk so /// exclude them from the list of columns to read. This filtering must be done before injecting required @@ -301,13 +303,6 @@ MergeTreeReadTaskColumns getReadTaskColumns( if (!columns_from_previous_steps.contains(name)) step_column_names.push_back(name); - /// Make sure _part_offset is read in STEP 0 - if (columns_from_previous_steps.empty() && has_part_offset) - { - if (std::find(step_column_names.begin(), step_column_names.end(), "_part_offset") == step_column_names.end()) - step_column_names.push_back("_part_offset"); - } - if (!step_column_names.empty()) injectRequiredColumns( data_part_info_for_reader, storage_snapshot, From 2836d0bb55ff5a5eae3f397e3964020e84320b5f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:46:42 +0100 Subject: [PATCH 024/356] Update run.sh --- docker/test/upgrade/run.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 262334cf7ee..9fd54995ab4 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -79,13 +79,13 @@ remove_keeper_config "create_if_not_exists" "[01]" #todo: remove these after 24.3 released. sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ - | sed "s|azure>|>azure_blob_storage>|" \ + | sed "s|azure|azure_blob_storage|" \ > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml #todo: remove these after 24.3 released. sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ - | sed "s|local>|>local_blob_storage>|" \ + | sed "s|local|local_blob_storage|" \ > /etc/clickhouse-server/config.d/storage_conf.xml.tmp sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml @@ -117,13 +117,13 @@ sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-serv #todo: remove these after 24.3 released. 
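Looking back at the `getReadTaskColumns` change above (the per-step column selection in MergeTreeBlockReadUtils.cpp): row-level virtual columns such as `_part_offset` and `_block_offset` are produced by the range reader, so they must be requested in the very first read step rather than in whichever PREWHERE step happens to mention them. A small self-contained sketch of that selection follows; the helper name and the call site are illustrative only.

#include <string>
#include <unordered_set>
#include <vector>

// Virtual columns that must be read by step 0 so later steps can rely on them
// (mirrors columns_to_read_at_first_step in the patch above).
static const std::unordered_set<std::string> columns_to_read_at_first_step = {"_part_offset", "_block_offset"};

// Illustrative helper: pick which of the required columns the first step must read.
std::vector<std::string> firstStepVirtuals(const std::vector<std::string> & required_columns)
{
    std::vector<std::string> result;
    for (const auto & name : required_columns)
        if (columns_to_read_at_first_step.count(name))
            result.push_back(name);
    return result;
}

int main()
{
    auto step0 = firstStepVirtuals({"id", "_part_offset", "value"});
    return (step0.size() == 1 && step0.front() == "_part_offset") ? 0 : 1;
}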
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ - | sed "s|azure>|>azure_blob_storage>|" \ + | sed "s|azure|azure_blob_storage|" \ > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml #todo: remove these after 24.3 released. sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ - | sed "s|local>|>local_blob_storage>|" \ + | sed "s|local|local_blob_storage|" \ > /etc/clickhouse-server/config.d/storage_conf.xml.tmp sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml From c624e2fa8bb78e186fa00ded925cf5bd24590d7c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 21 Feb 2024 18:56:28 +0000 Subject: [PATCH 025/356] fix style check --- src/Storages/MergeTreeVirtualColumns.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTreeVirtualColumns.cpp index 94168ccd95a..1a576bef017 100644 --- a/src/Storages/MergeTreeVirtualColumns.cpp +++ b/src/Storages/MergeTreeVirtualColumns.cpp @@ -10,7 +10,6 @@ namespace DB namespace ErrorCodes { extern const int NO_SUCH_COLUMN_IN_TABLE; - extern const int LOGICAL_ERROR; } static ASTPtr getCompressionCodecDeltaLZ4() From 501e05fda18004aab9abae8945eaa6eacdbc94e7 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 22 Feb 2024 09:22:28 +0800 Subject: [PATCH 026/356] fixed: improber filter setting --- src/Interpreters/HashJoin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 33dc178ca00..407a064483f 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1541,7 +1541,7 @@ size_t joinRightColumnsSwitchNullability( } else { - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } } From 601c3c33bc802b6c95ab2371379da4da90bd2baa Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 22 Feb 2024 17:44:41 +0100 Subject: [PATCH 027/356] Set correct execution name for ConstantNode --- src/Analyzer/ConstantNode.cpp | 18 ++++++++++++++++++ src/Analyzer/ConstantNode.h | 2 ++ src/Planner/PlannerActionsVisitor.cpp | 14 ++++++++++++-- .../02227_union_match_by_name.reference | 2 +- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index ce6da693f93..83446ca7d16 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -3,6 +3,7 @@ #include #include #include +#include "Analyzer/FunctionNode.h" #include #include @@ -91,6 +92,23 @@ bool ConstantNode::requiresCastCall() const return need_to_add_cast_function || source_expression != nullptr; } +bool ConstantNode::receivedFromInitiatorServer() const +{ + if (!hasSourceExpression()) + return false; + + auto * cast_function = getSourceExpression()->as(); + if (!cast_function || cast_function->getFunctionName() != "_CAST") + return false; + for (auto const & argument : cast_function->getArguments()) + { + auto * constant_arg = argument->as(); + if (!constant_arg || constant_arg->hasSourceExpression()) + return false; + } + return true; +} + void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const { buffer << std::string(indent, ' ') << 
"CONSTANT id: " << format_state.getNodeId(this); diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index c0df092293d..18090c56630 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -77,6 +77,8 @@ public: bool requiresCastCall() const; + bool receivedFromInitiatorServer() const; + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index b33e1a3509c..89d843a28ac 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -111,7 +111,12 @@ public: else { if (constant_node.hasSourceExpression()) - result = calculateActionNodeName(constant_node.getSourceExpression()); + { + if (constant_node.receivedFromInitiatorServer()) + result = calculateActionNodeNameForConstant(constant_node); + else + result = calculateActionNodeName(constant_node.getSourceExpression()); + } else result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); } @@ -562,7 +567,12 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi else { if (constant_node.hasSourceExpression()) - return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); + { + if (constant_node.receivedFromInitiatorServer()) + return calculateActionNodeNameForConstant(constant_node); + else + return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); + } else return calculateConstantActionNodeName(constant_literal, constant_type); } diff --git a/tests/queries/0_stateless/02227_union_match_by_name.reference b/tests/queries/0_stateless/02227_union_match_by_name.reference index c28035fab49..d726ae86de7 100644 --- a/tests/queries/0_stateless/02227_union_match_by_name.reference +++ b/tests/queries/0_stateless/02227_union_match_by_name.reference @@ -36,7 +36,7 @@ Header: avgWeighted(x, y) Nullable(Float64) Header: x Nullable(Nothing) y UInt8 Expression (Projection) - Header: NULL_Nullable(Nothing) Nullable(Nothing) + Header: _CAST(NULL_Nullable(Nothing), \'Nullable(Nothing)\'_String) Nullable(Nothing) 1_UInt8 UInt8 Expression (Change column names to column identifiers) Header: __table5.dummy UInt8 From 81df9c5a273e2f6533147889c929c4a519dc08af Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 22 Feb 2024 19:07:54 +0100 Subject: [PATCH 028/356] Use possition to merge plans --- src/Storages/StorageMerge.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 591a0ae375e..79d7b83cada 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1390,7 +1390,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const RowPolicyDataOpt & row_policy_data_opt, ContextMutablePtr local_context, QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage [[maybe_unused]]) + QueryProcessingStage::Enum processed_stage) { Block before_block_header = builder.getHeader(); @@ -1449,9 +1449,9 @@ void ReadFromMerge::convertAndFilterSourceStream( ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; - // if (local_context->getSettingsRef().allow_experimental_analyzer - // && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) - // convert_actions_match_columns_mode = 
ActionsDAG::MatchColumnsMode::Position; + if (local_context->getSettingsRef().allow_experimental_analyzer + && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) + convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; if (row_policy_data_opt) { From d3c6761dcf11d2cf8cbc6f88c26dd0e45af891d2 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 23 Feb 2024 12:06:17 +0100 Subject: [PATCH 029/356] Update run.sh --- docker/test/upgrade/run.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index fe0c1212b4e..79224c581af 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -104,6 +104,8 @@ rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml +cat /etc/clickhouse-server/config.d/storage_conf.xml + start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log From 011c694117845500c82f9563c65930429979982f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 23 Feb 2024 01:34:17 +0100 Subject: [PATCH 030/356] add none_only_active DDL output mode --- docs/en/operations/settings/settings.md | 1 + src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 1 + src/Core/SettingsEnums.h | 1 + src/Interpreters/executeDDLQueryOnCluster.cpp | 25 ++++++++++++------- ...distributed_ddl_output_mode_long.reference | 4 +-- .../01175_distributed_ddl_output_mode_long.sh | 4 +-- .../02447_drop_database_replica.reference | 2 ++ .../02447_drop_database_replica.sh | 5 ++-- 9 files changed, 29 insertions(+), 16 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index b11a04e10ec..d076eeabc9a 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3956,6 +3956,7 @@ Possible values: - `none` — Is similar to throw, but distributed DDL query returns no result set. - `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts. - `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts. +- `none_only_active` - similar to `none`, but doesn't wait for inactive replicas of the `Replicated` database. Note: with this mode it's impossible to figure out that the query was not executed on some replica and will be executed in background. 
- `null_status_on_timeout_only_active` — similar to `null_status_on_timeout`, but doesn't wait for inactive replicas of the `Replicated` database - `throw_only_active` — similar to `throw`, but doesn't wait for inactive replicas of the `Replicated` database diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 433195af9c3..b6437e98a85 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -695,7 +695,7 @@ class IColumn; M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \ - M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw'", 0) \ + M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \ M(UInt64, distributed_ddl_entry_format_version, 5, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \ \ M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 0c84c1cc7d2..892d10425ad 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -117,6 +117,7 @@ IMPLEMENT_SETTING_ENUM(DistributedDDLOutputMode, ErrorCodes::BAD_ARGUMENTS, {"null_status_on_timeout", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT}, {"throw_only_active", DistributedDDLOutputMode::THROW_ONLY_ACTIVE}, {"null_status_on_timeout_only_active", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE}, + {"none_only_active", DistributedDDLOutputMode::NONE_ONLY_ACTIVE}, {"never_throw", DistributedDDLOutputMode::NEVER_THROW}}) IMPLEMENT_SETTING_ENUM(StreamingHandleErrorMode, ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 246cdf6f684..2f29519061f 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -175,6 +175,7 @@ enum class DistributedDDLOutputMode NEVER_THROW, THROW_ONLY_ACTIVE, NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE, + NONE_ONLY_ACTIVE, }; DECLARE_SETTING_ENUM(DistributedDDLOutputMode) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 6122ec6180a..df8236c11f4 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -252,7 +252,8 @@ BlockIO getDistributedDDLStatus(const String & node_path, const DDLLogEntry & en auto source = std::make_shared(node_path, entry, context, hosts_to_wait); io.pipeline = QueryPipeline(std::move(source)); - if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE) + if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE || + context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE) io.pipeline.complete(std::make_shared(io.pipeline.getHeader())); 
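The new `none_only_active` value documented above combines two existing behaviours: like `none` it returns no result set and still raises `TIMEOUT_EXCEEDED` when hosts do not finish in time, and like the other `*_only_active` values it does not wait for inactive replicas of a `Replicated` database. The sketch below maps the modes onto those independent switches; it is an illustration of the documented semantics with made-up type names, not the actual implementation.

// Stand-in for the distributed_ddl_output_mode values.
enum class Mode { None, Throw, NullStatusOnTimeout, NeverThrow, ThrowOnlyActive, NullStatusOnTimeoutOnlyActive, NoneOnlyActive };

struct ModeTraits
{
    bool throw_on_timeout;    // raise TIMEOUT_EXCEEDED if some hosts did not finish
    bool wait_offline_hosts;  // keep waiting for currently inactive replicas
    bool return_status_rows;  // produce a result set with per-host status
};

ModeTraits traitsFor(Mode mode)
{
    switch (mode)
    {
        case Mode::Throw:                         return {true,  true,  true};   // 'throw'
        case Mode::None:                          return {true,  true,  false};  // 'none'
        case Mode::NullStatusOnTimeout:           return {false, true,  true};   // 'null_status_on_timeout'
        case Mode::NeverThrow:                    return {false, true,  true};   // 'never_throw'
        case Mode::ThrowOnlyActive:               return {true,  false, true};   // 'throw_only_active'
        case Mode::NullStatusOnTimeoutOnlyActive: return {false, false, true};   // 'null_status_on_timeout_only_active'
        case Mode::NoneOnlyActive:                return {true,  false, false};  // 'none_only_active' (new)
    }
    return {};
}

int main() { return traitsFor(Mode::NoneOnlyActive).wait_offline_hosts ? 1 : 0; }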
return io; @@ -264,7 +265,9 @@ Block DDLQueryStatusSource::getSampleBlock(ContextPtr context_, bool hosts_to_wa auto maybe_make_nullable = [&](const DataTypePtr & type) -> DataTypePtr { - if (output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE) + if (output_mode == DistributedDDLOutputMode::THROW || + output_mode == DistributedDDLOutputMode::NONE || + output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE) return type; return std::make_shared(type); }; @@ -313,14 +316,15 @@ DDLQueryStatusSource::DDLQueryStatusSource( { auto output_mode = context->getSettingsRef().distributed_ddl_output_mode; throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE - || output_mode == DistributedDDLOutputMode::NONE; + || output_mode == DistributedDDLOutputMode::NONE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; if (hosts_to_wait) { waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end()); is_replicated_database = true; only_running_hosts = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE || - output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE; + output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE || + output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; } else { @@ -442,14 +446,16 @@ Chunk DDLQueryStatusSource::generate() size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished; size_t num_active_hosts = current_active_hosts.size(); - constexpr auto msg_format = "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. " - "There are {} unfinished hosts ({} of them are currently executing the task), " - "they are going to execute the query in background"; + constexpr auto msg_format = "Distributed DDL task {} is not finished on {} of {} hosts " + "({} of them are currently executing the task, {} are inactive). " + "They are going to execute the query in background. Was waiting for {} seconds{}"; + if (throw_on_timeout) { if (!first_exception) first_exception = std::make_unique(Exception(ErrorCodes::TIMEOUT_EXCEEDED, - msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts)); + msg_format, node_path, num_unfinished_hosts, waiting_hosts.size(), num_active_hosts, offline_hosts.size(), + watch.elapsedSeconds(), stop_waiting_offline_hosts ? "" : ", which is longer than distributed_ddl_task_timeout")); /// For Replicated database print a list of unfinished hosts as well. Will return empty block on next iteration. if (is_replicated_database) @@ -457,7 +463,8 @@ Chunk DDLQueryStatusSource::generate() return {}; } - LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts); + LOG_INFO(log, msg_format, node_path, num_unfinished_hosts, waiting_hosts.size(), num_active_hosts, offline_hosts.size(), + watch.elapsedSeconds(), stop_waiting_offline_hosts ? "" : "which is longer than distributed_ddl_task_timeout"); return generateChunkWithUnfinishedHosts(); } diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference index b9a66a1e1a9..2151328d8b7 100644 --- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference +++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference @@ -3,7 +3,7 @@ Received exception from server: Code: 57. Error: Received from localhost:9000. 
Error: There was an error on [localhost:9000]: Code: 57. Error: Table default.none already exists. (TABLE_ALREADY_EXISTS) (query: create table none on cluster test_shard_localhost (n int) engine=Memory;) Received exception from server: -Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED) +Code: 159. Error: Received from localhost:9000. Error: Distributed DDL task is not finished on 1 of 2 hosts (0 of them are currently executing the task, 0 are inactive). They are going to execute the query in background. Was waiting for seconds, which is longer than distributed_ddl_task_timeout. (TIMEOUT_EXCEEDED) (query: drop table if exists none on cluster test_unavailable_shard;) throw localhost 9000 0 0 0 @@ -12,7 +12,7 @@ Code: 57. Error: Received from localhost:9000. Error: There was an error on [loc (query: create table throw on cluster test_shard_localhost (n int) engine=Memory format Null;) localhost 9000 0 1 0 Received exception from server: -Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED) +Code: 159. Error: Received from localhost:9000. Error: Distributed DDL task is not finished on 1 of 2 hosts (0 of them are currently executing the task, 0 are inactive). They are going to execute the query in background. Was waiting for seconds, which is longer than distributed_ddl_task_timeout. (TIMEOUT_EXCEEDED) (query: drop table if exists throw on cluster test_unavailable_shard;) null_status_on_timeout localhost 9000 0 0 0 diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh index 12e142adda9..f17e85da60a 100755 --- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh +++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh @@ -54,14 +54,14 @@ $CLIENT --distributed_ddl_output_mode=none -q "create table none on cluster test $CLIENT --distributed_ddl_output_mode=none -q "create table none on cluster test_shard_localhost (n int) engine=Memory;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" # Timeout -run_until_out_contains 'There are 1 unfinished hosts' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=none -q "drop table if exists none on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Watching task .* is executing longer/Watching task is executing longer/" +run_until_out_contains 'not finished on 1 ' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=none -q "drop table if exists none on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Distributed DDL task .* is not finished/Distributed DDL task is not finished/" | sed "s/for .* seconds/for seconds/" $CLIENT --distributed_ddl_output_mode=throw -q "select value from system.settings where name='distributed_ddl_output_mode';" $CLIENT --distributed_ddl_output_mode=throw -q "create table throw on cluster test_shard_localhost (n int) engine=Memory;" $CLIENT --distributed_ddl_output_mode=throw -q "create table throw on cluster 
test_shard_localhost (n int) engine=Memory format Null;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" -run_until_out_contains 'There are 1 unfinished hosts' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=throw -q "drop table if exists throw on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Watching task .* is executing longer/Watching task is executing longer/" +run_until_out_contains 'not finished on 1 ' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=throw -q "drop table if exists throw on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Distributed DDL task .* is not finished/Distributed DDL task is not finished/" | sed "s/for .* seconds/for seconds/" $CLIENT --distributed_ddl_output_mode=null_status_on_timeout -q "select value from system.settings where name='distributed_ddl_output_mode';" diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference index 1af3ee244f1..bd3b689ca3c 100644 --- a/tests/queries/0_stateless/02447_drop_database_replica.reference +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -13,6 +13,7 @@ t rdb_default 1 1 s1 r1 1 2 2 +2 s1 r1 OK 2 0 s1 r2 QUEUED 2 0 s2 r1 QUEUED 2 0 @@ -24,4 +25,5 @@ rdb_default 1 2 s1 r2 0 t t2 t3 +t4 rdb_default_4 1 1 s1 r1 1 diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index fb89db5045b..5c4604bc8cd 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -33,8 +33,9 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" # Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t3 (n int) engine=Log" | sort +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort $CLICKHOUSE_CLIENT -q "detach database $db3" $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db" From d13b2a91c1bfbb7f59b5380ceb1a5057e88043da Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 23 Feb 2024 13:33:12 +0000 Subject: [PATCH 031/356] Fix virtual tables --- src/Interpreters/ActionsDAG.cpp | 2 +- src/Storages/StorageMerge.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 
7240679abb7..7412eea5e32 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1318,7 +1318,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( size_t num_result_columns = result.size(); if (mode == MatchColumnsMode::Position && num_input_columns != num_result_columns) - throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match"); + throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match (source: {} and result: {})", num_input_columns, num_result_columns); if (add_casted_columns && mode != MatchColumnsMode::Name) throw Exception(ErrorCodes::LOGICAL_ERROR, "Converting with add_casted_columns supported only for MatchColumnsMode::Name"); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 79d7b83cada..5b18fb8f838 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1001,7 +1001,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (has_database_virtual_column && !pipe_header.has("_database")) + if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database")) { ColumnWithTypeAndName column; column.name = "_database"; @@ -1016,7 +1016,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { return std::make_shared(stream_header, adding_column_actions); }); } - if (has_table_virtual_column && !pipe_header.has("_table")) + if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table")) { ColumnWithTypeAndName column; column.name = "_table"; From d507884a1d905bc729682add414d5b4048bfa686 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 23 Feb 2024 17:59:25 +0100 Subject: [PATCH 032/356] Force reanalysis if parallel replicas changed --- src/Interpreters/InterpreterSelectQuery.cpp | 13 +++++++++++++ .../02972_parallel_replicas_cte.reference | 1 + .../0_stateless/02972_parallel_replicas_cte.sql | 9 +++++++++ 3 files changed, 23 insertions(+) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d34294b4c4b..0a2b8d9a0d7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -777,12 +777,25 @@ InterpreterSelectQuery::InterpreterSelectQuery( result_header = getSampleBlockImpl(); }; + /// This is a hack to make sure we reanalyze if GlobalSubqueriesVisitor changed allow_experimental_parallel_reading_from_replicas + UInt64 parallel_replicas_before_analysis + = context->hasQueryContext() ? context->getQueryContext()->getSettingsRef().allow_experimental_parallel_reading_from_replicas : 0; analyze(shouldMoveToPrewhere()); bool need_analyze_again = false; bool can_analyze_again = false; + if (context->hasQueryContext()) { + /// No buts or ifs, if the analysis changed this setting we must reanalyze without parallel replicas + if (context->getQueryContext()->getSettingsRef().allow_experimental_parallel_reading_from_replicas + != parallel_replicas_before_analysis) + { + context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + context->setSetting("max_parallel_replicas", UInt64{0}); + need_analyze_again = true; + } + /// Check number of calls of 'analyze' function. /// If it is too big, we will not analyze the query again not to have exponential blowup. 
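As an illustration only (not part of the patch), a minimal self-contained sketch of the re-analysis guard this hunk introduces: remember allow_experimental_parallel_reading_from_replicas before analysis and, if the subquery visitor flipped it, disable parallel replicas and run one more analysis pass. The Settings struct and analyze() below are hypothetical stand-ins, not the real interpreter API.

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the query context settings.
struct Settings
{
    uint64_t allow_experimental_parallel_reading_from_replicas = 1;
    uint64_t max_parallel_replicas = 3;
};

// Stand-in for analyze(): pretend a visitor decided parallel replicas cannot be used.
void analyze(Settings & settings)
{
    settings.allow_experimental_parallel_reading_from_replicas = 0;
}

int main()
{
    Settings settings;
    const uint64_t before = settings.allow_experimental_parallel_reading_from_replicas;

    analyze(settings);

    bool need_analyze_again = false;
    if (settings.allow_experimental_parallel_reading_from_replicas != before)
    {
        // Analysis changed the setting: force a second pass without parallel replicas.
        settings.allow_experimental_parallel_reading_from_replicas = 0;
        settings.max_parallel_replicas = 0;
        need_analyze_again = true;
    }

    if (need_analyze_again)
        analyze(settings);

    std::cout << "re-analyzed: " << need_analyze_again << '\n';
    return 0;
}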
std::atomic & current_query_analyze_count = context->getQueryContext()->kitchen_sink.analyze_counter; diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.reference b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference index 449fe3d34e3..3321ade3a24 100644 --- a/tests/queries/0_stateless/02972_parallel_replicas_cte.reference +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference @@ -1,3 +1,4 @@ 990000 990000 10 +990000 diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.sql b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql index 51ce18784da..c7143b5aa93 100644 --- a/tests/queries/0_stateless/02972_parallel_replicas_cte.sql +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql @@ -28,5 +28,14 @@ SETTINGS allow_experimental_analyzer = 0, allow_experimental_parallel_reading_fr SELECT count() FROM pr_2 JOIN numbers(10) as pr_1 ON pr_2.a = pr_1.number SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; +-- Being a subquery should still disable parallel replicas +SELECT * +FROM +( + WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) + SELECT count() FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a +) +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; + DROP TABLE IF EXISTS pr_1; DROP TABLE IF EXISTS pr_2; From 83e7f3e1a1be759775776838b9e11bccc298a10c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Feb 2024 21:16:16 +0100 Subject: [PATCH 033/356] Fix bugfix check (due to unknown commit_logs_cache_size_threshold) Bugfix check is broken on CI [1], because of: 2024.02.23 06:12:59.294500 [ 757 ] {} Application: Code: 115. DB::Exception: Unknown setting latest_logs_cache_size_threshold: in Coordination settings config. 
(UNKNOWN_SETTING), Stack trace (when copying this message, always include the lines below): [1]: https://s3.amazonaws.com/clickhouse-test-reports/60031/89eb2a38c0a7c113b7b0a96c8069fea6eb48ae9d/bugfix_validation.html Signed-off-by: Azat Khuzhin --- docker/test/stateless/run.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index aec2add2857..f2f9d6ac3bf 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -59,6 +59,16 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml + + function remove_keeper_config() + { + sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "/<$1>$2<\/$1>/d" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp + sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml + } + # commit_logs_cache_size_threshold setting doesn't exist on some older versions + remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+" fi # For flaky check we also enable thread fuzzer From 3c5d6c4df042942f5bbc5d5b7612c3ae5f6b4ba8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 25 Feb 2024 15:00:26 +0100 Subject: [PATCH 034/356] Revert "Merge pull request #60373 from azat/tests/detect-io_uring" This reverts commit 648e6ec5a63f777c5c44405d45574aaebf77142b, reversing changes made to 273b4835a4cac50fb5fa57809ea78a343bf2b125. --- src/Storages/StorageFile.cpp | 19 ------------------- tests/clickhouse-test | 27 +++++---------------------- 2 files changed, 5 insertions(+), 41 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7d674fea9ca..595573b566d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -26,8 +26,6 @@ #include #include #include -#include -#include #include #include @@ -94,7 +92,6 @@ namespace ErrorCodes extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; extern const int CANNOT_DETECT_FORMAT; extern const int CANNOT_COMPILE_REGEXP; - extern const int UNSUPPORTED_METHOD; } namespace @@ -279,22 +276,6 @@ std::unique_ptr selectReadBuffer( ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); } - else if (read_method == LocalFSReadMethod::io_uring && !use_table_fd) - { -#if USE_LIBURING - auto & reader = context->getIOURingReader(); - if (!reader.isSupported()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); - - res = std::make_unique( - reader, - Priority{}, - current_path, - context->getSettingsRef().max_read_buffer_size); -#else - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Read method io_uring is only supported in Linux"); -#endif - } else { if (use_table_fd) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f438c6f4f31..9c21f1fd2a2 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -13,6 +13,7 @@ import sys import os import os.path import glob +import platform import signal import re import copy @@ -573,27 +574,6 @@ def get_localzone(): return os.getenv("TZ", "/".join(os.readlink("/etc/localtime").split("/")[-2:])) -def supports_io_uring(): - return not subprocess.call( - [ - args.binary, - "-q", - "select * from file('/dev/null', 'LineAsString')", - "--storage_file_read_method", - "io_uring", - ], - 
stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - - -def get_local_filesystem_methods(): - methods = ["read", "pread", "mmap", "pread_threadpool"] - if supports_io_uring(): - methods.append("io_uring") - return methods - - class SettingsRandomizer: settings = { "max_insert_threads": lambda: 0 @@ -634,7 +614,10 @@ class SettingsRandomizer: 0.2, 0.5, 1, 10 * 1024 * 1024 * 1024 ), "local_filesystem_read_method": lambda: random.choice( - get_local_filesystem_methods() + # Allow to use uring only when running on Linux + ["read", "pread", "mmap", "pread_threadpool", "io_uring"] + if platform.system().lower() == "linux" + else ["read", "pread", "mmap", "pread_threadpool"] ), "remote_filesystem_read_method": lambda: random.choice(["read", "threadpool"]), "local_filesystem_read_prefetch": lambda: random.randint(0, 1), From 584ede2fad6ea475be4e9401841cb0d3fc2cb90f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 25 Feb 2024 23:54:11 +0800 Subject: [PATCH 035/356] opt for multiIf with decimal --- src/Functions/multiIf.cpp | 111 ++++++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 39 deletions(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index cb946b55c73..9cb70570762 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -3,12 +3,20 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -242,8 +250,9 @@ public: const auto & settings = context->getSettingsRef(); const WhichDataType which(removeNullable(result_type)); - bool execute_multiif_columnar - = settings.allow_execute_multiif_columnar && !contains_short && (which.isInt() || which.isUInt() || which.isFloat()); + bool execute_multiif_columnar = settings.allow_execute_multiif_columnar && !contains_short && instructions.size() <= std::numeric_limits::max() + && (which.isInt() || which.isUInt() || which.isFloat() || which.isDecimal() || which.isDateOrDate32OrDateTimeOrDateTime64() + || which.isEnum() || which.isIPv4() || which.isIPv6()); size_t rows = input_rows_count; if (!execute_multiif_columnar) @@ -253,36 +262,58 @@ public: return std::move(res); } -#define EXECUTE_INSTRUCTIONS_COLUMNAR(TYPE, INDEX) \ +#define EXECUTE_INSTRUCTIONS_COLUMNAR(TYPE, FIELD, INDEX) \ if (which.is##TYPE()) \ { \ - MutableColumnPtr res = ColumnVector::create(rows); \ - MutableColumnPtr null_map = result_type->isNullable() ? 
ColumnUInt8::create(rows) : nullptr; \ - executeInstructionsColumnar(instructions, rows, res, null_map, result_type->isNullable()); \ - if (!result_type->isNullable()) \ - return std::move(res); \ + MutableColumnPtr res = result_type->createColumn(); \ + res->reserve(rows); \ + if (result_type->isNullable()) \ + { \ + auto & res_nullable = assert_cast(*res); \ + auto & res_data = assert_cast &>(res_nullable.getNestedColumn()).getData(); \ + auto & res_null_map = res_nullable.getNullMapData(); \ + executeInstructionsColumnar(instructions, rows, res_data, &res_null_map); \ + } \ else \ - return ColumnNullable::create(std::move(res), std::move(null_map)); \ + { \ + auto & res_data = assert_cast &>(*res).getData(); \ + executeInstructionsColumnar(instructions, rows, res_data, nullptr); \ + } \ + return std::move(res); \ } #define ENUMERATE_NUMERIC_TYPES(M, INDEX) \ - M(UInt8, INDEX) \ - M(UInt16, INDEX) \ - M(UInt32, INDEX) \ - M(UInt64, INDEX) \ - M(Int8, INDEX) \ - M(Int16, INDEX) \ - M(Int32, INDEX) \ - M(Int64, INDEX) \ - M(UInt128, INDEX) \ - M(UInt256, INDEX) \ - M(Int128, INDEX) \ - M(Int256, INDEX) \ - M(Float32, INDEX) \ - M(Float64, INDEX) \ + M(UInt8, UInt8, INDEX) \ + M(UInt16, UInt16, INDEX) \ + M(UInt32, UInt32, INDEX) \ + M(UInt64, UInt64, INDEX) \ + M(Int8, Int8, INDEX) \ + M(Int16, Int16, INDEX) \ + M(Int32, Int32, INDEX) \ + M(Int64, Int64, INDEX) \ + M(Float32, Float32, INDEX) \ + M(Float64, Float64, INDEX) \ + M(UInt128, UInt128, INDEX) \ + M(UInt256, UInt256, INDEX) \ + M(Int128, Int128, INDEX) \ + M(Int256, Int256, INDEX) \ + M(Decimal32, Decimal32, INDEX) \ + M(Decimal64, Decimal64, INDEX) \ + M(Decimal128, Decimal128, INDEX) \ + M(Decimal256, Decimal256, INDEX) \ + M(Date, UInt16, INDEX) \ + M(Date32, Int32, INDEX) \ + M(DateTime, UInt32, INDEX) \ + M(DateTime64, DateTime64, INDEX) \ + M(Enum8, Int8, INDEX) \ + M(Enum16, Int16, INDEX) \ + M(IPv4, IPv4, INDEX) \ + M(IPv6, IPv6, INDEX) \ throw Exception( \ ErrorCodes::NOT_IMPLEMENTED, "Columnar execution of function {} not implemented for type {}", getName(), result_type->getName()); + ENUMERATE_NUMERIC_TYPES(EXECUTE_INSTRUCTIONS_COLUMNAR, UInt8) + /* size_t num_instructions = instructions.size(); if (num_instructions <= std::numeric_limits::max()) { @@ -299,7 +330,10 @@ public: else throw Exception( ErrorCodes::LOGICAL_ERROR, "Instruction size({}) of function {} is out of range", getName(), result_type->getName()); + */ } +#undef ENUMERATE_NUMERIC_TYPES +#undef EXECUTE_INSTRUCTIONS_COLUMNAR private: @@ -382,13 +416,16 @@ private: } template - static void executeInstructionsColumnar(std::vector & instructions, size_t rows, const MutableColumnPtr & res, const MutableColumnPtr & null_map, bool nullable) + static void executeInstructionsColumnar( + std::vector & instructions, + size_t rows, + PaddedPODArray & res_data, + PaddedPODArray * res_null_map = nullptr) { PaddedPODArray inserts(rows, static_cast(instructions.size())); calculateInserts(instructions, rows, inserts); - PaddedPODArray & res_data = assert_cast &>(*res).getData(); - if (!nullable) + if (!res_null_map) { for (size_t row_i = 0; row_i < rows; ++row_i) { @@ -399,10 +436,9 @@ private: } else { - PaddedPODArray & null_map_data = assert_cast(*null_map).getData(); - std::vector data_cols(instructions.size()); + std::vector data_cols(instructions.size()); std::vector null_map_cols(instructions.size()); - ColumnPtr shared_null_map_col = nullptr; + PaddedPODArray shared_null_map(rows, 0); for (size_t i = 0; i < instructions.size(); ++i) { if 
(instructions[i].source->isNullable()) @@ -416,24 +452,21 @@ private: nullable_col = assert_cast(data_column.get()); } null_map_cols[i] = assert_cast(*nullable_col->getNullMapColumnPtr()).getData().data(); - data_cols[i] = assert_cast &>(*nullable_col->getNestedColumnPtr()).getData().data(); + data_cols[i] = assert_cast &>(*nullable_col->getNestedColumnPtr()).getData().data(); } else { - if (!shared_null_map_col) - { - shared_null_map_col = ColumnUInt8::create(rows, 0); - } - null_map_cols[i] = assert_cast(*shared_null_map_col).getData().data(); - data_cols[i] = assert_cast &>(*instructions[i].source).getData().data(); + null_map_cols[i] = shared_null_map.data(); + data_cols[i] = assert_cast &>(*instructions[i].source).getData().data(); } } for (size_t row_i = 0; row_i < rows; ++row_i) { - auto & instruction = instructions[inserts[row_i]]; + S insert = inserts[row_i]; + auto & instruction = instructions[insert]; size_t index = instruction.source_is_constant ? 0 : row_i; - res_data[row_i] = *(data_cols[inserts[row_i]] + index); - null_map_data[row_i] = *(null_map_cols[inserts[row_i]] + index); + res_data[row_i] = *(data_cols[insert] + index); + (*res_null_map)[row_i] = *(null_map_cols[insert] + index); } } } From ab6ab8bfac4c0b5f2b941b678591cc7d318d6bdf Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 25 Feb 2024 17:11:22 +0000 Subject: [PATCH 036/356] replace only secret arguments with [HIDDEN], also hide it in TreeNode dump --- src/Analyzer/ConstantNode.cpp | 4 +- src/Analyzer/IQueryTreeNode.cpp | 2 + src/Analyzer/IQueryTreeNode.h | 6 + src/Analyzer/Passes/QueryAnalysisPass.cpp | 27 +- src/Parsers/ASTFunction.cpp | 503 +------------------ src/Parsers/FunctionSecretArgumentsFinder.h | 514 ++++++++++++++++++++ 6 files changed, 539 insertions(+), 517 deletions(-) create mode 100644 src/Parsers/FunctionSecretArgumentsFinder.h diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 69bed3dbe90..837c05ff5cb 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -45,10 +45,10 @@ void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state if (hasAlias()) buffer << ", alias: " << getAlias(); - buffer << ", constant_value: " << constant_value->getValue().dump(); + buffer << ", constant_value: " << (is_masked ? 
"[HIDDEN]" : constant_value->getValue().dump()); buffer << ", constant_value_type: " << constant_value->getType()->getName(); - if (getSourceExpression()) + if (!is_masked && getSourceExpression()) { buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n'; getSourceExpression()->dumpTreeImpl(buffer, format_state, indent + 4); diff --git a/src/Analyzer/IQueryTreeNode.cpp b/src/Analyzer/IQueryTreeNode.cpp index d61cb0ffab1..3ef323c9648 100644 --- a/src/Analyzer/IQueryTreeNode.cpp +++ b/src/Analyzer/IQueryTreeNode.cpp @@ -45,12 +45,14 @@ const char * toString(QueryTreeNodeType type) } IQueryTreeNode::IQueryTreeNode(size_t children_size, size_t weak_pointers_size) + : is_masked(false) { children.resize(children_size); weak_pointers.resize(weak_pointers_size); } IQueryTreeNode::IQueryTreeNode(size_t children_size) + : is_masked(false) { children.resize(children_size); } diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h index b07aa2d31b0..c08a8860749 100644 --- a/src/Analyzer/IQueryTreeNode.h +++ b/src/Analyzer/IQueryTreeNode.h @@ -251,6 +251,11 @@ public: return children; } + void setMasked(bool masked = true) + { + is_masked = masked; + } + protected: /** Construct query tree node. * Resize children to children size. @@ -281,6 +286,7 @@ protected: QueryTreeNodes children; QueryTreeWeakNodes weak_pointers; + bool is_masked; private: String alias; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index e25dbf52669..31f07b4ec77 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -1373,7 +1374,7 @@ private: ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); - ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); + ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const std::pair & secrets = std::pair()); ProjectionNames resolveSortNodeList(QueryTreeNodePtr & sort_node_list, IdentifierResolveScope & scope); @@ -5111,22 +5112,17 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } /// Resolve function arguments - + FunctionSecretArgumentsFinder::Result secret_arguments = FunctionSecretArgumentsFinder{function_node_ptr->toAST()->as()}.getResult(); bool allow_table_expressions = is_special_function_in; auto arguments_projection_names = resolveExpressionNodeList(function_node_ptr->getArgumentsNode(), scope, true /*allow_lambda_expression*/, - allow_table_expressions /*allow_table_expression*/); + allow_table_expressions /*allow_table_expression*/, + {secret_arguments.start, secret_arguments.count}); + + for (size_t n = secret_arguments.start; n < secret_arguments.start + secret_arguments.count; ++n) + arguments_projection_names[n] = "[HIDDEN]"; - if (function_node_ptr->toAST()->hasSecretParts()) - { - for (auto & argument : arguments_projection_names) - { - SipHash hash; - hash.update(argument); - argument = getHexUIntLowercase(hash.get128()); - } - } auto & function_node = *function_node_ptr; /// Replace right IN function argument if it is table or table function with subquery that read ordinary columns @@ -6111,7 +6107,7 @@ 
ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * Example: CREATE TABLE test_table (id UInt64, value UInt64) ENGINE=TinyLog; SELECT plus(*) FROM test_table; * Example: SELECT *** FROM system.one; */ -ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) +ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const std::pair & secrets) { auto & node_list_typed = node_list->as(); size_t node_list_size = node_list_typed.getNodes().size(); @@ -6121,10 +6117,13 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node ProjectionNames result_projection_names; + size_t n = 0; for (auto & node : node_list_typed.getNodes()) { auto node_to_resolve = node; auto expression_node_projection_names = resolveExpressionNode(node_to_resolve, scope, allow_lambda_expression, allow_table_expression); + if (n >= secrets.first && n < secrets.first + secrets.second) + node_to_resolve->setMasked(); size_t expected_projection_names_size = 1; if (auto * expression_list = node_to_resolve->as()) @@ -6146,6 +6145,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node result_projection_names.insert(result_projection_names.end(), expression_node_projection_names.begin(), expression_node_projection_names.end()); expression_node_projection_names.clear(); + + ++n; } node_list_typed.getNodes() = std::move(result_nodes); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index ba4c7db96e6..7468c41910a 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -36,508 +37,6 @@ namespace ErrorCodes } -namespace -{ - /// Finds arguments of a specified function which should not be displayed for most users for security reasons. - /// That involves passwords and secret keys. - class FunctionSecretArgumentsFinder - { - public: - explicit FunctionSecretArgumentsFinder(const ASTFunction & function_) : function(function_) - { - if (!function.arguments) - return; - - const auto * expr_list = function.arguments->as(); - if (!expr_list) - return; - - arguments = &expr_list->children; - switch (function.kind) - { - case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; - case ASTFunction::Kind::WINDOW_FUNCTION: break; - case ASTFunction::Kind::LAMBDA_FUNCTION: break; - case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; - case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; - case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; - } - } - - struct Result - { - /// Result constructed by default means no arguments will be hidden. - size_t start = static_cast(-1); - size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). - /// In all known cases secret arguments are consecutive - bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. - /// E.g. 
"headers" in `url('..', headers('foo' = '[HIDDEN]'))` - std::vector nested_maps; - - bool hasSecrets() const - { - return count != 0 || !nested_maps.empty(); - } - }; - - Result getResult() const { return result; } - - private: - const ASTFunction & function; - const ASTs * arguments = nullptr; - Result result; - - void markSecretArgument(size_t index, bool argument_is_named = false) - { - if (index >= arguments->size()) - return; - if (!result.count) - { - result.start = index; - result.are_named = argument_is_named; - } - chassert(index >= result.start); /// We always check arguments consecutively - result.count = index + 1 - result.start; - if (!argument_is_named) - result.are_named = false; - } - - void findOrdinaryFunctionSecretArguments() - { - if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) - { - /// mysql('host:port', 'database', 'table', 'user', 'password', ...) - /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) - /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) - findMySQLFunctionSecretArguments(); - } - else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") || - (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg")) - { - /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) - findS3FunctionSecretArguments(/* is_cluster_function= */ false); - } - else if (function.name == "s3Cluster") - { - /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) - findS3FunctionSecretArguments(/* is_cluster_function= */ true); - } - else if ((function.name == "remote") || (function.name == "remoteSecure")) - { - /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) - findRemoteFunctionSecretArguments(); - } - else if ((function.name == "encrypt") || (function.name == "decrypt") || - (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || - (function.name == "tryDecrypt")) - { - /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) - findEncryptionFunctionSecretArguments(); - } - else if (function.name == "url") - { - findURLSecretArguments(); - } - } - - void findMySQLFunctionSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// mysql(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - } - else - { - /// mysql('host:port', 'database', 'table', 'user', 'password', ...) - markSecretArgument(4); - } - } - - /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should - /// always be at the end). Marks "headers" as secret, if found. - size_t excludeS3OrURLNestedMaps() - { - size_t count = arguments->size(); - while (count > 0) - { - const ASTFunction * f = arguments->at(count - 1)->as(); - if (!f) - break; - if (f->name == "headers") - result.nested_maps.push_back(f->name); - else if (f->name != "extra_credentials") - break; - count -= 1; - } - return count; - } - - void findS3FunctionSecretArguments(bool is_cluster_function) - { - /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. - size_t url_arg_idx = is_cluster_function ? 1 : 0; - - if (!is_cluster_function && isNamedCollectionName(0)) - { - /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) 
- findSecretNamedArgument("secret_access_key", 1); - return; - } - - /// We should check other arguments first because we don't need to do any replacement in case of - /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - size_t count = excludeS3OrURLNestedMaps(); - if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) - { - String second_arg; - if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) - { - if (boost::iequals(second_arg, "NOSIGN")) - return; /// The argument after 'url' is "NOSIGN". - - if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) - return; /// The argument after 'url' is a format: s3('url', 'format', ...) - } - } - - /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: - /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) - /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - if (url_arg_idx + 2 < count) - markSecretArgument(url_arg_idx + 2); - } - - void findURLSecretArguments() - { - if (!isNamedCollectionName(0)) - excludeS3OrURLNestedMaps(); - } - - bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const - { - if (arg_idx >= arguments->size()) - return false; - - return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); - } - - static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) - { - if (const auto * literal = argument.as()) - { - if (literal->value.getType() != Field::Types::String) - return false; - if (res) - *res = literal->value.safeGet(); - return true; - } - - if (allow_identifier) - { - if (const auto * id = argument.as()) - { - if (res) - *res = id->name(); - return true; - } - } - - return false; - } - - void findRemoteFunctionSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// remote(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - return; - } - - /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: - /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) - /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) - /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) - - /// But we should check the number of arguments first because we don't need to do any replacements in case of - /// remote('addresses_expr', db.table) - if (arguments->size() < 3) - return; - - size_t arg_num = 1; - - /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. - const auto * table_function = (*arguments)[arg_num]->as(); - if (table_function && KnownTableFunctionNames::instance().exists(table_function->name)) - { - ++arg_num; - } - else - { - std::optional database; - std::optional qualified_table_name; - if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) - { - /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. - /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' - /// before the argument 'password'. So it's safer to wipe two arguments just in case. 
- /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string - /// before wiping it (because the `password` argument is always a literal string). - if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) - { - /// Wipe either `password` or `user`. - markSecretArgument(arg_num + 2); - } - if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) - { - /// Wipe either `password` or `sharding_key`. - markSecretArgument(arg_num + 3); - } - return; - } - - /// Skip the current argument (which is either a database name or a qualified table name). - ++arg_num; - if (database) - { - /// Skip the 'table' argument if the previous argument was a database name. - ++arg_num; - } - } - - /// Skip username. - ++arg_num; - - /// Do our replacement: - /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) - /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string - /// before wiping it (because the `password` argument is always a literal string). - bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); - if (can_be_password) - markSecretArgument(arg_num); - } - - /// Tries to get either a database name or a qualified table name from an argument. - /// Empty string is also allowed (it means the default database). - /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password. - bool tryGetDatabaseNameOrQualifiedTableName( - size_t arg_idx, - std::optional & res_database, - std::optional & res_qualified_table_name) const - { - res_database.reset(); - res_qualified_table_name.reset(); - - String str; - if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) - return false; - - if (str.empty()) - { - res_database = ""; - return true; - } - - auto qualified_table_name = QualifiedTableName::tryParseFromString(str); - if (!qualified_table_name) - return false; - - if (qualified_table_name->database.empty()) - res_database = std::move(qualified_table_name->table); - else - res_qualified_table_name = std::move(qualified_table_name); - return true; - } - - void findEncryptionFunctionSecretArguments() - { - if (arguments->empty()) - return; - - /// We replace all arguments after 'mode' with '[HIDDEN]': - /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') - result.start = 1; - result.count = arguments->size() - 1; - } - - void findTableEngineSecretArguments() - { - const String & engine_name = function.name; - if (engine_name == "ExternalDistributed") - { - /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - findExternalDistributedTableEngineSecretArguments(); - } - else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || - (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) - { - /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) - /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) 
- findMySQLFunctionSecretArguments(); - } - else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") || - (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue")) - { - /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) - findS3TableEngineSecretArguments(); - } - else if (engine_name == "URL") - { - findURLSecretArguments(); - } - } - - void findExternalDistributedTableEngineSecretArguments() - { - if (isNamedCollectionName(1)) - { - /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 2); - } - else - { - /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - markSecretArgument(5); - } - } - - void findS3TableEngineSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// S3(named_collection, ..., secret_access_key = 'secret_access_key') - findSecretNamedArgument("secret_access_key", 1); - return; - } - - /// We should check other arguments first because we don't need to do any replacement in case of - /// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - /// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)]) - size_t count = excludeS3OrURLNestedMaps(); - if ((3 <= count) && (count <= 4)) - { - String second_arg; - if (tryGetStringFromArgument(1, &second_arg)) - { - if (boost::iequals(second_arg, "NOSIGN")) - return; /// The argument after 'url' is "NOSIGN". - - if (count == 3) - { - if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) - return; /// The argument after 'url' is a format: S3('url', 'format', ...) - } - } - } - - /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - if (2 < count) - markSecretArgument(2); - } - - void findDatabaseEngineSecretArguments() - { - const String & engine_name = function.name; - if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || - (engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") || - (engine_name == "MaterializedPostgreSQL")) - { - /// MySQL('host:port', 'database', 'user', 'password') - /// PostgreSQL('host:port', 'database', 'user', 'password') - findMySQLDatabaseSecretArguments(); - } - else if (engine_name == "S3") - { - /// S3('url', 'access_key_id', 'secret_access_key') - findS3DatabaseSecretArguments(); - } - } - - void findMySQLDatabaseSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// MySQL(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - } - else - { - /// MySQL('host:port', 'database', 'user', 'password') - markSecretArgument(3); - } - } - - void findS3DatabaseSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// S3(named_collection, ..., secret_access_key = 'password', ...) - findSecretNamedArgument("secret_access_key", 1); - } - else - { - /// S3('url', 'access_key_id', 'secret_access_key') - markSecretArgument(2); - } - } - - void findBackupNameSecretArguments() - { - const String & engine_name = function.name; - if (engine_name == "S3") - { - /// BACKUP ... 
TO S3(url, [aws_access_key_id, aws_secret_access_key]) - markSecretArgument(2); - } - } - - /// Whether a specified argument can be the name of a named collection? - bool isNamedCollectionName(size_t arg_idx) const - { - if (arguments->size() <= arg_idx) - return false; - - const auto * identifier = (*arguments)[arg_idx]->as(); - return identifier != nullptr; - } - - /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. - void findSecretNamedArgument(const std::string_view & key, size_t start = 0) - { - for (size_t i = start; i < arguments->size(); ++i) - { - const auto & argument = (*arguments)[i]; - const auto * equals_func = argument->as(); - if (!equals_func || (equals_func->name != "equals")) - continue; - - const auto * expr_list = equals_func->arguments->as(); - if (!expr_list) - continue; - - const auto & equal_args = expr_list->children; - if (equal_args.size() != 2) - continue; - - String found_key; - if (!tryGetStringFromArgument(*equal_args[0], &found_key)) - continue; - - if (found_key == key) - markSecretArgument(i, /* argument_is_named= */ true); - } - } - }; -} - - void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const { /// These functions contain some unexpected ASTs in arguments (e.g. SETTINGS or even a SELECT query) diff --git a/src/Parsers/FunctionSecretArgumentsFinder.h b/src/Parsers/FunctionSecretArgumentsFinder.h new file mode 100644 index 00000000000..355dd99a21a --- /dev/null +++ b/src/Parsers/FunctionSecretArgumentsFinder.h @@ -0,0 +1,514 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + + +/// Finds arguments of a specified function which should not be displayed for most users for security reasons. +/// That involves passwords and secret keys. +class FunctionSecretArgumentsFinder +{ +public: + explicit FunctionSecretArgumentsFinder(const ASTFunction & function_) : function(function_) + { + if (!function.arguments) + return; + + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return; + + arguments = &expr_list->children; + switch (function.kind) + { + case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; + case ASTFunction::Kind::WINDOW_FUNCTION: break; + case ASTFunction::Kind::LAMBDA_FUNCTION: break; + case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; + case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; + case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; + } + } + + struct Result + { + /// Result constructed by default means no arguments will be hidden. + size_t start = static_cast(-1); + size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). + /// In all known cases secret arguments are consecutive + bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. + /// E.g. 
"headers" in `url('..', headers('foo' = '[HIDDEN]'))` + std::vector nested_maps; + + bool hasSecrets() const + { + return count != 0 || !nested_maps.empty(); + } + }; + + Result getResult() const { return result; } + +private: + const ASTFunction & function; + const ASTs * arguments = nullptr; + Result result; + + void markSecretArgument(size_t index, bool argument_is_named = false) + { + if (index >= arguments->size()) + return; + if (!result.count) + { + result.start = index; + result.are_named = argument_is_named; + } + chassert(index >= result.start); /// We always check arguments consecutively + result.count = index + 1 - result.start; + if (!argument_is_named) + result.are_named = false; + } + + void findOrdinaryFunctionSecretArguments() + { + if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) + /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) + findMySQLFunctionSecretArguments(); + } + else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") || + (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg")) + { + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (function.name == "s3Cluster") + { + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ true); + } + else if ((function.name == "remote") || (function.name == "remoteSecure")) + { + /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) + findRemoteFunctionSecretArguments(); + } + else if ((function.name == "encrypt") || (function.name == "decrypt") || + (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || + (function.name == "tryDecrypt")) + { + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) + findEncryptionFunctionSecretArguments(); + } + else if (function.name == "url") + { + findURLSecretArguments(); + } + } + + void findMySQLFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// mysql(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + markSecretArgument(4); + } + } + + /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should + /// always be at the end). Marks "headers" as secret, if found. + size_t excludeS3OrURLNestedMaps() + { + size_t count = arguments->size(); + while (count > 0) + { + const ASTFunction * f = arguments->at(count - 1)->as(); + if (!f) + break; + if (f->name == "headers") + result.nested_maps.push_back(f->name); + else if (f->name != "extra_credentials") + break; + count -= 1; + } + return count; + } + + void findS3FunctionSecretArguments(bool is_cluster_function) + { + /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) 
+ findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (url_arg_idx + 2 < count) + markSecretArgument(url_arg_idx + 2); + } + + void findURLSecretArguments() + { + if (!isNamedCollectionName(0)) + excludeS3OrURLNestedMaps(); + } + + bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const + { + if (arg_idx >= arguments->size()) + return false; + + return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); + } + + static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) + { + if (const auto * literal = argument.as()) + { + if (literal->value.getType() != Field::Types::String) + return false; + if (res) + *res = literal->value.safeGet(); + return true; + } + + if (allow_identifier) + { + if (const auto * id = argument.as()) + { + if (res) + *res = id->name(); + return true; + } + } + + return false; + } + + void findRemoteFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// remote(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + return; + } + + /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: + /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) + + /// But we should check the number of arguments first because we don't need to do any replacements in case of + /// remote('addresses_expr', db.table) + if (arguments->size() < 3) + return; + + size_t arg_num = 1; + + /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. + const auto * table_function = (*arguments)[arg_num]->as(); + if (table_function && KnownTableFunctionNames::instance().exists(table_function->name)) + { + ++arg_num; + } + else + { + std::optional database; + std::optional qualified_table_name; + if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) + { + /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. + /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' + /// before the argument 'password'. So it's safer to wipe two arguments just in case. 
+ /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `user`. + markSecretArgument(arg_num + 2); + } + if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `sharding_key`. + markSecretArgument(arg_num + 3); + } + return; + } + + /// Skip the current argument (which is either a database name or a qualified table name). + ++arg_num; + if (database) + { + /// Skip the 'table' argument if the previous argument was a database name. + ++arg_num; + } + } + + /// Skip username. + ++arg_num; + + /// Do our replacement: + /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) + /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); + if (can_be_password) + markSecretArgument(arg_num); + } + + /// Tries to get either a database name or a qualified table name from an argument. + /// Empty string is also allowed (it means the default database). + /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password. + bool tryGetDatabaseNameOrQualifiedTableName( + size_t arg_idx, + std::optional & res_database, + std::optional & res_qualified_table_name) const + { + res_database.reset(); + res_qualified_table_name.reset(); + + String str; + if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) + return false; + + if (str.empty()) + { + res_database = ""; + return true; + } + + auto qualified_table_name = QualifiedTableName::tryParseFromString(str); + if (!qualified_table_name) + return false; + + if (qualified_table_name->database.empty()) + res_database = std::move(qualified_table_name->table); + else + res_qualified_table_name = std::move(qualified_table_name); + return true; + } + + void findEncryptionFunctionSecretArguments() + { + if (arguments->empty()) + return; + + /// We replace all arguments after 'mode' with '[HIDDEN]': + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') + result.start = 1; + result.count = arguments->size() - 1; + } + + void findTableEngineSecretArguments() + { + const String & engine_name = function.name; + if (engine_name == "ExternalDistributed") + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + findExternalDistributedTableEngineSecretArguments(); + } + else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || + (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) + { + /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) + /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) 
+ findMySQLFunctionSecretArguments(); + } + else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") || + (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue")) + { + /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) + findS3TableEngineSecretArguments(); + } + else if (engine_name == "URL") + { + findURLSecretArguments(); + } + } + + void findExternalDistributedTableEngineSecretArguments() + { + if (isNamedCollectionName(1)) + { + /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 2); + } + else + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + markSecretArgument(5); + } + } + + void findS3TableEngineSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// S3(named_collection, ..., secret_access_key = 'secret_access_key') + findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((3 <= count) && (count <= 4)) + { + String second_arg; + if (tryGetStringFromArgument(1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (count == 3) + { + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: S3('url', 'format', ...) + } + } + } + + /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (2 < count) + markSecretArgument(2); + } + + void findDatabaseEngineSecretArguments() + { + const String & engine_name = function.name; + if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || + (engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") || + (engine_name == "MaterializedPostgreSQL")) + { + /// MySQL('host:port', 'database', 'user', 'password') + /// PostgreSQL('host:port', 'database', 'user', 'password') + findMySQLDatabaseSecretArguments(); + } + else if (engine_name == "S3") + { + /// S3('url', 'access_key_id', 'secret_access_key') + findS3DatabaseSecretArguments(); + } + } + + void findMySQLDatabaseSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// MySQL(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// MySQL('host:port', 'database', 'user', 'password') + markSecretArgument(3); + } + } + + void findS3DatabaseSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// S3(named_collection, ..., secret_access_key = 'password', ...) + findSecretNamedArgument("secret_access_key", 1); + } + else + { + /// S3('url', 'access_key_id', 'secret_access_key') + markSecretArgument(2); + } + } + + void findBackupNameSecretArguments() + { + const String & engine_name = function.name; + if (engine_name == "S3") + { + /// BACKUP ... 
TO S3(url, [aws_access_key_id, aws_secret_access_key]) + markSecretArgument(2); + } + } + + /// Whether a specified argument can be the name of a named collection? + bool isNamedCollectionName(size_t arg_idx) const + { + if (arguments->size() <= arg_idx) + return false; + + const auto * identifier = (*arguments)[arg_idx]->as(); + return identifier != nullptr; + } + + /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. + void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + for (size_t i = start; i < arguments->size(); ++i) + { + const auto & argument = (*arguments)[i]; + const auto * equals_func = argument->as(); + if (!equals_func || (equals_func->name != "equals")) + continue; + + const auto * expr_list = equals_func->arguments->as(); + if (!expr_list) + continue; + + const auto & equal_args = expr_list->children; + if (equal_args.size() != 2) + continue; + + String found_key; + if (!tryGetStringFromArgument(*equal_args[0], &found_key)) + continue; + + if (found_key == key) + markSecretArgument(i, /* argument_is_named= */ true); + } + } +}; + +} From 18eb75f4d2243f6c323c342c305f958bb37f4e3c Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Thu, 28 Dec 2023 14:13:52 +0800 Subject: [PATCH 037/356] lazy build join output --- src/Interpreters/HashJoin.cpp | 76 +++++++++++++------ .../performance/storage_join_direct_join.xml | 3 + 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 33dc178ca00..fbd9d522f47 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1034,6 +1034,12 @@ public: } }; + struct LazyOutput + { + PaddedPODArray blocks; + PaddedPODArray row_nums; + }; + AddedColumns( const Block & left_block, const Block & block_with_columns_to_add, @@ -1049,10 +1055,12 @@ public: size_t num_columns_to_add = block_with_columns_to_add.columns(); if (is_asof_join) ++num_columns_to_add; - + has_columns_to_add = num_columns_to_add > 0; columns.reserve(num_columns_to_add); type_name.reserve(num_columns_to_add); right_indexes.reserve(num_columns_to_add); + lazy_output.blocks.reserve(rows_to_add); + lazy_output.row_nums.reserve(rows_to_add); for (const auto & src_column : block_with_columns_to_add) { @@ -1089,6 +1097,34 @@ public: size_t size() const { return columns.size(); } + void buildOutput() + { + for (size_t i = 0; i < this->size(); ++i) + { + auto& col = columns[i]; + for (size_t j = 0; j < lazy_output.blocks.size(); ++j) + { + if (!lazy_output.blocks[j]) + { + type_name[i].type->insertDefaultInto(*col); + continue; + } + const auto & column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); + /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. 
+ if (is_join_get) + { + if (auto * nullable_col = typeid_cast(col.get()); + nullable_col && !column_from_block.column->isNullable()) + { + nullable_col->insertFromNotNullable(*column_from_block.column, lazy_output.row_nums[j]); + continue; + } + } + col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); + } + } + } + ColumnWithTypeAndName moveColumn(size_t i) { return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); @@ -1098,9 +1134,6 @@ public: template void appendFromBlock(const Block & block, size_t row_num) { - if constexpr (has_defaults) - applyLazyDefaults(); - #ifndef NDEBUG for (size_t j = 0; j < right_indexes.size(); ++j) { @@ -1126,33 +1159,20 @@ public: demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); } #endif - - if (is_join_get) + if (has_columns_to_add) { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - if (auto * nullable_col = nullable_column_ptrs[j]) - nullable_col->insertFromNotNullable(*column_from_block.column, row_num); - else - columns[j]->insertFrom(*column_from_block.column, row_num); - } - } - else - { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - columns[j]->insertFrom(*column_from_block.column, row_num); - } + lazy_output.blocks.emplace_back(reinterpret_cast(&block)); + lazy_output.row_nums.emplace_back(static_cast(row_num)); } } void appendDefaultRow() { - ++lazy_defaults_count; + if (has_columns_to_add) + { + lazy_output.blocks.emplace_back(0); + lazy_output.row_nums.emplace_back(0); + } } void applyLazyDefaults() @@ -1169,6 +1189,10 @@ public: std::vector join_on_keys; + // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, + // default_count cannot represent the position of the row + LazyOutput lazy_output; + size_t max_joined_block_rows = 0; size_t rows_to_add; std::unique_ptr offsets_to_replicate; @@ -1198,6 +1222,7 @@ private: std::vector right_indexes; size_t lazy_defaults_count = 0; + bool has_columns_to_add; /// for ASOF const IColumn * left_asof_key = nullptr; @@ -1702,6 +1727,7 @@ Block HashJoin::joinBlockImpl( added_columns.join_on_keys.clear(); Block remaining_block = sliceBlock(block, num_joined); + added_columns.buildOutput(); for (size_t i = 0; i < added_columns.size(); ++i) block.insert(added_columns.moveColumn(i)); diff --git a/tests/performance/storage_join_direct_join.xml b/tests/performance/storage_join_direct_join.xml index 2fc63c2c926..70e55ff82a4 100644 --- a/tests/performance/storage_join_direct_join.xml +++ b/tests/performance/storage_join_direct_join.xml @@ -14,6 +14,9 @@ toString(number), toString(number) FROM numbers(1000000); SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; + SELECT keys.key, value1, value2, value3 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; + SELECT keys.key, value1, value2, value3, value4, value5 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; + SELECT keys.key, value1, value2, value3, value4, value5, value6, value7, value8, value9, FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null SETTINGS 
allow_experimental_analyzer=1 \ No newline at end of file From 5c42f2c0411ba9a3604405d41370304a26a6f542 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Fri, 29 Dec 2023 15:29:54 +0800 Subject: [PATCH 038/356] optimize default value --- src/Interpreters/HashJoin.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index fbd9d522f47..f994336cd53 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1102,13 +1102,20 @@ public: for (size_t i = 0; i < this->size(); ++i) { auto& col = columns[i]; + size_t default_count = 0; for (size_t j = 0; j < lazy_output.blocks.size(); ++j) { if (!lazy_output.blocks[j]) { - type_name[i].type->insertDefaultInto(*col); +// type_name[i].type->insertDefaultInto(*col); + default_count ++; continue; } + if (default_count > 0) + { + JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); + default_count = 0; + } const auto & column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. if (is_join_get) @@ -1122,6 +1129,11 @@ public: } col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); } + if (default_count > 0) + { + JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); + default_count = 0; + } } } From 3a2c9a8793918fb84d25a893f525a82a62d35691 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Tue, 2 Jan 2024 10:06:31 +0800 Subject: [PATCH 039/356] update --- src/Interpreters/HashJoin.cpp | 56 +++++++------------ .../performance/storage_join_direct_join.xml | 3 - 2 files changed, 20 insertions(+), 39 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index f994336cd53..ca6aa414bc9 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1103,19 +1103,23 @@ public: { auto& col = columns[i]; size_t default_count = 0; - for (size_t j = 0; j < lazy_output.blocks.size(); ++j) + auto apply_default = [&]() { - if (!lazy_output.blocks[j]) - { -// type_name[i].type->insertDefaultInto(*col); - default_count ++; - continue; - } if (default_count > 0) { JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); default_count = 0; } + }; + + for (size_t j = 0; j < lazy_output.blocks.size(); ++j) + { + if (!lazy_output.blocks[j]) + { + default_count ++; + continue; + } + apply_default(); const auto & column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. 
if (is_join_get) @@ -1129,11 +1133,7 @@ public: } col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); } - if (default_count > 0) - { - JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); - default_count = 0; - } + apply_default(); } } @@ -1143,7 +1143,6 @@ public: } - template void appendFromBlock(const Block & block, size_t row_num) { #ifndef NDEBUG @@ -1187,16 +1186,6 @@ public: } } - void applyLazyDefaults() - { - if (lazy_defaults_count) - { - for (size_t j = 0, size = right_indexes.size(); j < size; ++j) - JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count); - lazy_defaults_count = 0; - } - } - const IColumn & leftAsofKey() const { return *left_asof_key; } std::vector join_on_keys; @@ -1233,7 +1222,6 @@ private: std::vector nullable_column_ptrs; std::vector right_indexes; - size_t lazy_defaults_count = 0; bool has_columns_to_add; /// for ASOF const IColumn * left_asof_key = nullptr; @@ -1345,7 +1333,7 @@ public: } }; -template +template void addFoundRowAll( const typename Map::mapped_type & mapped, AddedColumns & added, @@ -1353,9 +1341,6 @@ void addFoundRowAll( KnownRowsHolder & known_rows [[maybe_unused]], JoinStuff::JoinUsedFlags * used_flags [[maybe_unused]]) { - if constexpr (add_missing) - added.applyLazyDefaults(); - if constexpr (multiple_disjuncts) { std::unique_ptr::Type>> new_known_rows_ptr; @@ -1364,7 +1349,7 @@ void addFoundRowAll( { if (!known_rows.isKnown(std::make_pair(it->block, it->row_num))) { - added.appendFromBlock(*it->block, it->row_num); + added.appendFromBlock(*it->block, it->row_num); ++current_offset; if (!new_known_rows_ptr) { @@ -1388,7 +1373,7 @@ void addFoundRowAll( { for (auto it = mapped.begin(); it.ok(); ++it) { - added.appendFromBlock(*it->block, it->row_num); + added.appendFromBlock(*it->block, it->row_num); ++current_offset; } } @@ -1477,7 +1462,7 @@ NO_INLINE size_t joinRightColumns( else used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*row_ref.block, row_ref.row_num); + added_columns.appendFromBlock(*row_ref.block, row_ref.row_num); } else addNotFoundRow(added_columns, current_offset); @@ -1487,7 +1472,7 @@ NO_INLINE size_t joinRightColumns( setUsed(added_columns.filter, i); used_flags.template setUsed(find_result); auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); } else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) { @@ -1497,7 +1482,7 @@ NO_INLINE size_t joinRightColumns( { auto used_flags_opt = join_features.need_flags ? 
&used_flags : nullptr; setUsed(added_columns.filter, i); - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); } } else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) @@ -1508,7 +1493,7 @@ NO_INLINE size_t joinRightColumns( if (used_once) { setUsed(added_columns.filter, i); - added_columns.appendFromBlock(*mapped.block, mapped.row_num); + added_columns.appendFromBlock(*mapped.block, mapped.row_num); } break; @@ -1526,7 +1511,7 @@ NO_INLINE size_t joinRightColumns( { setUsed(added_columns.filter, i); used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*mapped.block, mapped.row_num); + added_columns.appendFromBlock(*mapped.block, mapped.row_num); if (join_features.is_any_or_semi_join) { @@ -1549,7 +1534,6 @@ NO_INLINE size_t joinRightColumns( } } - added_columns.applyLazyDefaults(); return i; } diff --git a/tests/performance/storage_join_direct_join.xml b/tests/performance/storage_join_direct_join.xml index 70e55ff82a4..2fc63c2c926 100644 --- a/tests/performance/storage_join_direct_join.xml +++ b/tests/performance/storage_join_direct_join.xml @@ -14,9 +14,6 @@ toString(number), toString(number) FROM numbers(1000000); SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; - SELECT keys.key, value1, value2, value3 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; - SELECT keys.key, value1, value2, value3, value4, value5 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; - SELECT keys.key, value1, value2, value3, value4, value5, value6, value7, value8, value9, FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null SETTINGS allow_experimental_analyzer=1 \ No newline at end of file From dc07f62c2c5c924693336c9553059dbb303cf6a7 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Tue, 2 Jan 2024 16:26:01 +0800 Subject: [PATCH 040/356] try new case --- tests/performance/storage_join_direct_join.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/performance/storage_join_direct_join.xml b/tests/performance/storage_join_direct_join.xml index 2fc63c2c926..d3bc25cd96b 100644 --- a/tests/performance/storage_join_direct_join.xml +++ b/tests/performance/storage_join_direct_join.xml @@ -8,12 +8,26 @@ value4 String, value5 String, value6 String, value7 String, value8 String, value9 String, value10 String) ENGINE = Join(ANY, LEFT, key); + CREATE TABLE dict2 (key UInt64, value1 UInt64, value2 Float64, value3 String, + value4 String, value5 String, value6 String, value7 String, value8 String, value9 String, + value10 String) ENGINE = MergeTree ORDER BY key; + INSERT INTO keys SELECT rand() FROM numbers(10000000); INSERT INTO dict SELECT rand(), rand()%1000, rand()*0.0001, toString(number), toString(number), toString(number), toString(number), toString(number), toString(number), toString(number), toString(number) FROM numbers(1000000); + INSERT INTO dict2 SELECT rand(), rand()%1000, rand()*0.0001, toString(number), + toString(number), toString(number), toString(number), toString(number), toString(number), + toString(number), toString(number) FROM numbers(1000000); + SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; + SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict2 AS d ON (keys.key = d.key) 
FORMAT Null; + SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null SETTINGS allow_experimental_analyzer=1 + + DROP TABLE IF EXISTS keys + DROP TABLE IF EXISTS dict + DROP TABLE IF EXISTS dict2 \ No newline at end of file From 93fc7a293fb9157c8c327dd748144b36ee80fde6 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Thu, 4 Jan 2024 10:27:59 +0800 Subject: [PATCH 041/356] add new any left join case --- tests/performance/any_join.xml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/performance/any_join.xml diff --git a/tests/performance/any_join.xml b/tests/performance/any_join.xml new file mode 100644 index 00000000000..ed473c36d55 --- /dev/null +++ b/tests/performance/any_join.xml @@ -0,0 +1,23 @@ + + + 1 + + + CREATE TABLE keys (key UInt64) ENGINE = MergeTree ORDER BY key; + CREATE TABLE dict (key UInt64, value1 UInt64, value2 Float64, value3 String, + value4 String, value5 String, value6 String, value7 String, value8 String, value9 String, + value10 String) ENGINE = MergeTree ORDER BY key; + + INSERT INTO keys SELECT rand() %500000 FROM numbers(10000000); + INSERT INTO dict SELECT rand() %300000, rand()%1000, rand()*0.0001, toString(number), + toString(number), toString(number), toString(number), toString(number), toString(number), + toString(number), toString(number) FROM numbers(1000000); + + + + SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; + SELECT keys.key, value1 FROM keys ALL INNER JOIN dict AS d ON (keys.key = d.key) FORMAT Null; + + DROP TABLE IF EXISTS keys + DROP TABLE IF EXISTS dict + \ No newline at end of file From b0943ab3e81709bb0db0a077cfe22916c518d5e0 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Thu, 22 Feb 2024 15:34:31 +0800 Subject: [PATCH 042/356] add lazyAddedColumns --- src/Interpreters/HashJoin.cpp | 266 ++++++++++++++++++++++++---------- 1 file changed, 186 insertions(+), 80 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index ca6aa414bc9..1ea12955409 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1034,12 +1034,6 @@ public: } }; - struct LazyOutput - { - PaddedPODArray blocks; - PaddedPODArray row_nums; - }; - AddedColumns( const Block & left_block, const Block & block_with_columns_to_add, @@ -1055,12 +1049,9 @@ public: size_t num_columns_to_add = block_with_columns_to_add.columns(); if (is_asof_join) ++num_columns_to_add; - has_columns_to_add = num_columns_to_add > 0; columns.reserve(num_columns_to_add); type_name.reserve(num_columns_to_add); right_indexes.reserve(num_columns_to_add); - lazy_output.blocks.reserve(rows_to_add); - lazy_output.row_nums.reserve(rows_to_add); for (const auto & src_column : block_with_columns_to_add) { @@ -1095,9 +1086,162 @@ public: } } + virtual ~AddedColumns() { } + size_t size() const { return columns.size(); } - void buildOutput() + virtual void buildOutput() + { + } + + ColumnWithTypeAndName moveColumn(size_t i) + { + return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); + } + + virtual void appendFromBlock(const Block & block, size_t row_num, bool has_defaults) + { + if (has_defaults) + applyLazyDefaults(); + +#ifndef NDEBUG + for (size_t j = 0; j < right_indexes.size(); ++j) + { + const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); + const auto * dest_column = columns[j].get(); + if (auto * nullable_col = 
nullable_column_ptrs[j]) + { + if (!is_join_get) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Columns {} and {} can have different nullability only in joinGetOrNull", + dest_column->getName(), column_from_block->getName()); + dest_column = nullable_col->getNestedColumnPtr().get(); + } + /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, + * because dictionaries can be different, while calling insertFrom on them is safe, for example: + * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) + * and + * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) + */ + if (typeid(*dest_column) != typeid(*column_from_block)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", + dest_column->getName(), column_from_block->getName(), + demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); + } +#endif + if (is_join_get) + { + size_t right_indexes_size = right_indexes.size(); + for (size_t j = 0; j < right_indexes_size; ++j) + { + const auto & column_from_block = block.getByPosition(right_indexes[j]); + if (auto * nullable_col = nullable_column_ptrs[j]) + nullable_col->insertFromNotNullable(*column_from_block.column, row_num); + else + columns[j]->insertFrom(*column_from_block.column, row_num); + } + } + else + { + size_t right_indexes_size = right_indexes.size(); + for (size_t j = 0; j < right_indexes_size; ++j) + { + const auto & column_from_block = block.getByPosition(right_indexes[j]); + columns[j]->insertFrom(*column_from_block.column, row_num); + } + } + } + + virtual void appendDefaultRow() + { + ++lazy_defaults_count; + } + + virtual void applyLazyDefaults() + { + if (lazy_defaults_count) + { + for (size_t j = 0, size = right_indexes.size(); j < size; ++j) + JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count); + lazy_defaults_count = 0; + } + } + + const IColumn & leftAsofKey() const { return *left_asof_key; } + + std::vector join_on_keys; + + size_t max_joined_block_rows = 0; + size_t rows_to_add; + std::unique_ptr offsets_to_replicate; + bool need_filter = false; + IColumn::Filter filter; + + void reserve(bool need_replicate) + { + if (!max_joined_block_rows) + return; + + /// Do not allow big allocations when user set max_joined_block_rows to huge value + size_t reserve_size = std::min(max_joined_block_rows, DEFAULT_BLOCK_SIZE * 2); + + if (need_replicate) + /// Reserve 10% more space for columns, because some rows can be repeated + reserve_size = static_cast(1.1 * reserve_size); + + for (auto & column : columns) + column->reserve(reserve_size); + } + +protected: + MutableColumns columns; + bool is_join_get; + std::vector right_indexes; + std::vector type_name; + std::vector nullable_column_ptrs; +private: + + + + + size_t lazy_defaults_count = 0; + /// for ASOF + const IColumn * left_asof_key = nullptr; + + + void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name) + { + columns.push_back(src_column.column->cloneEmpty()); + columns.back()->reserve(src_column.column->size()); + type_name.emplace_back(src_column.type, src_column.name, qualified_name); + } +}; + +class LazyAddedColumns : public AddedColumns +{ +public: + struct LazyOutput + { + PaddedPODArray blocks; + PaddedPODArray row_nums; + }; + + LazyAddedColumns( + const Block & left_block, + const Block & block_with_columns_to_add, + const Block & saved_block_sample, + 
const HashJoin & join, + std::vector && join_on_keys_, + bool is_asof_join, + bool is_join_get_) + : AddedColumns(left_block, block_with_columns_to_add, saved_block_sample, join, std::move(join_on_keys_), is_asof_join, is_join_get_) + { + has_columns_to_add = block_with_columns_to_add.columns() > 0; + lazy_output.blocks.reserve(rows_to_add); + lazy_output.row_nums.reserve(rows_to_add); + } + + virtual void buildOutput() override { for (size_t i = 0; i < this->size(); ++i) { @@ -1137,13 +1281,7 @@ public: } } - ColumnWithTypeAndName moveColumn(size_t i) - { - return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); - } - - - void appendFromBlock(const Block & block, size_t row_num) + virtual void appendFromBlock(const Block & block, size_t row_num, bool) override { #ifndef NDEBUG for (size_t j = 0; j < right_indexes.size(); ++j) @@ -1154,8 +1292,8 @@ public: { if (!is_join_get) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Columns {} and {} can have different nullability only in joinGetOrNull", - dest_column->getName(), column_from_block->getName()); + "Columns {} and {} can have different nullability only in joinGetOrNull", + dest_column->getName(), column_from_block->getName()); dest_column = nullable_col->getNestedColumnPtr().get(); } /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, @@ -1166,8 +1304,8 @@ public: */ if (typeid(*dest_column) != typeid(*column_from_block)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", - dest_column->getName(), column_from_block->getName(), - demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); + dest_column->getName(), column_from_block->getName(), + demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); } #endif if (has_columns_to_add) @@ -1177,7 +1315,7 @@ public: } } - void appendDefaultRow() + virtual void appendDefaultRow() override { if (has_columns_to_add) { @@ -1186,54 +1324,14 @@ public: } } - const IColumn & leftAsofKey() const { return *left_asof_key; } - - std::vector join_on_keys; + virtual void applyLazyDefaults() override { } +private : // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, // default_count cannot represent the position of the row LazyOutput lazy_output; - - size_t max_joined_block_rows = 0; - size_t rows_to_add; - std::unique_ptr offsets_to_replicate; - bool need_filter = false; - IColumn::Filter filter; - - void reserve(bool need_replicate) - { - if (!max_joined_block_rows) - return; - - /// Do not allow big allocations when user set max_joined_block_rows to huge value - size_t reserve_size = std::min(max_joined_block_rows, DEFAULT_BLOCK_SIZE * 2); - - if (need_replicate) - /// Reserve 10% more space for columns, because some rows can be repeated - reserve_size = static_cast(1.1 * reserve_size); - - for (auto & column : columns) - column->reserve(reserve_size); - } - -private: - std::vector type_name; - MutableColumns columns; - std::vector nullable_column_ptrs; - - std::vector right_indexes; bool has_columns_to_add; - /// for ASOF - const IColumn * left_asof_key = nullptr; - bool is_join_get; - - void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name) - { - columns.push_back(src_column.column->cloneEmpty()); - columns.back()->reserve(src_column.column->size()); - type_name.emplace_back(src_column.type, src_column.name, 
qualified_name); - } }; template @@ -1333,7 +1431,7 @@ public: } }; -template +template void addFoundRowAll( const typename Map::mapped_type & mapped, AddedColumns & added, @@ -1341,6 +1439,9 @@ void addFoundRowAll( KnownRowsHolder & known_rows [[maybe_unused]], JoinStuff::JoinUsedFlags * used_flags [[maybe_unused]]) { + if constexpr (add_missing) + added.applyLazyDefaults(); + if constexpr (multiple_disjuncts) { std::unique_ptr::Type>> new_known_rows_ptr; @@ -1349,7 +1450,7 @@ void addFoundRowAll( { if (!known_rows.isKnown(std::make_pair(it->block, it->row_num))) { - added.appendFromBlock(*it->block, it->row_num); + added.appendFromBlock(*it->block, it->row_num, false); ++current_offset; if (!new_known_rows_ptr) { @@ -1373,7 +1474,7 @@ void addFoundRowAll( { for (auto it = mapped.begin(); it.ok(); ++it) { - added.appendFromBlock(*it->block, it->row_num); + added.appendFromBlock(*it->block, it->row_num, false); ++current_offset; } } @@ -1462,7 +1563,7 @@ NO_INLINE size_t joinRightColumns( else used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*row_ref.block, row_ref.row_num); + added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); } else addNotFoundRow(added_columns, current_offset); @@ -1472,7 +1573,7 @@ NO_INLINE size_t joinRightColumns( setUsed(added_columns.filter, i); used_flags.template setUsed(find_result); auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); } else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) { @@ -1482,7 +1583,7 @@ NO_INLINE size_t joinRightColumns( { auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; setUsed(added_columns.filter, i); - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); } } else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) @@ -1493,7 +1594,7 @@ NO_INLINE size_t joinRightColumns( if (used_once) { setUsed(added_columns.filter, i); - added_columns.appendFromBlock(*mapped.block, mapped.row_num); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); } break; @@ -1511,7 +1612,7 @@ NO_INLINE size_t joinRightColumns( { setUsed(added_columns.filter, i); used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*mapped.block, mapped.row_num); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); if (join_features.is_any_or_semi_join) { @@ -1701,14 +1802,19 @@ Block HashJoin::joinBlockImpl( * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. 
* For ASOF, the last column is used as the ASOF column */ - AddedColumns added_columns( - block, - block_with_columns_to_add, - savedBlockSample(), - *this, - std::move(join_on_keys), - join_features.is_asof_join, - is_join_get); + std::unique_ptr added_columns_ptr; + if (!join_features.is_any_join) + { + added_columns_ptr = std::make_unique( + block, block_with_columns_to_add, savedBlockSample(), *this, std::move(join_on_keys), join_features.is_asof_join, is_join_get); + } + else + { + added_columns_ptr = std::make_unique( + block, block_with_columns_to_add, savedBlockSample(), *this, std::move(join_on_keys), join_features.is_asof_join, is_join_get); + } + + AddedColumns & added_columns = * added_columns_ptr; bool has_required_right_keys = (required_right_keys.columns() != 0); added_columns.need_filter = join_features.need_filter || has_required_right_keys; From ce3f95f71760d63b946934363732c9b625c46ad8 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Thu, 22 Feb 2024 16:13:38 +0800 Subject: [PATCH 043/356] fix style --- src/Interpreters/HashJoin.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 1ea12955409..9a2a6157c15 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1200,10 +1200,6 @@ protected: std::vector type_name; std::vector nullable_column_ptrs; private: - - - - size_t lazy_defaults_count = 0; /// for ASOF const IColumn * left_asof_key = nullptr; From 22a33884c32cc403b3e4d6f6a48559e6cf45e613 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Fri, 23 Feb 2024 15:49:36 +0800 Subject: [PATCH 044/356] fix bug --- src/Interpreters/HashJoin.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 9a2a6157c15..121181e8475 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1631,6 +1631,7 @@ NO_INLINE size_t joinRightColumns( } } + added_columns.applyLazyDefaults(); return i; } From 2279885c3e039f2aa5278cafb1f3e3a015289366 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Fri, 23 Feb 2024 16:59:04 +0800 Subject: [PATCH 045/356] fix clangtidy error --- src/Interpreters/HashJoin.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 121181e8475..ca7589f1b07 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1086,13 +1086,11 @@ public: } } - virtual ~AddedColumns() { } + virtual ~AddedColumns() = default; size_t size() const { return columns.size(); } - virtual void buildOutput() - { - } + virtual void buildOutput() { } ColumnWithTypeAndName moveColumn(size_t i) { @@ -1237,7 +1235,7 @@ public: lazy_output.row_nums.reserve(rows_to_add); } - virtual void buildOutput() override + void buildOutput() override { for (size_t i = 0; i < this->size(); ++i) { @@ -1277,7 +1275,7 @@ public: } } - virtual void appendFromBlock(const Block & block, size_t row_num, bool) override + void appendFromBlock(const Block & block, size_t row_num, bool) override { #ifndef NDEBUG for (size_t j = 0; j < right_indexes.size(); ++j) @@ -1311,7 +1309,7 @@ public: } } - virtual void appendDefaultRow() override + void appendDefaultRow() override { if (has_columns_to_add) { @@ -1320,7 +1318,7 @@ public: } } - virtual void applyLazyDefaults() override { } + void applyLazyDefaults() override { } private : // The default row is represented by an empty 
RowRef, so that fixed-size blocks can be generated sequentially, From 29c44762167707e0763306fc5cc60c15d1f98e49 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Feb 2024 12:09:07 +0800 Subject: [PATCH 046/356] Ping CI From 94f78ac44b3d36d8cedaf51dc62b9182cd8c8ebf Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Mon, 26 Feb 2024 15:34:56 +0800 Subject: [PATCH 047/356] use template class --- src/Interpreters/HashJoin.cpp | 330 ++++++++++++++++------------------ 1 file changed, 158 insertions(+), 172 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index ca7589f1b07..202ef51cea2 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1019,6 +1019,7 @@ struct JoinOnKeyColumns bool isRowFiltered(size_t i) const { return join_mask_column.isRowFiltered(i); } }; +template class AddedColumns { public: @@ -1034,6 +1035,12 @@ public: } }; + struct LazyOutput + { + PaddedPODArray blocks; + PaddedPODArray row_nums; + }; + AddedColumns( const Block & left_block, const Block & block_with_columns_to_add, @@ -1047,6 +1054,14 @@ public: , is_join_get(is_join_get_) { size_t num_columns_to_add = block_with_columns_to_add.columns(); + + if constexpr (lazy) + { + has_columns_to_add = block_with_columns_to_add.columns() > 0; + lazy_output.blocks.reserve(rows_to_add); + lazy_output.row_nums.reserve(rows_to_add); + } + if (is_asof_join) ++num_columns_to_add; columns.reserve(num_columns_to_add); @@ -1086,76 +1101,21 @@ public: } } - virtual ~AddedColumns() = default; - size_t size() const { return columns.size(); } - virtual void buildOutput() { } + void buildOutput(); ColumnWithTypeAndName moveColumn(size_t i) { return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); } - virtual void appendFromBlock(const Block & block, size_t row_num, bool has_defaults) - { - if (has_defaults) - applyLazyDefaults(); + void appendFromBlock(const Block & block, size_t row_num, bool has_default); -#ifndef NDEBUG - for (size_t j = 0; j < right_indexes.size(); ++j) - { - const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); - const auto * dest_column = columns[j].get(); - if (auto * nullable_col = nullable_column_ptrs[j]) - { - if (!is_join_get) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Columns {} and {} can have different nullability only in joinGetOrNull", - dest_column->getName(), column_from_block->getName()); - dest_column = nullable_col->getNestedColumnPtr().get(); - } - /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, - * because dictionaries can be different, while calling insertFrom on them is safe, for example: - * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) - * and - * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) - */ - if (typeid(*dest_column) != typeid(*column_from_block)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", - dest_column->getName(), column_from_block->getName(), - demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); - } -#endif - if (is_join_get) - { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - if (auto * nullable_col = nullable_column_ptrs[j]) - 
nullable_col->insertFromNotNullable(*column_from_block.column, row_num); - else - columns[j]->insertFrom(*column_from_block.column, row_num); - } - } - else - { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - columns[j]->insertFrom(*column_from_block.column, row_num); - } - } - } + void appendDefaultRow(); - virtual void appendDefaultRow() - { - ++lazy_defaults_count; - } - virtual void applyLazyDefaults() + void applyLazyDefaults() { if (lazy_defaults_count) { @@ -1197,6 +1157,12 @@ protected: std::vector right_indexes; std::vector type_name; std::vector nullable_column_ptrs; + + // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, + // default_count cannot represent the position of the row + LazyOutput lazy_output; + bool has_columns_to_add; + private: size_t lazy_defaults_count = 0; /// for ASOF @@ -1210,123 +1176,153 @@ private: type_name.emplace_back(src_column.type, src_column.name, qualified_name); } }; - -class LazyAddedColumns : public AddedColumns +template<> void AddedColumns::buildOutput() { -public: - struct LazyOutput +} +template<> +void AddedColumns::buildOutput() +{ + for (size_t i = 0; i < this->size(); ++i) { - PaddedPODArray blocks; - PaddedPODArray row_nums; - }; - - LazyAddedColumns( - const Block & left_block, - const Block & block_with_columns_to_add, - const Block & saved_block_sample, - const HashJoin & join, - std::vector && join_on_keys_, - bool is_asof_join, - bool is_join_get_) - : AddedColumns(left_block, block_with_columns_to_add, saved_block_sample, join, std::move(join_on_keys_), is_asof_join, is_join_get_) - { - has_columns_to_add = block_with_columns_to_add.columns() > 0; - lazy_output.blocks.reserve(rows_to_add); - lazy_output.row_nums.reserve(rows_to_add); - } - - void buildOutput() override - { - for (size_t i = 0; i < this->size(); ++i) + auto& col = columns[i]; + size_t default_count = 0; + auto apply_default = [&]() { - auto& col = columns[i]; - size_t default_count = 0; - auto apply_default = [&]() + if (default_count > 0) { - if (default_count > 0) - { - JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); - default_count = 0; - } - }; + JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); + default_count = 0; + } + }; - for (size_t j = 0; j < lazy_output.blocks.size(); ++j) + for (size_t j = 0; j < lazy_output.blocks.size(); ++j) + { + if (!lazy_output.blocks[j]) { - if (!lazy_output.blocks[j]) - { - default_count ++; - continue; - } - apply_default(); - const auto & column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); - /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. 
- if (is_join_get) - { - if (auto * nullable_col = typeid_cast(col.get()); - nullable_col && !column_from_block.column->isNullable()) - { - nullable_col->insertFromNotNullable(*column_from_block.column, lazy_output.row_nums[j]); - continue; - } - } - col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); + default_count ++; + continue; } apply_default(); - } - } - - void appendFromBlock(const Block & block, size_t row_num, bool) override - { -#ifndef NDEBUG - for (size_t j = 0; j < right_indexes.size(); ++j) - { - const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); - const auto * dest_column = columns[j].get(); - if (auto * nullable_col = nullable_column_ptrs[j]) + const auto & column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); + /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. + if (is_join_get) { - if (!is_join_get) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Columns {} and {} can have different nullability only in joinGetOrNull", - dest_column->getName(), column_from_block->getName()); - dest_column = nullable_col->getNestedColumnPtr().get(); + if (auto * nullable_col = typeid_cast(col.get()); + nullable_col && !column_from_block.column->isNullable()) + { + nullable_col->insertFromNotNullable(*column_from_block.column, lazy_output.row_nums[j]); + continue; + } } - /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, + col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); + } + apply_default(); + } +} + +template <> +void AddedColumns::appendFromBlock(const Block & block, size_t row_num,const bool has_defaults) +{ + if (has_defaults) + applyLazyDefaults(); + +#ifndef NDEBUG + for (size_t j = 0; j < right_indexes.size(); ++j) + { + const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); + const auto * dest_column = columns[j].get(); + if (auto * nullable_col = nullable_column_ptrs[j]) + { + if (!is_join_get) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Columns {} and {} can have different nullability only in joinGetOrNull", + dest_column->getName(), column_from_block->getName()); + dest_column = nullable_col->getNestedColumnPtr().get(); + } + /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, * because dictionaries can be different, while calling insertFrom on them is safe, for example: * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) * and * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) */ - if (typeid(*dest_column) != typeid(*column_from_block)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", - dest_column->getName(), column_from_block->getName(), - demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); - } + if (typeid(*dest_column) != typeid(*column_from_block)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", + dest_column->getName(), column_from_block->getName(), + demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); + } #endif - if (has_columns_to_add) - { - lazy_output.blocks.emplace_back(reinterpret_cast(&block)); - lazy_output.row_nums.emplace_back(static_cast(row_num)); - } - } - - void appendDefaultRow() override + if (is_join_get) { - if (has_columns_to_add) + size_t 
right_indexes_size = right_indexes.size(); + for (size_t j = 0; j < right_indexes_size; ++j) { - lazy_output.blocks.emplace_back(0); - lazy_output.row_nums.emplace_back(0); + const auto & column_from_block = block.getByPosition(right_indexes[j]); + if (auto * nullable_col = nullable_column_ptrs[j]) + nullable_col->insertFromNotNullable(*column_from_block.column, row_num); + else + columns[j]->insertFrom(*column_from_block.column, row_num); } } + else + { + size_t right_indexes_size = right_indexes.size(); + for (size_t j = 0; j < right_indexes_size; ++j) + { + const auto & column_from_block = block.getByPosition(right_indexes[j]); + columns[j]->insertFrom(*column_from_block.column, row_num); + } + } +} - void applyLazyDefaults() override { } +template <> +void AddedColumns::appendFromBlock(const Block & block, size_t row_num, bool) +{ +#ifndef NDEBUG + for (size_t j = 0; j < right_indexes.size(); ++j) + { + const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); + const auto * dest_column = columns[j].get(); + if (auto * nullable_col = nullable_column_ptrs[j]) + { + if (!is_join_get) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Columns {} and {} can have different nullability only in joinGetOrNull", + dest_column->getName(), column_from_block->getName()); + dest_column = nullable_col->getNestedColumnPtr().get(); + } + /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, + * because dictionaries can be different, while calling insertFrom on them is safe, for example: + * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) + * and + * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) + */ + if (typeid(*dest_column) != typeid(*column_from_block)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", + dest_column->getName(), column_from_block->getName(), + demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); + } +#endif + if (has_columns_to_add) + { + lazy_output.blocks.emplace_back(reinterpret_cast(&block)); + lazy_output.row_nums.emplace_back(static_cast(row_num)); + } +} +template<> +void AddedColumns::appendDefaultRow() +{ + ++lazy_defaults_count; +} -private : - // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, - // default_count cannot represent the position of the row - LazyOutput lazy_output; - bool has_columns_to_add; - -}; +template<> +void AddedColumns::appendDefaultRow() +{ + if (has_columns_to_add) + { + lazy_output.blocks.emplace_back(0); + lazy_output.row_nums.emplace_back(0); + } +} template struct JoinFeatures @@ -1425,7 +1421,7 @@ public: } }; -template +template void addFoundRowAll( const typename Map::mapped_type & mapped, AddedColumns & added, @@ -1474,7 +1470,7 @@ void addFoundRowAll( } } -template +template void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) { if constexpr (add_missing) @@ -1494,7 +1490,7 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
-template +template NO_INLINE size_t joinRightColumns( std::vector && key_getter_vector, const std::vector & mapv, @@ -1633,7 +1629,7 @@ NO_INLINE size_t joinRightColumns( return i; } -template +template size_t joinRightColumnsSwitchMultipleDisjuncts( std::vector && key_getter_vector, const std::vector & mapv, @@ -1645,7 +1641,7 @@ size_t joinRightColumnsSwitchMultipleDisjuncts( : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } -template +template size_t joinRightColumnsSwitchNullability( std::vector && key_getter_vector, const std::vector & mapv, @@ -1662,7 +1658,7 @@ size_t joinRightColumnsSwitchNullability( } } -template +template size_t switchJoinRightColumns( const std::vector & mapv, AddedColumns & added_columns, @@ -1797,19 +1793,9 @@ Block HashJoin::joinBlockImpl( * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. * For ASOF, the last column is used as the ASOF column */ - std::unique_ptr added_columns_ptr; - if (!join_features.is_any_join) - { - added_columns_ptr = std::make_unique( - block, block_with_columns_to_add, savedBlockSample(), *this, std::move(join_on_keys), join_features.is_asof_join, is_join_get); - } - else - { - added_columns_ptr = std::make_unique( - block, block_with_columns_to_add, savedBlockSample(), *this, std::move(join_on_keys), join_features.is_asof_join, is_join_get); - } + AddedColumns added_columns( + block, block_with_columns_to_add, savedBlockSample(), *this, std::move(join_on_keys), join_features.is_asof_join, is_join_get); - AddedColumns & added_columns = * added_columns_ptr; bool has_required_right_keys = (required_right_keys.columns() != 0); added_columns.need_filter = join_features.need_filter || has_required_right_keys; From 2b51497e554da8bc7c3f43e7f5d6354db5086a4f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 Feb 2024 08:50:23 +0100 Subject: [PATCH 048/356] Update docker/test/stateless/run.sh --- docker/test/stateless/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index f2f9d6ac3bf..8b617320f91 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -69,6 +69,7 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th } # commit_logs_cache_size_threshold setting doesn't exist on some older versions remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+" + remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+" fi # For flaky check we also enable thread fuzzer From a0f108ec3fd6f55d741c7aa6c162d0199e3385b0 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Mon, 26 Feb 2024 16:06:18 +0800 Subject: [PATCH 049/356] fix bug --- src/Interpreters/HashJoin.cpp | 39 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 202ef51cea2..83162e16151 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1054,16 +1054,16 @@ public: , is_join_get(is_join_get_) { size_t num_columns_to_add = block_with_columns_to_add.columns(); + if (is_asof_join) + ++num_columns_to_add; if constexpr (lazy) { - has_columns_to_add = block_with_columns_to_add.columns() > 0; + has_columns_to_add = num_columns_to_add > 0; lazy_output.blocks.reserve(rows_to_add); lazy_output.row_nums.reserve(rows_to_add); } - if (is_asof_join) - ++num_columns_to_add; 
columns.reserve(num_columns_to_add); type_name.reserve(num_columns_to_add); right_indexes.reserve(num_columns_to_add); @@ -1114,16 +1114,7 @@ public: void appendDefaultRow(); - - void applyLazyDefaults() - { - if (lazy_defaults_count) - { - for (size_t j = 0, size = right_indexes.size(); j < size; ++j) - JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count); - lazy_defaults_count = 0; - } - } + void applyLazyDefaults(); const IColumn & leftAsofKey() const { return *left_asof_key; } @@ -1220,6 +1211,22 @@ void AddedColumns::buildOutput() } } +template<> +void AddedColumns::applyLazyDefaults() +{ + if (lazy_defaults_count) + { + for (size_t j = 0, size = right_indexes.size(); j < size; ++j) + JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count); + lazy_defaults_count = 0; + } +} + +template<> +void AddedColumns::applyLazyDefaults() +{ +} + template <> void AddedColumns::appendFromBlock(const Block & block, size_t row_num,const bool has_defaults) { @@ -1309,13 +1316,13 @@ void AddedColumns::appendFromBlock(const Block & block, size_t row_num, bo } } template<> -void AddedColumns::appendDefaultRow() +void AddedColumns::appendDefaultRow() { ++lazy_defaults_count; } template<> -void AddedColumns::appendDefaultRow() +void AddedColumns::appendDefaultRow() { if (has_columns_to_add) { @@ -1324,6 +1331,8 @@ void AddedColumns::appendDefaultRow() } } + + template struct JoinFeatures { From 757b2f3369dc83192d0c2ef655882fc067832732 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Mon, 26 Feb 2024 16:49:15 +0800 Subject: [PATCH 050/356] fix style --- src/Interpreters/HashJoin.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 83162e16151..bdc7162bbb3 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1170,6 +1170,7 @@ private: template<> void AddedColumns::buildOutput() { } + template<> void AddedColumns::buildOutput() { @@ -1331,8 +1332,6 @@ void AddedColumns::appendDefaultRow() } } - - template struct JoinFeatures { From f885423a4a1c95118e026042a878226749d93441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 26 Feb 2024 15:09:05 +0100 Subject: [PATCH 051/356] Cleanup and more tests --- src/Interpreters/InterpreterSelectQuery.cpp | 3 +- .../02972_parallel_replicas_cte.sql | 42 ++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 135b535595c..1a9827d30f8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -778,6 +778,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( }; /// This is a hack to make sure we reanalyze if GlobalSubqueriesVisitor changed allow_experimental_parallel_reading_from_replicas + /// inside the query context (because it doesn't have write access to the main context) UInt64 parallel_replicas_before_analysis = context->hasQueryContext() ? 
context->getQueryContext()->getSettingsRef().allow_experimental_parallel_reading_from_replicas : 0; analyze(shouldMoveToPrewhere()); @@ -787,7 +788,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (context->hasQueryContext()) { - /// No buts or ifs, if the analysis changed this setting we must reanalyze without parallel replicas + /// As this query can't be executed with parallel replicas, we must reanalyze it if (context->getQueryContext()->getSettingsRef().allow_experimental_parallel_reading_from_replicas != parallel_replicas_before_analysis) { diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.sql b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql index c7143b5aa93..c9ab83ff9ad 100644 --- a/tests/queries/0_stateless/02972_parallel_replicas_cte.sql +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql @@ -1,5 +1,6 @@ DROP TABLE IF EXISTS pr_1; DROP TABLE IF EXISTS pr_2; +DROP TABLE IF EXISTS numbers_1e6; CREATE TABLE pr_1 (`a` UInt32) ENGINE = MergeTree ORDER BY a PARTITION BY a % 10 AS SELECT 10 * intDiv(number, 10) + 1 FROM numbers(1_000_000); @@ -28,7 +29,7 @@ SETTINGS allow_experimental_analyzer = 0, allow_experimental_parallel_reading_fr SELECT count() FROM pr_2 JOIN numbers(10) as pr_1 ON pr_2.a = pr_1.number SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; --- Being a subquery should still disable parallel replicas +-- Parallel replicas detection should work inside subqueries SELECT * FROM ( @@ -37,5 +38,44 @@ FROM ) SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; +-- Subquery + subquery +SELECT count() +FROM +( + SELECT c + 1 + FROM + ( + WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) + SELECT count() as c FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a + ) +) +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; + +CREATE TABLE numbers_1e6 +( + `n` UInt64 +) +ENGINE = MergeTree +ORDER BY n +AS SELECT * FROM numbers(1_000_000); + +-- Same but nested CTE's +WITH + cte1 AS + ( + SELECT n + FROM numbers_1e6 + ), + cte2 AS + ( + SELECT n + FROM numbers_1e6 + WHERE n IN (cte1) + ) +SELECT count() +FROM cte2 +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; + +DROP TABLE IF EXISTS numbers_1e6; DROP TABLE IF EXISTS pr_1; DROP TABLE IF EXISTS pr_2; From 0b72f7b18201819b4997c675a7bcc2ac19654908 Mon Sep 17 00:00:00 2001 From: HowePa <2873679104@qq.com> Date: Mon, 26 Feb 2024 22:46:51 +0800 Subject: [PATCH 052/356] Make all format names case insensitive. 
--- docs/en/interfaces/formats.md | 1 + src/Formats/FormatFactory.cpp | 32 ++++--- src/Formats/FormatFactory.h | 5 +- src/Functions/formatRow.cpp | 4 +- .../00309_formats_case_insensitive.reference | 95 +++++++++++++++++++ .../00309_formats_case_insensitive.sql | 23 +++++ 6 files changed, 145 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/00309_formats_case_insensitive.reference create mode 100644 tests/queries/0_stateless/00309_formats_case_insensitive.sql diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 285737312bd..a76bb01ce9e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -7,6 +7,7 @@ title: Formats for Input and Output Data ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read a dictionary. A format supported for output can be used to arrange the results of a `SELECT`, and to perform `INSERT`s into a file-backed table. +All format names are case insensitive. The supported formats are: diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 0654dd01e49..38b29bc6405 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -31,9 +31,18 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +String FormatFactory::getOriginalFormatNameIfExists(const String & name) const +{ + String case_insensitive_format_name = boost::to_lower_copy(name); + auto it = file_extension_formats.find(case_insensitive_format_name); + if (file_extension_formats.end() != it) + return it->second; + return name; +} + const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const { - auto it = dict.find(name); + auto it = dict.find(getOriginalFormatNameIfExists(name)); if (dict.end() != it) return it->second; throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name); @@ -542,7 +551,7 @@ SchemaReaderPtr FormatFactory::getSchemaReader( const ContextPtr & context, const std::optional & _format_settings) const { - const auto & schema_reader_creator = dict.at(name).schema_reader_creator; + const auto & schema_reader_creator = getCreators(name).schema_reader_creator; if (!schema_reader_creator) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} doesn't support schema inference.", name); @@ -558,7 +567,7 @@ ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader( const ContextPtr & context, const std::optional & _format_settings) const { - const auto & external_schema_reader_creator = dict.at(name).external_schema_reader_creator; + const auto & external_schema_reader_creator = getCreators(name).external_schema_reader_creator; if (!external_schema_reader_creator) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} doesn't support schema inference.", name); @@ -574,7 +583,7 @@ void FormatFactory::registerInputFormat(const String & name, InputCreator input_ throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Input format {} is already registered", name); creators.input_creator = std::move(input_creator); registerFileExtension(name, name); - KnownFormatNames::instance().add(name); + KnownFormatNames::instance().add(name, /* case_insensitive = */ true); } void FormatFactory::registerRandomAccessInputFormat(const String & name, RandomAccessInputCreator input_creator) @@ -585,7 +594,7 @@ void 
FormatFactory::registerRandomAccessInputFormat(const String & name, RandomA throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Input format {} is already registered", name); creators.random_access_input_creator = std::move(input_creator); registerFileExtension(name, name); - KnownFormatNames::instance().add(name); + KnownFormatNames::instance().add(name, /* case_insensitive = */ true); } void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker) @@ -612,7 +621,7 @@ void FormatFactory::markFormatHasNoAppendSupport(const String & name) bool FormatFactory::checkIfFormatSupportAppend(const String & name, const ContextPtr & context, const std::optional & format_settings_) { auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context); - auto & append_support_checker = dict[name].append_support_checker; + const auto & append_support_checker = getCreators(name).append_support_checker; /// By default we consider that format supports append return !append_support_checker || append_support_checker(format_settings); } @@ -624,7 +633,7 @@ void FormatFactory::registerOutputFormat(const String & name, OutputCreator outp throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Output format {} is already registered", name); target = std::move(output_creator); registerFileExtension(name, name); - KnownFormatNames::instance().add(name); + KnownFormatNames::instance().add(name, /* case_insensitive = */ true); } void FormatFactory::registerFileExtension(const String & extension, const String & format_name) @@ -791,13 +800,13 @@ String FormatFactory::getAdditionalInfoForSchemaCache(const String & name, const bool FormatFactory::isInputFormat(const String & name) const { - auto it = dict.find(name); + auto it = dict.find(getOriginalFormatNameIfExists(name)); return it != dict.end() && (it->second.input_creator || it->second.random_access_input_creator); } bool FormatFactory::isOutputFormat(const String & name) const { - auto it = dict.find(name); + auto it = dict.find(getOriginalFormatNameIfExists(name)); return it != dict.end() && it->second.output_creator; } @@ -826,7 +835,8 @@ bool FormatFactory::checkIfOutputFormatPrefersLargeBlocks(const String & name) c bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, const ContextPtr & context) const { - if (name == "Parquet" && context->getSettingsRef().input_format_parquet_preserve_order) + auto format_name = getOriginalFormatNameIfExists(name); + if (format_name == "Parquet" && context->getSettingsRef().input_format_parquet_preserve_order) return false; return true; @@ -834,7 +844,7 @@ bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, cons void FormatFactory::checkFormatName(const String & name) const { - auto it = dict.find(name); + auto it = dict.find(getOriginalFormatNameIfExists(name)); if (it == dict.end()) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name); } diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 165a20f7c4d..145f6258933 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -266,7 +266,7 @@ public: private: FormatsDictionary dict; - FileExtensionFormats file_extension_formats; + FileExtensionFormats file_extension_formats; // Also used as a case-insensitive format_name mapping. 
const Creators & getCreators(const String & name) const; @@ -279,6 +279,9 @@ private: const Settings & settings, bool is_remote_fs, size_t max_download_threads) const; + + // Mapping case-insensitive format_name to a key in FormatsDictionary if exists. + String getOriginalFormatNameIfExists(const String & name) const; }; } diff --git a/src/Functions/formatRow.cpp b/src/Functions/formatRow.cpp index 12a5fc2cc27..1ac6becfb15 100644 --- a/src/Functions/formatRow.cpp +++ b/src/Functions/formatRow.cpp @@ -18,7 +18,6 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNKNOWN_FORMAT; extern const int BAD_ARGUMENTS; } @@ -40,8 +39,7 @@ public: , arguments_column_names(std::move(arguments_column_names_)) , context(std::move(context_)) { - if (!FormatFactory::instance().getAllFormats().contains(format_name)) - throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", format_name); + FormatFactory::instance().checkFormatName(format_name); } String getName() const override { return name; } diff --git a/tests/queries/0_stateless/00309_formats_case_insensitive.reference b/tests/queries/0_stateless/00309_formats_case_insensitive.reference new file mode 100644 index 00000000000..b74d7002833 --- /dev/null +++ b/tests/queries/0_stateless/00309_formats_case_insensitive.reference @@ -0,0 +1,95 @@ +-- test FORMAT clause -- +0 Hello & world +1 Hello & world +2 Hello & world +0,"Hello & world" +1,"Hello & world" +2,"Hello & world" + + + + + + number + UInt64 + + + 'Hello & world' + String + + + + + + 0 + Hello & world + + + 1 + Hello & world + + + 2 + Hello & world + + + 3 + +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "'Hello & world'", + "type": "String" + } + ], + + "data": + [ + { + "number": "0", + "'Hello & world'": "Hello & world" + }, + { + "number": "1", + "'Hello & world'": "Hello & world" + }, + { + "number": "2", + "'Hello & world'": "Hello & world" + } + ], + + "rows": 3 +} +Row 1: +────── +number: 0 +'Hello & world': Hello & world + +Row 2: +────── +number: 1 +'Hello & world': Hello & world + +Row 3: +────── +number: 2 +'Hello & world': Hello & world +-- test table function -- +0 Hello & world +1 Hello & world +2 Hello & world +0 Hello & world +1 Hello & world +2 Hello & world +-- test other function -- +0 Hello & world +-- test table engine -- +0 Hello & world +1 Hello & world +2 Hello & world diff --git a/tests/queries/0_stateless/00309_formats_case_insensitive.sql b/tests/queries/0_stateless/00309_formats_case_insensitive.sql new file mode 100644 index 00000000000..b4037ed9861 --- /dev/null +++ b/tests/queries/0_stateless/00309_formats_case_insensitive.sql @@ -0,0 +1,23 @@ +SELECT '-- test FORMAT clause --'; +SET output_format_write_statistics = 0; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT Tsv; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT csv; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT xMl; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT JsonStrINGs; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT VERTICAL; + +SELECT '-- test table function --'; +INSERT INTO FUNCTION file('data_00309_formats_case_insensitive', 'Csv') SELECT number, 'Hello & world' FROM numbers(3) SETTINGS engine_file_truncate_on_insert=1; +SELECT * FROM file('data_00309_formats_case_insensitive', 'Csv'); + +INSERT INTO FUNCTION file('data_00309_formats_case_insensitive.cSv') SELECT number, 'Hello & world' FROM numbers(3) SETTINGS 
engine_file_truncate_on_insert=1; +SELECT * FROM file('data_00309_formats_case_insensitive.cSv'); + +SELECT '-- test other function --'; +SELECT * FROM format(cSv, '0,Hello & world'); + +SELECT '-- test table engine --'; +DROP TABLE IF EXISTS test_00309_formats_case_insensitive; +CREATE TABLE test_00309_formats_case_insensitive(a Int64, b String) ENGINE=File(Csv); +INSERT INTO test_00309_formats_case_insensitive SELECT number, 'Hello & world' FROM numbers(3); +SELECT * FROM test_00309_formats_case_insensitive; From ea89fa0de9a69d68e1a29447ed3d022ed9a3bf84 Mon Sep 17 00:00:00 2001 From: HowePa <2873679104@qq.com> Date: Mon, 26 Feb 2024 23:07:50 +0800 Subject: [PATCH 053/356] replace to checkFormatName --- src/Storages/StorageAzureBlob.cpp | 8 ++++---- src/Storages/StorageS3.cpp | 7 ++++--- .../TableFunctionAzureBlobStorage.cpp | 5 +++-- src/TableFunctions/TableFunctionS3.cpp | 15 ++++++++------- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index f5fcf01c59e..94bb5d3cf60 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -68,7 +68,7 @@ namespace ErrorCodes extern const int CANNOT_DETECT_FORMAT; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - + extern const int UNKNOWN_FORMAT; } namespace @@ -167,7 +167,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine auto is_format_arg = [] (const std::string & s) -> bool { - return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); + return s == "auto" || FormatFactory::instance().checkFormatName(s); }; if (engine_args.size() == 4) @@ -200,7 +200,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine else if (engine_args.size() == 6) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); } @@ -218,7 +218,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine else if (engine_args.size() == 7) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d8ef3df1c8..07f68072bb6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -133,6 +133,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int CANNOT_COMPILE_REGEXP; extern const int FILE_DOESNT_EXIST; + extern const int UNKNOWN_FORMAT; } @@ -1531,7 +1532,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C no_sign_request = true; engine_args_to_idx = {{"format", 2}}; } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; else engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; @@ 
-1552,7 +1553,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C else { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; } @@ -1568,7 +1569,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C else if (count == 5) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; } diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index 066d6338b6a..ac96364b5bd 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -32,6 +32,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_FORMAT; } namespace @@ -80,7 +81,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().checkFormatName(s); }; if (engine_args.size() == 4) { @@ -207,7 +208,7 @@ void TableFunctionAzureBlobStorage::updateStructureAndFormatArgumentsIfNeeded(AS arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().checkFormatName(s); }; /// (connection_string, container_name, blobpath) if (args.size() == 3) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 3fedd38277c..04182fa4e68 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -31,6 +31,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int LOGICAL_ERROR; + extern const int UNKNOWN_FORMAT; } @@ -100,7 +101,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context no_sign_request = true; args_to_idx = {{"format", 2}}; } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) args_to_idx = {{"format", 1}, {"structure", 2}}; else args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; @@ -119,14 +120,14 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context no_sign_request = true; args_to_idx = {{"format", 2}, {"structure", 3}}; } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + else if (second_arg == "auto" || 
FormatFactory::instance().checkFormatName(second_arg)) { args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; } else { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; } @@ -153,7 +154,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; } @@ -170,7 +171,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else if (count == 6) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; } @@ -300,7 +301,7 @@ void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, con args.push_back(structure_literal); } /// s3(source, format, structure) - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) { if (second_arg == "auto") args[1] = format_literal; @@ -330,7 +331,7 @@ void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, con args[3] = structure_literal; } /// s3(source, format, structure, compression_method) - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) { if (second_arg == "auto") args[1] = format_literal; From dbd8d35f01a55389c4fbfbcdf1b2d0b9f9b703ba Mon Sep 17 00:00:00 2001 From: HowePa <2873679104@qq.com> Date: Tue, 27 Feb 2024 00:48:34 +0800 Subject: [PATCH 054/356] use lower case in dict --- src/Formats/FormatFactory.cpp | 64 +++++++++++-------- src/Formats/FormatFactory.h | 8 +-- src/Storages/StorageAzureBlob.cpp | 7 +- src/Storages/StorageS3.cpp | 7 +- src/Storages/System/StorageSystemFormats.cpp | 3 +- .../TableFunctionAzureBlobStorage.cpp | 5 +- src/TableFunctions/TableFunctionS3.cpp | 15 ++--- 7 files changed, 59 insertions(+), 50 deletions(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 38b29bc6405..2bead318173 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -31,23 +31,35 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -String FormatFactory::getOriginalFormatNameIfExists(const String & name) const +bool FormatFactory::exists(const String & name) const { - String case_insensitive_format_name = boost::to_lower_copy(name); - auto it = file_extension_formats.find(case_insensitive_format_name); - if (file_extension_formats.end() != it) - return it->second; - return name; + return dict.find(boost::to_lower_copy(name)) != dict.end(); } const 
FormatFactory::Creators & FormatFactory::getCreators(const String & name) const { - auto it = dict.find(getOriginalFormatNameIfExists(name)); + auto it = dict.find(boost::to_lower_copy(name)); if (dict.end() != it) return it->second; throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name); } +FormatFactory::Creators & FormatFactory::getOrCreateCreators(const String & name) +{ + String lower_case = boost::to_lower_copy(name); + auto it = dict.find(lower_case); + if (dict.end() != it) + { + return it->second; + } + else + { + auto & creators = dict[lower_case]; + creators.name = name; + return creators; + } +} + FormatSettings getFormatSettings(const ContextPtr & context) { const auto & settings = context->getSettingsRef(); @@ -578,7 +590,7 @@ ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader( void FormatFactory::registerInputFormat(const String & name, InputCreator input_creator) { chassert(input_creator); - auto & creators = dict[name]; + auto & creators = getOrCreateCreators(name); if (creators.input_creator || creators.random_access_input_creator) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Input format {} is already registered", name); creators.input_creator = std::move(input_creator); @@ -589,7 +601,7 @@ void FormatFactory::registerInputFormat(const String & name, InputCreator input_ void FormatFactory::registerRandomAccessInputFormat(const String & name, RandomAccessInputCreator input_creator) { chassert(input_creator); - auto & creators = dict[name]; + auto & creators = getOrCreateCreators(name); if (creators.input_creator || creators.random_access_input_creator) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Input format {} is already registered", name); creators.random_access_input_creator = std::move(input_creator); @@ -599,7 +611,7 @@ void FormatFactory::registerRandomAccessInputFormat(const String & name, RandomA void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker) { - auto & target = dict[name].non_trivial_prefix_and_suffix_checker; + auto & target = getOrCreateCreators(name).non_trivial_prefix_and_suffix_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Non trivial prefix and suffix checker {} is already registered", name); target = std::move(non_trivial_prefix_and_suffix_checker); @@ -607,7 +619,7 @@ void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name void FormatFactory::registerAppendSupportChecker(const String & name, AppendSupportChecker append_support_checker) { - auto & target = dict[name].append_support_checker; + auto & target = getOrCreateCreators(name).append_support_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Suffix checker {} is already registered", name); target = std::move(append_support_checker); @@ -628,7 +640,7 @@ bool FormatFactory::checkIfFormatSupportAppend(const String & name, const Contex void FormatFactory::registerOutputFormat(const String & name, OutputCreator output_creator) { - auto & target = dict[name].output_creator; + auto & target = getOrCreateCreators(name).output_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Output format {} is already registered", name); target = std::move(output_creator); @@ -705,7 +717,7 @@ String FormatFactory::getFormatFromFileDescriptor(int fd) void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine 
file_segmentation_engine) { - auto & target = dict[name].file_segmentation_engine_creator; + auto & target = getOrCreateCreators(name).file_segmentation_engine_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: File segmentation engine {} is already registered", name); auto creator = [file_segmentation_engine](const FormatSettings &) @@ -717,7 +729,7 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm void FormatFactory::registerFileSegmentationEngineCreator(const String & name, FileSegmentationEngineCreator file_segmentation_engine_creator) { - auto & target = dict[name].file_segmentation_engine_creator; + auto & target = getOrCreateCreators(name).file_segmentation_engine_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: File segmentation engine creator {} is already registered", name); target = std::move(file_segmentation_engine_creator); @@ -725,7 +737,7 @@ void FormatFactory::registerFileSegmentationEngineCreator(const String & name, F void FormatFactory::registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator) { - auto & target = dict[name].schema_reader_creator; + auto & target = getOrCreateCreators(name).schema_reader_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Schema reader {} is already registered", name); target = std::move(schema_reader_creator); @@ -733,7 +745,7 @@ void FormatFactory::registerSchemaReader(const String & name, SchemaReaderCreato void FormatFactory::registerExternalSchemaReader(const String & name, ExternalSchemaReaderCreator external_schema_reader_creator) { - auto & target = dict[name].external_schema_reader_creator; + auto & target = getOrCreateCreators(name).external_schema_reader_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Schema reader {} is already registered", name); target = std::move(external_schema_reader_creator); @@ -741,7 +753,7 @@ void FormatFactory::registerExternalSchemaReader(const String & name, ExternalSc void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & name) { - auto & target = dict[name].supports_parallel_formatting; + auto & target = getOrCreateCreators(name).supports_parallel_formatting; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Output format {} is already marked as supporting parallel formatting", name); target = true; @@ -750,7 +762,7 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name) { - auto & target = dict[name].subset_of_columns_support_checker; + auto & target = getOrCreateCreators(name).subset_of_columns_support_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name); target = [](const FormatSettings &){ return true; }; @@ -758,7 +770,7 @@ void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name) void FormatFactory::registerSubsetOfColumnsSupportChecker(const String & name, SubsetOfColumnsSupportChecker subset_of_columns_support_checker) { - auto & target = dict[name].subset_of_columns_support_checker; + auto & target = getOrCreateCreators(name).subset_of_columns_support_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name); target = std::move(subset_of_columns_support_checker); @@ 
-766,7 +778,7 @@ void FormatFactory::registerSubsetOfColumnsSupportChecker(const String & name, S void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name) { - auto & target = dict[name].prefers_large_blocks; + auto & target = getOrCreateCreators(name).prefers_large_blocks; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as preferring large blocks", name); target = true; @@ -782,7 +794,7 @@ bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name, co void FormatFactory::registerAdditionalInfoForSchemaCacheGetter( const String & name, AdditionalInfoForSchemaCacheGetter additional_info_for_schema_cache_getter) { - auto & target = dict[name].additional_info_for_schema_cache_getter; + auto & target = getOrCreateCreators(name).additional_info_for_schema_cache_getter; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: additional info for schema cache getter {} is already registered", name); target = std::move(additional_info_for_schema_cache_getter); @@ -800,13 +812,13 @@ String FormatFactory::getAdditionalInfoForSchemaCache(const String & name, const bool FormatFactory::isInputFormat(const String & name) const { - auto it = dict.find(getOriginalFormatNameIfExists(name)); + auto it = dict.find(boost::to_lower_copy(name)); return it != dict.end() && (it->second.input_creator || it->second.random_access_input_creator); } bool FormatFactory::isOutputFormat(const String & name) const { - auto it = dict.find(getOriginalFormatNameIfExists(name)); + auto it = dict.find(boost::to_lower_copy(name)); return it != dict.end() && it->second.output_creator; } @@ -835,8 +847,8 @@ bool FormatFactory::checkIfOutputFormatPrefersLargeBlocks(const String & name) c bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, const ContextPtr & context) const { - auto format_name = getOriginalFormatNameIfExists(name); - if (format_name == "Parquet" && context->getSettingsRef().input_format_parquet_preserve_order) + auto format_name = boost::to_lower_copy(name); + if (format_name == "parquet" && context->getSettingsRef().input_format_parquet_preserve_order) return false; return true; @@ -844,7 +856,7 @@ bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, cons void FormatFactory::checkFormatName(const String & name) const { - auto it = dict.find(getOriginalFormatNameIfExists(name)); + auto it = dict.find(boost::to_lower_copy(name)); if (it == dict.end()) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name); } diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 145f6258933..46c1b8ddcdd 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -132,6 +132,7 @@ private: struct Creators { + String name; InputCreator input_creator; RandomAccessInputCreator random_access_input_creator; OutputCreator output_creator; @@ -263,12 +264,14 @@ public: /// Check that format with specified name exists and throw an exception otherwise. void checkFormatName(const String & name) const; + bool exists(const String & name) const; private: FormatsDictionary dict; - FileExtensionFormats file_extension_formats; // Also used as a case-insensitive format_name mapping. + FileExtensionFormats file_extension_formats; const Creators & getCreators(const String & name) const; + Creators & getOrCreateCreators(const String & name); // Creates a ReadBuffer to give to an input format. Returns nullptr if we should use `buf` directly. 
std::unique_ptr wrapReadBufferIfNeeded( @@ -279,9 +282,6 @@ private: const Settings & settings, bool is_remote_fs, size_t max_download_threads) const; - - // Mapping case-insensitive format_name to a key in FormatsDictionary if exists. - String getOriginalFormatNameIfExists(const String & name) const; }; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 94bb5d3cf60..d484fefc46f 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -68,7 +68,6 @@ namespace ErrorCodes extern const int CANNOT_DETECT_FORMAT; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - extern const int UNKNOWN_FORMAT; } namespace @@ -167,7 +166,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine auto is_format_arg = [] (const std::string & s) -> bool { - return s == "auto" || FormatFactory::instance().checkFormatName(s); + return s == "auto" || FormatFactory::instance().exists(s); }; if (engine_args.size() == 4) @@ -200,7 +199,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine else if (engine_args.size() == 6) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); } @@ -218,7 +217,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine else if (engine_args.size() == 7) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 07f68072bb6..e59a09efb20 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -133,7 +133,6 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int CANNOT_COMPILE_REGEXP; extern const int FILE_DOESNT_EXIST; - extern const int UNKNOWN_FORMAT; } @@ -1532,7 +1531,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C no_sign_request = true; engine_args_to_idx = {{"format", 2}}; } - else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; else engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; @@ -1553,7 +1552,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C else { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; } @@ -1569,7 +1568,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C else if (count == 5) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || 
FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; } diff --git a/src/Storages/System/StorageSystemFormats.cpp b/src/Storages/System/StorageSystemFormats.cpp index a360971e1f7..849e4eadf78 100644 --- a/src/Storages/System/StorageSystemFormats.cpp +++ b/src/Storages/System/StorageSystemFormats.cpp @@ -23,7 +23,8 @@ void StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr, co const auto & formats = FormatFactory::instance().getAllFormats(); for (const auto & pair : formats) { - const auto & [format_name, creators] = pair; + const auto & [name, creators] = pair; + String format_name = creators.name; UInt64 has_input_format(creators.input_creator != nullptr || creators.random_access_input_creator != nullptr); UInt64 has_output_format(creators.output_creator != nullptr); UInt64 supports_parallel_parsing(creators.file_segmentation_engine_creator != nullptr || creators.random_access_input_creator != nullptr); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index ac96364b5bd..8f558adb09b 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -32,7 +32,6 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int BAD_ARGUMENTS; - extern const int UNKNOWN_FORMAT; } namespace @@ -81,7 +80,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().checkFormatName(s); }; + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; if (engine_args.size() == 4) { @@ -208,7 +207,7 @@ void TableFunctionAzureBlobStorage::updateStructureAndFormatArgumentsIfNeeded(AS arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().checkFormatName(s); }; + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; /// (connection_string, container_name, blobpath) if (args.size() == 3) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 04182fa4e68..c00b1e2e3e5 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -31,7 +31,6 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int LOGICAL_ERROR; - extern const int UNKNOWN_FORMAT; } @@ -101,7 +100,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context no_sign_request = true; args_to_idx = {{"format", 2}}; } - else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) args_to_idx = {{"format", 1}, {"structure", 2}}; else args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; @@ -120,14 +119,14 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context no_sign_request = true; args_to_idx = {{"format", 2}, {"structure", 3}}; } - else if (second_arg == "auto" || 
FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; } else { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; } @@ -154,7 +153,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; } @@ -171,7 +170,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else if (count == 6) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; } @@ -301,7 +300,7 @@ void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, con args.push_back(structure_literal); } /// s3(source, format, structure) - else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { if (second_arg == "auto") args[1] = format_literal; @@ -331,7 +330,7 @@ void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, con args[3] = structure_literal; } /// s3(source, format, structure, compression_method) - else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { if (second_arg == "auto") args[1] = format_literal; From 24155c80c987356fb6f71060563932a9ede6a14c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B1=AA=E8=82=A5=E8=82=A5?= Date: Tue, 27 Feb 2024 07:50:04 +0800 Subject: [PATCH 055/356] Update src/Formats/FormatFactory.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Formats/FormatFactory.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 2bead318173..527e0a20753 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -49,14 +49,11 @@ FormatFactory::Creators & FormatFactory::getOrCreateCreators(const String & name String lower_case = boost::to_lower_copy(name); auto it = dict.find(lower_case); if (dict.end() != it) - { return it->second; - } - else - { - auto & creators = dict[lower_case]; - creators.name = name; - return creators; + + auto & creators = dict[lower_case]; + creators.name = name; + return creators; } } From 6f9cb058a6ff0306fd4b2bd27ec0057185697f9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B1=AA=E8=82=A5=E8=82=A5?= Date: Tue, 27 Feb 2024 07:59:09 +0800 Subject: [PATCH 056/356] Update FormatFactory.cpp --- src/Formats/FormatFactory.cpp | 1 - 1 file changed, 1 deletion(-) 
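The patches above converge on a single design for case-insensitive format names: FormatFactory keys its dictionary by the lower-cased name (getOrCreateCreators, exists and getCreators all lower-case before lookup), while the originally registered spelling is kept in Creators::name so that system.formats and error messages can still show "CSV" rather than "csv". The snippet below is a minimal, self-contained sketch of that idea, not ClickHouse's actual FormatFactory: the FormatRegistry class, its members and the use of the standard library in place of boost::to_lower_copy are assumptions made purely for illustration.

#include <algorithm>
#include <cctype>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for the real FormatFactory: the dictionary is keyed by the
// lower-cased name, while the first registered spelling is kept for display,
// so lookups are case-insensitive but output stays readable.
class FormatRegistry
{
public:
    struct Creators
    {
        std::string name;   // original spelling, e.g. "CSV", "JSONStrings"
        bool has_input = false;
        bool has_output = false;
    };

    void registerFormat(const std::string & name, bool input, bool output)
    {
        auto & creators = getOrCreateCreators(name);
        creators.has_input |= input;
        creators.has_output |= output;
    }

    bool exists(const std::string & name) const
    {
        return dict.find(toLower(name)) != dict.end();
    }

    const Creators & getCreators(const std::string & name) const
    {
        auto it = dict.find(toLower(name));
        if (it != dict.end())
            return it->second;
        throw std::runtime_error("Unknown format " + name);
    }

private:
    static std::string toLower(std::string s)
    {
        std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
        return s;
    }

    Creators & getOrCreateCreators(const std::string & name)
    {
        auto & creators = dict[toLower(name)];
        if (creators.name.empty())
            creators.name = name; // remember the first registered spelling for display
        return creators;
    }

    std::map<std::string, Creators> dict; // key: lower-cased format name
};

int main()
{
    FormatRegistry factory;
    factory.registerFormat("CSV", /*input*/ true, /*output*/ true);
    factory.registerFormat("JSONStrings", true, true);

    // Any spelling resolves to the same entry, mirroring FORMAT Tsv / FORMAT csv in the test.
    std::cout << factory.exists("csv") << ' ' << factory.exists("cSv") << '\n';   // 1 1
    std::cout << factory.getCreators("jsonstrINGs").name << '\n';                 // JSONStrings
}

The 00309_formats_case_insensitive test added above exercises the same behaviour from SQL: FORMAT Tsv, FORMAT csv, FORMAT xMl and file(..., 'Csv') all resolve to the registered formats regardless of spelling.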
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 527e0a20753..3303a0a4b66 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -54,7 +54,6 @@ FormatFactory::Creators & FormatFactory::getOrCreateCreators(const String & name auto & creators = dict[lower_case]; creators.name = name; return creators; - } } FormatSettings getFormatSettings(const ContextPtr & context) From 4b858f167bf838b71078174f33dfc909133188f0 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 27 Feb 2024 05:52:07 +0000 Subject: [PATCH 057/356] consistent mask ids based on node hash, respect format_display_secrets_in_show_and_select setting --- src/Analyzer/ConstantNode.cpp | 9 +++++++-- src/Analyzer/ConstantNode.h | 6 ++++++ src/Analyzer/IQueryTreeNode.cpp | 2 -- src/Analyzer/IQueryTreeNode.h | 6 ------ src/Analyzer/Passes/QueryAnalysisPass.cpp | 21 ++++++++++++++++----- 5 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 837c05ff5cb..f80e18f6327 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -45,10 +45,15 @@ void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state if (hasAlias()) buffer << ", alias: " << getAlias(); - buffer << ", constant_value: " << (is_masked ? "[HIDDEN]" : constant_value->getValue().dump()); + buffer << ", constant_value: "; + if (mask_id) + buffer << "[HIDDEN id: " << mask_id << "]"; + else + buffer << constant_value->getValue().dump(); + buffer << ", constant_value_type: " << constant_value->getType()->getName(); - if (!is_masked && getSourceExpression()) + if (!mask_id && getSourceExpression()) { buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n'; getSourceExpression()->dumpTreeImpl(buffer, format_state, indent + 4); diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 51c98a4a3b3..dd2ccee23cd 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -75,6 +75,11 @@ public: return constant_value->getType(); } + void setMaskId(size_t id) + { + mask_id = id; + } + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: @@ -90,6 +95,7 @@ private: ConstantValuePtr constant_value; String value_string; QueryTreeNodePtr source_expression; + size_t mask_id = 0; static constexpr size_t children_size = 0; }; diff --git a/src/Analyzer/IQueryTreeNode.cpp b/src/Analyzer/IQueryTreeNode.cpp index 3ef323c9648..d61cb0ffab1 100644 --- a/src/Analyzer/IQueryTreeNode.cpp +++ b/src/Analyzer/IQueryTreeNode.cpp @@ -45,14 +45,12 @@ const char * toString(QueryTreeNodeType type) } IQueryTreeNode::IQueryTreeNode(size_t children_size, size_t weak_pointers_size) - : is_masked(false) { children.resize(children_size); weak_pointers.resize(weak_pointers_size); } IQueryTreeNode::IQueryTreeNode(size_t children_size) - : is_masked(false) { children.resize(children_size); } diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h index c08a8860749..b07aa2d31b0 100644 --- a/src/Analyzer/IQueryTreeNode.h +++ b/src/Analyzer/IQueryTreeNode.h @@ -251,11 +251,6 @@ public: return children; } - void setMasked(bool masked = true) - { - is_masked = masked; - } - protected: /** Construct query tree node. * Resize children to children size. 
@@ -286,7 +281,6 @@ protected: QueryTreeNodes children; QueryTreeWeakNodes weak_pointers; - bool is_masked; private: String alias; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 31f07b4ec77..c906fc96976 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -704,7 +704,10 @@ struct IdentifierResolveScope { subquery_depth = parent_scope->subquery_depth; context = parent_scope->context; + projection_mask_map = parent_scope->projection_mask_map; } + else + projection_mask_map = std::make_shared>(); if (auto * union_node = scope_node->as()) { @@ -782,6 +785,9 @@ struct IdentifierResolveScope */ QueryTreeNodePtr expression_join_tree_node; + /// Node hash to mask id map + std::shared_ptr> projection_mask_map; + [[maybe_unused]] const IdentifierResolveScope * getNearestQueryScope() const { const IdentifierResolveScope * scope_to_check = this; @@ -5120,9 +5126,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi allow_table_expressions /*allow_table_expression*/, {secret_arguments.start, secret_arguments.count}); - for (size_t n = secret_arguments.start; n < secret_arguments.start + secret_arguments.count; ++n) - arguments_projection_names[n] = "[HIDDEN]"; - auto & function_node = *function_node_ptr; /// Replace right IN function argument if it is table or table function with subquery that read ordinary columns @@ -6122,8 +6125,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node { auto node_to_resolve = node; auto expression_node_projection_names = resolveExpressionNode(node_to_resolve, scope, allow_lambda_expression, allow_table_expression); - if (n >= secrets.first && n < secrets.first + secrets.second) - node_to_resolve->setMasked(); size_t expected_projection_names_size = 1; if (auto * expression_list = node_to_resolve->as()) @@ -6134,6 +6135,16 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node } else { + if (n >= secrets.first && n < secrets.first + secrets.second && !scope.context->getSettingsRef().format_display_secrets_in_show_and_select) + { + if (auto * constant = node_to_resolve->as()) + { + auto [mask, _] = scope.projection_mask_map->insert( {node->getTreeHash(), scope.projection_mask_map->size() + 1} ); + + constant->setMaskId(mask->second); + expression_node_projection_names[0] = "[HIDDEN id: " + std::to_string(mask->second) + "]"; + } + } result_nodes.push_back(std::move(node_to_resolve)); } From c1df83a27fad5503a8f6024c353162a470dfaa48 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Tue, 27 Feb 2024 10:31:14 +0800 Subject: [PATCH 058/356] refactor code --- src/Interpreters/HashJoin.cpp | 82 ++++++++++++++--------------------- 1 file changed, 33 insertions(+), 49 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index bdc7162bbb3..2b6a5f27a1f 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1142,20 +1142,48 @@ public: column->reserve(reserve_size); } -protected: +private: + + void checkBlock(const Block & block) + { + for (size_t j = 0; j < right_indexes.size(); ++j) + { + const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); + const auto * dest_column = columns[j].get(); + if (auto * nullable_col = nullable_column_ptrs[j]) + { + if (!is_join_get) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Columns {} and {} can have different nullability only in 
joinGetOrNull", + dest_column->getName(), column_from_block->getName()); + dest_column = nullable_col->getNestedColumnPtr().get(); + } + /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, + * because dictionaries can be different, while calling insertFrom on them is safe, for example: + * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) + * and + * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) + */ + if (typeid(*dest_column) != typeid(*column_from_block)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", + dest_column->getName(), column_from_block->getName(), + demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); + } + } + MutableColumns columns; bool is_join_get; std::vector right_indexes; std::vector type_name; std::vector nullable_column_ptrs; + size_t lazy_defaults_count = 0; + /// for lazy // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, // default_count cannot represent the position of the row LazyOutput lazy_output; bool has_columns_to_add; -private: - size_t lazy_defaults_count = 0; /// for ASOF const IColumn * left_asof_key = nullptr; @@ -1235,29 +1263,7 @@ void AddedColumns::appendFromBlock(const Block & block, size_t row_num,co applyLazyDefaults(); #ifndef NDEBUG - for (size_t j = 0; j < right_indexes.size(); ++j) - { - const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); - const auto * dest_column = columns[j].get(); - if (auto * nullable_col = nullable_column_ptrs[j]) - { - if (!is_join_get) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Columns {} and {} can have different nullability only in joinGetOrNull", - dest_column->getName(), column_from_block->getName()); - dest_column = nullable_col->getNestedColumnPtr().get(); - } - /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, - * because dictionaries can be different, while calling insertFrom on them is safe, for example: - * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) - * and - * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) - */ - if (typeid(*dest_column) != typeid(*column_from_block)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", - dest_column->getName(), column_from_block->getName(), - demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); - } + checkBlock(block); #endif if (is_join_get) { @@ -1286,29 +1292,7 @@ template <> void AddedColumns::appendFromBlock(const Block & block, size_t row_num, bool) { #ifndef NDEBUG - for (size_t j = 0; j < right_indexes.size(); ++j) - { - const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); - const auto * dest_column = columns[j].get(); - if (auto * nullable_col = nullable_column_ptrs[j]) - { - if (!is_join_get) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Columns {} and {} can have different nullability only in joinGetOrNull", - dest_column->getName(), column_from_block->getName()); - dest_column = nullable_col->getNestedColumnPtr().get(); - } - /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, - * because dictionaries can be different, while calling insertFrom on them is 
safe, for example: - * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) - * and - * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) - */ - if (typeid(*dest_column) != typeid(*column_from_block)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", - dest_column->getName(), column_from_block->getName(), - demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); - } + checkBlock(block); #endif if (has_columns_to_add) { From 0c7d12c392a8b52328368d048eb5bb61376ba3ac Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 27 Feb 2024 14:56:18 +0800 Subject: [PATCH 059/356] supress errors --- src/Processors/Transforms/FilterTransform.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index 4591177850b..0f2509c7510 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -340,6 +340,7 @@ void FilterTransform::doTransform(Chunk & chunk) break; } } + (void)min_size_in_memory; /// Suppress error of clang-analyzer-deadcode.DeadStores size_t num_filtered_rows = 0; if (first_non_constant_column != num_columns) From 4af339555d2d0ac02f275fc886a8c0ad85d9c753 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Tue, 27 Feb 2024 16:19:34 +0800 Subject: [PATCH 060/356] revert test case changes --- tests/performance/any_join.xml | 23 ------------------- .../performance/storage_join_direct_join.xml | 14 ----------- 2 files changed, 37 deletions(-) delete mode 100644 tests/performance/any_join.xml diff --git a/tests/performance/any_join.xml b/tests/performance/any_join.xml deleted file mode 100644 index ed473c36d55..00000000000 --- a/tests/performance/any_join.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - 1 - - - CREATE TABLE keys (key UInt64) ENGINE = MergeTree ORDER BY key; - CREATE TABLE dict (key UInt64, value1 UInt64, value2 Float64, value3 String, - value4 String, value5 String, value6 String, value7 String, value8 String, value9 String, - value10 String) ENGINE = MergeTree ORDER BY key; - - INSERT INTO keys SELECT rand() %500000 FROM numbers(10000000); - INSERT INTO dict SELECT rand() %300000, rand()%1000, rand()*0.0001, toString(number), - toString(number), toString(number), toString(number), toString(number), toString(number), - toString(number), toString(number) FROM numbers(1000000); - - - - SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; - SELECT keys.key, value1 FROM keys ALL INNER JOIN dict AS d ON (keys.key = d.key) FORMAT Null; - - DROP TABLE IF EXISTS keys - DROP TABLE IF EXISTS dict - \ No newline at end of file diff --git a/tests/performance/storage_join_direct_join.xml b/tests/performance/storage_join_direct_join.xml index d3bc25cd96b..2fc63c2c926 100644 --- a/tests/performance/storage_join_direct_join.xml +++ b/tests/performance/storage_join_direct_join.xml @@ -8,26 +8,12 @@ value4 String, value5 String, value6 String, value7 String, value8 String, value9 String, value10 String) ENGINE = Join(ANY, LEFT, key); - CREATE TABLE dict2 (key UInt64, value1 UInt64, value2 Float64, value3 String, - value4 String, value5 String, value6 String, value7 String, value8 String, value9 String, - value10 String) ENGINE = MergeTree ORDER BY key; - INSERT INTO keys SELECT rand() FROM numbers(10000000); INSERT INTO dict SELECT rand(), 
rand()%1000, rand()*0.0001, toString(number), toString(number), toString(number), toString(number), toString(number), toString(number), toString(number), toString(number) FROM numbers(1000000); - INSERT INTO dict2 SELECT rand(), rand()%1000, rand()*0.0001, toString(number), - toString(number), toString(number), toString(number), toString(number), toString(number), - toString(number), toString(number) FROM numbers(1000000); - SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; - SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict2 AS d ON (keys.key = d.key) FORMAT Null; - SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null SETTINGS allow_experimental_analyzer=1 - - DROP TABLE IF EXISTS keys - DROP TABLE IF EXISTS dict - DROP TABLE IF EXISTS dict2 \ No newline at end of file From c395e4f52f9ea4d68dfd08585053d63f8dbeae31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 27 Feb 2024 12:20:44 +0100 Subject: [PATCH 061/356] Add missing reference --- tests/queries/0_stateless/02972_parallel_replicas_cte.reference | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.reference b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference index 3321ade3a24..bbb5a960463 100644 --- a/tests/queries/0_stateless/02972_parallel_replicas_cte.reference +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference @@ -2,3 +2,5 @@ 990000 10 990000 +1 +1000000 From c9dd6fe8d57b20918b75e4c8ef06094af0bad229 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 27 Feb 2024 12:35:14 +0100 Subject: [PATCH 062/356] Set max_parallel_replicas to 1 when disabling --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 1a9827d30f8..1c9b8d911d9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -885,7 +885,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() { /// The query could use trivial count if it didn't use parallel replicas, so let's disable it and reanalyze context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(log, "Disabling parallel replicas to be able to use a trivial count optimization"); return true; } From 5ed09dc76b01e08cdd758285ebd984e327d8b7c2 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 27 Feb 2024 14:58:17 +0000 Subject: [PATCH 063/356] refactoring, add test --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 40 ++++++----- ...8_analyzer_secret_args_tree_node.reference | 71 +++++++++++++++++++ .../02998_analyzer_secret_args_tree_node.sql | 9 +++ 3 files changed, 102 insertions(+), 18 deletions(-) create mode 100644 tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference create mode 100644 tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index c906fc96976..a479b4a1162 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1380,7 +1380,7 @@ private: ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & 
scope, bool allow_lambda_expression, bool allow_table_expression); - ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const std::pair & secrets = std::pair()); + ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); ProjectionNames resolveSortNodeList(QueryTreeNodePtr & sort_node_list, IdentifierResolveScope & scope); @@ -5118,13 +5118,30 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } /// Resolve function arguments - FunctionSecretArgumentsFinder::Result secret_arguments = FunctionSecretArgumentsFinder{function_node_ptr->toAST()->as()}.getResult(); bool allow_table_expressions = is_special_function_in; auto arguments_projection_names = resolveExpressionNodeList(function_node_ptr->getArgumentsNode(), scope, true /*allow_lambda_expression*/, - allow_table_expressions /*allow_table_expression*/, - {secret_arguments.start, secret_arguments.count}); + allow_table_expressions /*allow_table_expression*/); + + /// Mask arguments if needed + if (!scope.context->getSettingsRef().format_display_secrets_in_show_and_select) + { + if (FunctionSecretArgumentsFinder::Result secret_arguments = FunctionSecretArgumentsFinder{function_node_ptr->toAST()->as()}.getResult(); secret_arguments.count) + { + auto & argument_nodes = function_node_ptr->getArgumentsNode()->as().getNodes(); + + for (size_t n = secret_arguments.start; n < secret_arguments.start + secret_arguments.count; ++n) + { + if (auto * constant = argument_nodes[n]->as()) + { + auto [mask, _] = scope.projection_mask_map->insert( {constant->getTreeHash(), scope.projection_mask_map->size() + 1} ); + constant->setMaskId(mask->second); + arguments_projection_names[n] = "[HIDDEN id: " + std::to_string(mask->second) + "]"; + } + } + } + } auto & function_node = *function_node_ptr; @@ -6110,7 +6127,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * Example: CREATE TABLE test_table (id UInt64, value UInt64) ENGINE=TinyLog; SELECT plus(*) FROM test_table; * Example: SELECT *** FROM system.one; */ -ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const std::pair & secrets) +ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) { auto & node_list_typed = node_list->as(); size_t node_list_size = node_list_typed.getNodes().size(); @@ -6120,7 +6137,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node ProjectionNames result_projection_names; - size_t n = 0; for (auto & node : node_list_typed.getNodes()) { auto node_to_resolve = node; @@ -6135,16 +6151,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node } else { - if (n >= secrets.first && n < secrets.first + secrets.second && !scope.context->getSettingsRef().format_display_secrets_in_show_and_select) - { - if (auto * constant = node_to_resolve->as()) - { - auto [mask, _] = scope.projection_mask_map->insert( {node->getTreeHash(), scope.projection_mask_map->size() + 1} ); - - constant->setMaskId(mask->second); - expression_node_projection_names[0] = "[HIDDEN id: " + std::to_string(mask->second) + "]"; - } - 
} result_nodes.push_back(std::move(node_to_resolve)); } @@ -6156,8 +6162,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node result_projection_names.insert(result_projection_names.end(), expression_node_projection_names.begin(), expression_node_projection_names.end()); expression_node_projection_names.clear(); - - ++n; } node_list_typed.getNodes() = std::move(result_nodes); diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference new file mode 100644 index 00000000000..ffd2f68990b --- /dev/null +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference @@ -0,0 +1,71 @@ +-- { echoOn } +SET allow_experimental_analyzer = 1; +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); +QUERY id: 0 + PROJECTION COLUMNS + encrypt(\'aes-256-ofb\', [HIDDEN id: 1], [HIDDEN id: 2]) Nullable(String) + encrypt(\'aes-256-ofb\', [HIDDEN id: 3], [HIDDEN id: 2]) Nullable(String) + PROJECTION + LIST id: 1, nodes: 2 + CONSTANT id: 2, constant_value: \'\\nãì&\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 3, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 4, nodes: 3 + CONSTANT id: 5, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 6, constant_value: [HIDDEN id: 1], constant_value_type: Nullable(String) + CONSTANT id: 7, constant_value: [HIDDEN id: 2], constant_value_type: String + CONSTANT id: 8, constant_value: \'çø\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 9, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 10, nodes: 3 + CONSTANT id: 11, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 12, constant_value: [HIDDEN id: 3], constant_value_type: Nullable(String) + CONSTANT id: 13, constant_value: [HIDDEN id: 2], constant_value_type: String + JOIN TREE + TABLE id: 14, alias: __table1, table_name: system.one +SET format_display_secrets_in_show_and_select = 1; +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); +QUERY id: 0 + PROJECTION COLUMNS + encrypt(\'aes-256-ofb\', _subquery_1, \'12345678901234567890123456789012\') Nullable(String) + encrypt(\'aes-256-ofb\', _subquery_2, \'12345678901234567890123456789012\') Nullable(String) + PROJECTION + LIST id: 1, nodes: 2 + CONSTANT id: 2, constant_value: \'\\nãì&\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 3, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 4, nodes: 3 + CONSTANT id: 5, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 6, constant_value: \'qwerty\', constant_value_type: Nullable(String) + EXPRESSION + QUERY id: 7, is_subquery: 1 + PROJECTION COLUMNS + \'qwerty\' String + PROJECTION + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: \'qwerty\', constant_value_type: String + JOIN TREE + TABLE id: 10, table_name: system.one + CONSTANT id: 11, constant_value: \'12345678901234567890123456789012\', constant_value_type: String + CONSTANT id: 12, constant_value: \'çø\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 13, 
function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 14, nodes: 3 + CONSTANT id: 15, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 16, constant_value: \'asdf\', constant_value_type: Nullable(String) + EXPRESSION + QUERY id: 17, is_subquery: 1 + PROJECTION COLUMNS + \'asdf\' String + PROJECTION + LIST id: 18, nodes: 1 + CONSTANT id: 19, constant_value: \'asdf\', constant_value_type: String + JOIN TREE + TABLE id: 20, table_name: system.one + CONSTANT id: 21, constant_value: \'12345678901234567890123456789012\', constant_value_type: String + JOIN TREE + TABLE id: 22, alias: __table1, table_name: system.one diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql new file mode 100644 index 00000000000..7ac718de98e --- /dev/null +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql @@ -0,0 +1,9 @@ +-- { echoOn } +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); + +SET format_display_secrets_in_show_and_select = 1; + +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); +-- { echoOff } From a2cabc9fe13c0227db5f2bd2eaf090aa9b0189a5 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 27 Feb 2024 16:16:15 +0100 Subject: [PATCH 064/356] Fixup --- src/Analyzer/ConstantNode.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 83446ca7d16..e3b87edbdc6 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -100,12 +100,6 @@ bool ConstantNode::receivedFromInitiatorServer() const auto * cast_function = getSourceExpression()->as(); if (!cast_function || cast_function->getFunctionName() != "_CAST") return false; - for (auto const & argument : cast_function->getArguments()) - { - auto * constant_arg = argument->as(); - if (!constant_arg || constant_arg->hasSourceExpression()) - return false; - } return true; } From dd458932c94103bafedb2e6a389171a3d5e54ddd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 27 Feb 2024 17:46:13 +0000 Subject: [PATCH 065/356] Try to avoid calculation of scalar subqueries for CREATE TABLE. 
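
Before this change, building the sample block for CREATE ... AS SELECT had to execute
scalar subqueries, because functions that accept only constant arguments (extractAll,
the splitBy* family, the URL hierarchy functions) would otherwise fail analysis on a
non-constant subquery result. In analysis-only mode scalar subqueries are now wrapped
into __scalarSubqueryResult(...) instead of identity(...), functions declare constant-only
positions via getArgumentsThatAreAlwaysConstant(), and ActionsDAG::addFunction substitutes
a default constant for an always-constant argument that traces back to a scalar subquery,
so the sample block can be built without running the subquery.

As a sketch of the case this covers (adapted from the new test
02999_scalar_subqueries_bug_2.sql; table and view names are taken from that test), the
scalar subquery below should no longer be executed while the view is being created:

    CREATE MATERIALIZED VIEW vm_target2 TO target2 AS
    SELECT * FROM source
    WHERE type = 'two' AND (SELECT sum(sleepEachRow(0.1)) FROM numbers(30));
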
--- src/Functions/FunctionTokens.h | 2 + src/Functions/URL/URLHierarchy.cpp | 2 + src/Functions/URL/URLPathHierarchy.cpp | 2 + .../URL/extractURLParameterNames.cpp | 2 + src/Functions/URL/extractURLParameters.cpp | 2 + src/Functions/alphaTokens.cpp | 2 + src/Functions/extractAll.cpp | 2 + src/Functions/identity.cpp | 5 ++ src/Functions/identity.h | 20 +++++- src/Functions/splitByChar.cpp | 2 + src/Functions/splitByNonAlpha.cpp | 2 + src/Functions/splitByRegexp.cpp | 2 + src/Functions/splitByString.cpp | 2 + src/Functions/splitByWhitespace.cpp | 2 + src/Interpreters/ActionsDAG.cpp | 44 +++++++++++++ .../ExecuteScalarSubqueriesVisitor.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 21 +----- ...1611_constant_folding_subqueries.reference | 8 ++- .../01611_constant_folding_subqueries.sql | 3 + .../02999_scalar_subqueries_bug_1.reference | 66 +++++++++++++++++++ .../02999_scalar_subqueries_bug_1.sql | 8 +++ .../02999_scalar_subqueries_bug_2.reference | 0 .../02999_scalar_subqueries_bug_2.sql | 18 +++++ 23 files changed, 195 insertions(+), 24 deletions(-) create mode 100644 tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference create mode 100644 tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql create mode 100644 tests/queries/0_stateless/02999_scalar_subqueries_bug_2.reference create mode 100644 tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index 5c4e582c637..807cbf307a7 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -74,6 +74,8 @@ public: size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return Generator::getArgumentsThatAreAlwaysConstant(); } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { Generator::checkArguments(*this, arguments); diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index 25c6c9ef40b..5e04dfe027d 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -24,6 +24,8 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index 9a60d4cf989..b9eff200c78 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ b/src/Functions/URL/URLPathHierarchy.cpp @@ -22,6 +22,8 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp index 08da148b43e..684e28a5220 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -22,6 +22,8 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {}; } + static void checkArguments(const 
IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index 939622dd9d1..c4682063337 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -23,6 +23,8 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ diff --git a/src/Functions/alphaTokens.cpp b/src/Functions/alphaTokens.cpp index 35cacdbdbb8..35f434e7498 100644 --- a/src/Functions/alphaTokens.cpp +++ b/src/Functions/alphaTokens.cpp @@ -32,6 +32,8 @@ public: static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index ad49f32f769..1112280ea1b 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -50,6 +50,8 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 2; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ diff --git a/src/Functions/identity.cpp b/src/Functions/identity.cpp index 7174f1fd318..43cca76c801 100644 --- a/src/Functions/identity.cpp +++ b/src/Functions/identity.cpp @@ -9,4 +9,9 @@ REGISTER_FUNCTION(Identity) factory.registerFunction(); } +REGISTER_FUNCTION(ScalarSubqueryResult) +{ + factory.registerFunction(); +} + } diff --git a/src/Functions/identity.h b/src/Functions/identity.h index efee95841f5..c753625caa7 100644 --- a/src/Functions/identity.h +++ b/src/Functions/identity.h @@ -6,11 +6,12 @@ namespace DB { -class FunctionIdentity : public IFunction +template +class FunctionIdentityBase : public IFunction { public: - static constexpr auto name = "identity"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared>(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -28,4 +29,17 @@ public: } }; +struct IdentityName +{ + static constexpr auto name = "identity"; +}; + +struct ScalarSubqueryResultName +{ + static constexpr auto name = "__scalarSubqueryResult"; +}; + +using FunctionIdentity = FunctionIdentityBase; +using FunctionScalarSubqueryResult = FunctionIdentityBase; + } diff --git a/src/Functions/splitByChar.cpp b/src/Functions/splitByChar.cpp index d537039dc23..d3d5dc9fe4a 100644 --- a/src/Functions/splitByChar.cpp +++ b/src/Functions/splitByChar.cpp @@ -40,6 +40,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {0, 2}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments); diff --git 
a/src/Functions/splitByNonAlpha.cpp b/src/Functions/splitByNonAlpha.cpp index 467e7b0b5c3..4486a33aa88 100644 --- a/src/Functions/splitByNonAlpha.cpp +++ b/src/Functions/splitByNonAlpha.cpp @@ -42,6 +42,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index 77328205c01..430089f14ee 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -44,6 +44,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {0, 2}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByString.cpp b/src/Functions/splitByString.cpp index 7d6803b2f27..5c97f9841e7 100644 --- a/src/Functions/splitByString.cpp +++ b/src/Functions/splitByString.cpp @@ -39,6 +39,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {0, 2}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByWhitespace.cpp b/src/Functions/splitByWhitespace.cpp index 168e429c6f5..cf21a218b15 100644 --- a/src/Functions/splitByWhitespace.cpp +++ b/src/Functions/splitByWhitespace.cpp @@ -30,6 +30,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithOptionalMaxSubstrings(func, arguments); diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 7240679abb7..87f0e500371 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -64,6 +64,37 @@ std::pair getFunctionArguments(const ActionsDAG::N return { std::move(arguments), all_const }; } +bool isConstantFromScalarSubquery(const ActionsDAG::Node * node) +{ + std::stack stack; + stack.push(node); + while (!stack.empty()) + { + const auto * arg = stack.top(); + stack.pop(); + + if (arg->column && isColumnConst(*arg->column)) + continue; + + while (arg->type == ActionsDAG::ActionType::ALIAS) + arg = arg->children.at(0); + + if (arg->type != ActionsDAG::ActionType::FUNCTION) + return false; + + if (arg->function_base->getName() == "__scalarSubqueryResult") + continue; + + if (arg->children.empty() || !arg->function_base->isSuitableForConstantFolding()) + return false; + + for (const auto * child : arg->children) + stack.push(child); + } + + return true; +} + } void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const @@ -196,6 +227,19 @@ const ActionsDAG::Node & ActionsDAG::addFunction( { auto [arguments, all_const] = getFunctionArguments(children); + auto constant_args = function->getArgumentsThatAreAlwaysConstant(); + for (size_t pos : constant_args) + { + if (pos >= children.size()) + continue; + + 
if (arguments[pos].column && isColumnConst(*arguments[pos].column)) + continue; + + if (isConstantFromScalarSubquery(children[pos])) + arguments[pos].column = arguments[pos].type->createColumnConstWithDefaultValue(0); + } + auto function_base = function->build(arguments); return addFunctionImpl( function_base, diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 77a022e066b..0cf138c14f6 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -281,7 +281,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr if (data.only_analyze) { ast->as()->alias.clear(); - auto func = makeASTFunction("identity", std::move(ast)); + auto func = makeASTFunction("__scalarSubqueryResult", std::move(ast)); func->alias = subquery_alias; func->prefer_alias_to_column_name = prefer_alias_to_column_name; ast = std::move(func); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 36c2fb725ae..c033b320f66 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -809,24 +809,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else { - /** To get valid sample block we need to prepare query without only_analyze, because we need to execute scalar - * subqueries. Otherwise functions that expect only constant arguments will throw error during query analysis, - * because the result of scalar subquery is not a constant. - * - * Example: - * CREATE MATERIALIZED VIEW test_mv ENGINE=MergeTree ORDER BY arr - * AS - * WITH (SELECT '\d[a-z]') AS constant_value - * SELECT extractAll(concat(toString(number), 'a'), assumeNotNull(constant_value)) AS arr - * FROM test_table; - * - * For new analyzer this issue does not exists because we always execute scalar subqueries. - * We can improve this in new analyzer, and execute scalar subqueries only in contexts when we expect constant - * for example: LIMIT, OFFSET, functions parameters, functions constant only arguments. 
- */ - - InterpreterSelectWithUnionQuery interpreter(create.select->clone(), getContext(), SelectQueryOptions()); - as_select_sample = interpreter.getSampleBlock(); + as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); } properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); @@ -1237,7 +1220,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { input_block = InterpreterSelectWithUnionQuery(create.select->clone(), getContext(), - {}).getSampleBlock(); + SelectQueryOptions().analyze()).getSampleBlock(); } Block output_block = to_table->getInMemoryMetadataPtr()->getSampleBlock(); diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference index c3df2314112..327a4694aa8 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference @@ -5,7 +5,13 @@ SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FO 1,10 EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); SELECT - identity(_CAST(0, \'Nullable(UInt64)\')) AS n, + __scalarSubqueryResult(_CAST(0, \'Nullable(UInt64)\')) AS n, toUInt64(10 / n) SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); 0 +SELECT * FROM (SELECT (SELECT '\d[a-z]') AS n, extractAll('5abc', assumeNotNull(n))) FORMAT CSV; +"\d[a-z]","['5a']" +EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); +SELECT + __scalarSubqueryResult(_CAST(0, \'Nullable(UInt64)\')) AS n, + toUInt64(10 / n) diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql index 59f057d1ec5..b30fb43f621 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql @@ -3,3 +3,6 @@ SELECT * FROM (SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUI SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FORMAT CSV; EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); + +SELECT * FROM (SELECT (SELECT '\d[a-z]') AS n, extractAll('5abc', assumeNotNull(n))) FORMAT CSV; +EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference new file mode 100644 index 00000000000..0740afe92c6 --- /dev/null +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference @@ -0,0 +1,66 @@ +0 0 +0 0 +0 0 +0 0 +1 \N +1 \N +2 \N +2 \N +3 \N +3 \N +4 \N +4 \N +5 \N +5 \N +6 \N +6 \N +7 \N +7 \N +8 \N +8 \N +9 \N +9 \N +10 10 +10 10 +10 10 +10 10 +11 \N +11 \N +12 \N +12 \N +13 \N +13 \N +14 \N +14 \N +15 \N +15 \N +16 \N +16 \N +17 \N +17 \N +18 \N +18 \N +19 \N +19 \N +20 20 +20 20 +20 20 +20 20 +21 \N +21 \N +22 \N +22 \N +23 \N +23 \N +24 \N +24 \N +25 \N +25 \N +26 \N +26 \N +27 \N +27 \N +28 \N +28 \N +29 \N +29 \N diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql 
b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql new file mode 100644 index 00000000000..797571a8552 --- /dev/null +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql @@ -0,0 +1,8 @@ +drop table if exists t_table_select; +CREATE TABLE t_table_select (id UInt32) ENGINE = MergeTree ORDER BY id; +INSERT INTO t_table_select (id) SELECT number FROM numbers(30); + +CREATE TEMPORARY TABLE t_test AS SELECT a.id, b.id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS a GLOBAL LEFT JOIN (SELECT id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS b WHERE (b.id % 10) = 0) AS b ON b.id = a.id SETTINGS join_use_nulls = 1; + +select * from t_test order by id; + diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.reference b/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql b/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql new file mode 100644 index 00000000000..03ac91e401a --- /dev/null +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql @@ -0,0 +1,18 @@ +drop table if exists source; +drop table if exists target1; +drop table if exists target2; +drop table if exists v_heavy; + + +create table source(type String) engine=MergeTree order by type; + +create view v_heavy as +with nums as (select number from numbers(1e5)) +select count(*) n from (select number from numbers(1e5) n1 cross join nums); + +create table target1(type String) engine=MergeTree order by type; +create table target2(type String) engine=MergeTree order by type; + +set max_execution_time=2; +-- we should not execute scalar subquery here +create materialized view vm_target2 to target2 as select * from source where type='two' and (select sum(sleepEachRow(0.1)) from numbers(30)); From 1b79c0a3814e3082e5336adc8194ce5c6f933c6a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 27 Feb 2024 19:42:49 +0000 Subject: [PATCH 066/356] Fixing tests. 
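
Follow-up for the __scalarSubqueryResult wrapper: OptimizeIfWithConstantConditionVisitor
unwraps it the same way as identity, the function is added to the documented-functions
list, and expected EXPLAIN output changes accordingly; for example, in
01029_early_constant_folding the folded condition is now printed as

    WHERE (1 IN (0, 2)) AND (2 = (__scalarSubqueryResult(_CAST(2, 'Nullable(UInt8)')) AS subquery))

The expected error code for a non-constant precision argument in
01281_parseDateTime64BestEffort also changes from 44 to 43.
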
--- src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp | 2 +- .../queries/0_stateless/01029_early_constant_folding.reference | 2 +- tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql | 2 +- .../02415_all_new_functions_must_be_documented.reference | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 13b6311a877..f3504f3f403 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -53,7 +53,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v } } } - else if (function->name == "toUInt8" || function->name == "toInt8" || function->name == "identity") + else if (function->name == "toUInt8" || function->name == "toInt8" || function->name == "identity" || function->name == "__scalarSubqueryResult") { if (const auto * expr_list = function->arguments->as()) { diff --git a/tests/queries/0_stateless/01029_early_constant_folding.reference b/tests/queries/0_stateless/01029_early_constant_folding.reference index abcb2ddc6a7..4df5414ba4a 100644 --- a/tests/queries/0_stateless/01029_early_constant_folding.reference +++ b/tests/queries/0_stateless/01029_early_constant_folding.reference @@ -2,7 +2,7 @@ SELECT 1 WHERE 0 SELECT 1 SELECT 1 -WHERE (1 IN (0, 2)) AND (2 = (identity(_CAST(2, \'Nullable(UInt8)\')) AS subquery)) +WHERE (1 IN (0, 2)) AND (2 = (__scalarSubqueryResult(_CAST(2, \'Nullable(UInt8)\')) AS subquery)) SELECT 1 WHERE 1 IN (( SELECT arrayJoin([1, 2, 3]) diff --git a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql index c1cec6ea212..809c5921d95 100644 --- a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql +++ b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql @@ -7,7 +7,7 @@ SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 'bar'); -- {ser SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 4); -- {serverError 43} -- invalid timezone parameter SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'baz'); -- {serverError BAD_ARGUMENTS} -- unknown timezone -SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 44} -- non-const precision +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 43} -- non-const precision SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, materialize('UTC')); -- {serverError 44} -- non-const timezone SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184012345678910111213141516171819Z', 3, 'UTC'); -- {serverError 6} diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 379eea4dbbb..cd776611857 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -62,6 +62,7 @@ __bitBoolMaskOr __bitSwapLastTwo __bitWrapperFunc __getScalar +__scalarSubqueryResult abs accurateCast accurateCastOrDefault From 802db19e3b6a7d6d2828c64f8d9b4bfbc68db3f8 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 27 Feb 2024 20:42:02 +0000 Subject: [PATCH 067/356] dedicated FunctionSecretArgumentsFinder for TreeNode, 
refactoring of FunctionSecretArgumentsFinder --- .../FunctionSecretArgumentsFinderTreeNode.h | 371 +++++++++++++ src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- src/Parsers/ASTFunction.cpp | 4 +- src/Parsers/FunctionSecretArgumentsFinder.cpp | 19 + src/Parsers/FunctionSecretArgumentsFinder.h | 488 +---------------- .../FunctionSecretArgumentsFinderAST.h | 499 ++++++++++++++++++ 6 files changed, 895 insertions(+), 488 deletions(-) create mode 100644 src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h create mode 100644 src/Parsers/FunctionSecretArgumentsFinder.cpp create mode 100644 src/Parsers/FunctionSecretArgumentsFinderAST.h diff --git a/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h b/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h new file mode 100644 index 00000000000..7e9a31868b2 --- /dev/null +++ b/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h @@ -0,0 +1,371 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + + +/// Finds arguments of a specified function which should not be displayed for most users for security reasons. +/// That involves passwords and secret keys. +class FunctionSecretArgumentsFinderTreeNode +{ +public: + explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments()) + { + if (arguments.getNodes().empty()) + return; + + findFunctionSecretArguments(); + } + + struct Result + { + /// Result constructed by default means no arguments will be hidden. + size_t start = static_cast(-1); + size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). + /// In all known cases secret arguments are consecutive + bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. + /// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))` + std::vector nested_maps; + + bool hasSecrets() const + { + return count != 0 || !nested_maps.empty(); + } + }; + + FunctionSecretArgumentsFinder::Result getResult() const { return result; } + +private: + const FunctionNode & function; + const ListNode & arguments; + FunctionSecretArgumentsFinder::Result result; + + void markSecretArgument(size_t index, bool argument_is_named = false) + { + if (index >= arguments.getNodes().size()) + return; + if (!result.count) + { + result.start = index; + result.are_named = argument_is_named; + } + chassert(index >= result.start); /// We always check arguments consecutively + result.count = index + 1 - result.start; + if (!argument_is_named) + result.are_named = false; + } + + void findFunctionSecretArguments() + { + const auto & name = function.getFunctionName(); + + if ((name == "mysql") || (name == "postgresql") || (name == "mongodb")) + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) + /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) + findMySQLFunctionSecretArguments(); + } + else if ((name == "s3") || (name == "cosn") || (name == "oss") || + (name == "deltaLake") || (name == "hudi") || (name == "iceberg")) + { + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (name == "s3Cluster") + { + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) 
+ findS3FunctionSecretArguments(/* is_cluster_function= */ true); + } + else if ((name == "remote") || (name == "remoteSecure")) + { + /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) + findRemoteFunctionSecretArguments(); + } + else if ((name == "encrypt") || (name == "decrypt") || + (name == "aes_encrypt_mysql") || (name == "aes_decrypt_mysql") || + (name == "tryDecrypt")) + { + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) + findEncryptionFunctionSecretArguments(); + } + else if (name == "url") + { + findURLSecretArguments(); + } + } + + void findMySQLFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// mysql(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + markSecretArgument(4); + } + } + + /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should + /// always be at the end). Marks "headers" as secret, if found. + size_t excludeS3OrURLNestedMaps() + { + const auto & nodes = arguments.getNodes(); + size_t count = nodes.size(); + while (count > 0) + { + const FunctionNode * f = nodes.at(count - 1)->as(); + if (!f) + break; + if (f->getFunctionName() == "headers") + result.nested_maps.push_back(f->getFunctionName()); + else if (f->getFunctionName() != "extra_credentials") + break; + count -= 1; + } + return count; + } + + void findS3FunctionSecretArguments(bool is_cluster_function) + { + /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) + findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) 
+ /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (url_arg_idx + 2 < count) + markSecretArgument(url_arg_idx + 2); + } + + void findURLSecretArguments() + { + if (!isNamedCollectionName(0)) + excludeS3OrURLNestedMaps(); + } + + bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const + { + if (arg_idx >= arguments.getNodes().size()) + return false; + + return tryGetStringFromArgument(arguments.getNodes()[arg_idx], res, allow_identifier); + } + + static bool tryGetStringFromArgument(const QueryTreeNodePtr argument, String * res, bool allow_identifier = true) + { + if (const auto * literal = argument->as()) + { + if (literal->getValue().getType() != Field::Types::String) + return false; + if (res) + *res = literal->getValue().safeGet(); + return true; + } + + if (allow_identifier) + { + if (const auto * id = argument->as()) + { + if (res) + *res = id->getIdentifier().getFullName(); + return true; + } + } + + return false; + } + + void findRemoteFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// remote(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + return; + } + + /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: + /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) + + /// But we should check the number of arguments first because we don't need to do any replacements in case of + /// remote('addresses_expr', db.table) + if (arguments.getNodes().size() < 3) + return; + + size_t arg_num = 1; + + /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. + const auto * table_function = arguments.getNodes()[arg_num]->as(); + if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName())) + { + ++arg_num; + } + else + { + std::optional database; + std::optional qualified_table_name; + if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) + { + /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. + /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' + /// before the argument 'password'. So it's safer to wipe two arguments just in case. + /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `user`. + markSecretArgument(arg_num + 2); + } + if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `sharding_key`. + markSecretArgument(arg_num + 3); + } + return; + } + + /// Skip the current argument (which is either a database name or a qualified table name). + ++arg_num; + if (database) + { + /// Skip the 'table' argument if the previous argument was a database name. + ++arg_num; + } + } + + /// Skip username. + ++arg_num; + + /// Do our replacement: + /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) 
+ /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); + if (can_be_password) + markSecretArgument(arg_num); + } + + /// Tries to get either a database name or a qualified table name from an argument. + /// Empty string is also allowed (it means the default database). + /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password. + bool tryGetDatabaseNameOrQualifiedTableName( + size_t arg_idx, + std::optional & res_database, + std::optional & res_qualified_table_name) const + { + res_database.reset(); + res_qualified_table_name.reset(); + + String str; + if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) + return false; + + if (str.empty()) + { + res_database = ""; + return true; + } + + auto qualified_table_name = QualifiedTableName::tryParseFromString(str); + if (!qualified_table_name) + return false; + + if (qualified_table_name->database.empty()) + res_database = std::move(qualified_table_name->table); + else + res_qualified_table_name = std::move(qualified_table_name); + return true; + } + + void findEncryptionFunctionSecretArguments() + { + if (arguments.getNodes().empty()) + return; + + /// We replace all arguments after 'mode' with '[HIDDEN]': + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') + result.start = 1; + result.count = arguments.getNodes().size() - 1; + } + + + /// Whether a specified argument can be the name of a named collection? + bool isNamedCollectionName(size_t arg_idx) const + { + if (arguments.getNodes().size() <= arg_idx) + return false; + + const auto * identifier = arguments.getNodes()[arg_idx]->as(); + return identifier != nullptr; + } + + /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. 
+ void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + for (size_t i = start; i < arguments.getNodes().size(); ++i) + { + const auto & argument = arguments.getNodes()[i]; + const auto * equals_func = argument->as(); + if (!equals_func || (equals_func->getFunctionName() != "equals")) + continue; + + const auto * expr_list = equals_func->getArguments().as(); + if (!expr_list) + continue; + + const auto & equal_args = expr_list->getNodes(); + if (equal_args.size() != 2) + continue; + + String found_key; + if (!tryGetStringFromArgument(equal_args[0], &found_key)) + continue; + + if (found_key == key) + markSecretArgument(i, /* argument_is_named= */ true); + } + } +}; + +} diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index a479b4a1162..f21df6b5ccc 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5127,7 +5127,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi /// Mask arguments if needed if (!scope.context->getSettingsRef().format_display_secrets_in_show_and_select) { - if (FunctionSecretArgumentsFinder::Result secret_arguments = FunctionSecretArgumentsFinder{function_node_ptr->toAST()->as()}.getResult(); secret_arguments.count) + if (FunctionSecretArgumentsFinder::Result secret_arguments = FunctionSecretArgumentsFinder::find(*function_node_ptr); secret_arguments.count) { auto & argument_nodes = function_node_ptr->getArgumentsNode()->as().getNodes(); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 7468c41910a..4dac9c090f6 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -693,7 +693,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format FunctionSecretArgumentsFinder::Result secret_arguments; if (!settings.show_secrets) - secret_arguments = FunctionSecretArgumentsFinder{*this}.getResult(); + secret_arguments = FunctionSecretArgumentsFinder::find(*this); for (size_t i = 0, size = arguments->children.size(); i < size; ++i) { @@ -757,7 +757,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format bool ASTFunction::hasSecretParts() const { - return (FunctionSecretArgumentsFinder{*this}.getResult().hasSecrets()) || childrenHaveSecretParts(); + return (FunctionSecretArgumentsFinder::find(*this).hasSecrets()) || childrenHaveSecretParts(); } String getFunctionName(const IAST * ast) diff --git a/src/Parsers/FunctionSecretArgumentsFinder.cpp b/src/Parsers/FunctionSecretArgumentsFinder.cpp new file mode 100644 index 00000000000..bdeb29a37ba --- /dev/null +++ b/src/Parsers/FunctionSecretArgumentsFinder.cpp @@ -0,0 +1,19 @@ +#include +#include +#include + + +namespace DB +{ + +FunctionSecretArgumentsFinder::Result FunctionSecretArgumentsFinder::find(const ASTFunction & function) +{ + return FunctionSecretArgumentsFinderAST(function).getResult(); +} + +FunctionSecretArgumentsFinder::Result FunctionSecretArgumentsFinder::find(const FunctionNode & function) +{ + return FunctionSecretArgumentsFinderTreeNode(function).getResult(); +} + +} diff --git a/src/Parsers/FunctionSecretArgumentsFinder.h b/src/Parsers/FunctionSecretArgumentsFinder.h index 355dd99a21a..950d913fe20 100644 --- a/src/Parsers/FunctionSecretArgumentsFinder.h +++ b/src/Parsers/FunctionSecretArgumentsFinder.h @@ -1,44 +1,14 @@ #pragma once -#include +#include #include -#include -#include -#include - -#include - namespace DB { - -/// Finds arguments of a 
specified function which should not be displayed for most users for security reasons. -/// That involves passwords and secret keys. class FunctionSecretArgumentsFinder { public: - explicit FunctionSecretArgumentsFinder(const ASTFunction & function_) : function(function_) - { - if (!function.arguments) - return; - - const auto * expr_list = function.arguments->as(); - if (!expr_list) - return; - - arguments = &expr_list->children; - switch (function.kind) - { - case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; - case ASTFunction::Kind::WINDOW_FUNCTION: break; - case ASTFunction::Kind::LAMBDA_FUNCTION: break; - case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; - case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; - case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; - } - } - struct Result { /// Result constructed by default means no arguments will be hidden. @@ -55,460 +25,8 @@ public: } }; - Result getResult() const { return result; } - -private: - const ASTFunction & function; - const ASTs * arguments = nullptr; - Result result; - - void markSecretArgument(size_t index, bool argument_is_named = false) - { - if (index >= arguments->size()) - return; - if (!result.count) - { - result.start = index; - result.are_named = argument_is_named; - } - chassert(index >= result.start); /// We always check arguments consecutively - result.count = index + 1 - result.start; - if (!argument_is_named) - result.are_named = false; - } - - void findOrdinaryFunctionSecretArguments() - { - if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) - { - /// mysql('host:port', 'database', 'table', 'user', 'password', ...) - /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) - /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) - findMySQLFunctionSecretArguments(); - } - else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") || - (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg")) - { - /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) - findS3FunctionSecretArguments(/* is_cluster_function= */ false); - } - else if (function.name == "s3Cluster") - { - /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) - findS3FunctionSecretArguments(/* is_cluster_function= */ true); - } - else if ((function.name == "remote") || (function.name == "remoteSecure")) - { - /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) - findRemoteFunctionSecretArguments(); - } - else if ((function.name == "encrypt") || (function.name == "decrypt") || - (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || - (function.name == "tryDecrypt")) - { - /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) - findEncryptionFunctionSecretArguments(); - } - else if (function.name == "url") - { - findURLSecretArguments(); - } - } - - void findMySQLFunctionSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// mysql(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - } - else - { - /// mysql('host:port', 'database', 'table', 'user', 'password', ...) - markSecretArgument(4); - } - } - - /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should - /// always be at the end). 
Marks "headers" as secret, if found. - size_t excludeS3OrURLNestedMaps() - { - size_t count = arguments->size(); - while (count > 0) - { - const ASTFunction * f = arguments->at(count - 1)->as(); - if (!f) - break; - if (f->name == "headers") - result.nested_maps.push_back(f->name); - else if (f->name != "extra_credentials") - break; - count -= 1; - } - return count; - } - - void findS3FunctionSecretArguments(bool is_cluster_function) - { - /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. - size_t url_arg_idx = is_cluster_function ? 1 : 0; - - if (!is_cluster_function && isNamedCollectionName(0)) - { - /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) - findSecretNamedArgument("secret_access_key", 1); - return; - } - - /// We should check other arguments first because we don't need to do any replacement in case of - /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - size_t count = excludeS3OrURLNestedMaps(); - if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) - { - String second_arg; - if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) - { - if (boost::iequals(second_arg, "NOSIGN")) - return; /// The argument after 'url' is "NOSIGN". - - if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) - return; /// The argument after 'url' is a format: s3('url', 'format', ...) - } - } - - /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: - /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) - /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - if (url_arg_idx + 2 < count) - markSecretArgument(url_arg_idx + 2); - } - - void findURLSecretArguments() - { - if (!isNamedCollectionName(0)) - excludeS3OrURLNestedMaps(); - } - - bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const - { - if (arg_idx >= arguments->size()) - return false; - - return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); - } - - static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) - { - if (const auto * literal = argument.as()) - { - if (literal->value.getType() != Field::Types::String) - return false; - if (res) - *res = literal->value.safeGet(); - return true; - } - - if (allow_identifier) - { - if (const auto * id = argument.as()) - { - if (res) - *res = id->name(); - return true; - } - } - - return false; - } - - void findRemoteFunctionSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// remote(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - return; - } - - /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: - /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) - /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) - /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) - - /// But we should check the number of arguments first because we don't need to do any replacements in case of - /// remote('addresses_expr', db.table) - if (arguments->size() < 3) - return; - - size_t arg_num = 1; - - /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. 
- const auto * table_function = (*arguments)[arg_num]->as(); - if (table_function && KnownTableFunctionNames::instance().exists(table_function->name)) - { - ++arg_num; - } - else - { - std::optional database; - std::optional qualified_table_name; - if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) - { - /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. - /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' - /// before the argument 'password'. So it's safer to wipe two arguments just in case. - /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string - /// before wiping it (because the `password` argument is always a literal string). - if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) - { - /// Wipe either `password` or `user`. - markSecretArgument(arg_num + 2); - } - if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) - { - /// Wipe either `password` or `sharding_key`. - markSecretArgument(arg_num + 3); - } - return; - } - - /// Skip the current argument (which is either a database name or a qualified table name). - ++arg_num; - if (database) - { - /// Skip the 'table' argument if the previous argument was a database name. - ++arg_num; - } - } - - /// Skip username. - ++arg_num; - - /// Do our replacement: - /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) - /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string - /// before wiping it (because the `password` argument is always a literal string). - bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); - if (can_be_password) - markSecretArgument(arg_num); - } - - /// Tries to get either a database name or a qualified table name from an argument. - /// Empty string is also allowed (it means the default database). - /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password. 
- bool tryGetDatabaseNameOrQualifiedTableName( - size_t arg_idx, - std::optional & res_database, - std::optional & res_qualified_table_name) const - { - res_database.reset(); - res_qualified_table_name.reset(); - - String str; - if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) - return false; - - if (str.empty()) - { - res_database = ""; - return true; - } - - auto qualified_table_name = QualifiedTableName::tryParseFromString(str); - if (!qualified_table_name) - return false; - - if (qualified_table_name->database.empty()) - res_database = std::move(qualified_table_name->table); - else - res_qualified_table_name = std::move(qualified_table_name); - return true; - } - - void findEncryptionFunctionSecretArguments() - { - if (arguments->empty()) - return; - - /// We replace all arguments after 'mode' with '[HIDDEN]': - /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') - result.start = 1; - result.count = arguments->size() - 1; - } - - void findTableEngineSecretArguments() - { - const String & engine_name = function.name; - if (engine_name == "ExternalDistributed") - { - /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - findExternalDistributedTableEngineSecretArguments(); - } - else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || - (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) - { - /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) - /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) - findMySQLFunctionSecretArguments(); - } - else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") || - (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue")) - { - /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) - findS3TableEngineSecretArguments(); - } - else if (engine_name == "URL") - { - findURLSecretArguments(); - } - } - - void findExternalDistributedTableEngineSecretArguments() - { - if (isNamedCollectionName(1)) - { - /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 2); - } - else - { - /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - markSecretArgument(5); - } - } - - void findS3TableEngineSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// S3(named_collection, ..., secret_access_key = 'secret_access_key') - findSecretNamedArgument("secret_access_key", 1); - return; - } - - /// We should check other arguments first because we don't need to do any replacement in case of - /// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - /// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)]) - size_t count = excludeS3OrURLNestedMaps(); - if ((3 <= count) && (count <= 4)) - { - String second_arg; - if (tryGetStringFromArgument(1, &second_arg)) - { - if (boost::iequals(second_arg, "NOSIGN")) - return; /// The argument after 'url' is "NOSIGN". - - if (count == 3) - { - if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) - return; /// The argument after 'url' is a format: S3('url', 'format', ...) 
- } - } - } - - /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - if (2 < count) - markSecretArgument(2); - } - - void findDatabaseEngineSecretArguments() - { - const String & engine_name = function.name; - if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || - (engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") || - (engine_name == "MaterializedPostgreSQL")) - { - /// MySQL('host:port', 'database', 'user', 'password') - /// PostgreSQL('host:port', 'database', 'user', 'password') - findMySQLDatabaseSecretArguments(); - } - else if (engine_name == "S3") - { - /// S3('url', 'access_key_id', 'secret_access_key') - findS3DatabaseSecretArguments(); - } - } - - void findMySQLDatabaseSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// MySQL(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - } - else - { - /// MySQL('host:port', 'database', 'user', 'password') - markSecretArgument(3); - } - } - - void findS3DatabaseSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// S3(named_collection, ..., secret_access_key = 'password', ...) - findSecretNamedArgument("secret_access_key", 1); - } - else - { - /// S3('url', 'access_key_id', 'secret_access_key') - markSecretArgument(2); - } - } - - void findBackupNameSecretArguments() - { - const String & engine_name = function.name; - if (engine_name == "S3") - { - /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key]) - markSecretArgument(2); - } - } - - /// Whether a specified argument can be the name of a named collection? - bool isNamedCollectionName(size_t arg_idx) const - { - if (arguments->size() <= arg_idx) - return false; - - const auto * identifier = (*arguments)[arg_idx]->as(); - return identifier != nullptr; - } - - /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. - void findSecretNamedArgument(const std::string_view & key, size_t start = 0) - { - for (size_t i = start; i < arguments->size(); ++i) - { - const auto & argument = (*arguments)[i]; - const auto * equals_func = argument->as(); - if (!equals_func || (equals_func->name != "equals")) - continue; - - const auto * expr_list = equals_func->arguments->as(); - if (!expr_list) - continue; - - const auto & equal_args = expr_list->children; - if (equal_args.size() != 2) - continue; - - String found_key; - if (!tryGetStringFromArgument(*equal_args[0], &found_key)) - continue; - - if (found_key == key) - markSecretArgument(i, /* argument_is_named= */ true); - } - } + static Result find(const ASTFunction & function); + static Result find(const FunctionNode & function); }; } diff --git a/src/Parsers/FunctionSecretArgumentsFinderAST.h b/src/Parsers/FunctionSecretArgumentsFinderAST.h new file mode 100644 index 00000000000..348b2ca9e3a --- /dev/null +++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h @@ -0,0 +1,499 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + + +/// Finds arguments of a specified function which should not be displayed for most users for security reasons. +/// That involves passwords and secret keys. 
+class FunctionSecretArgumentsFinderAST +{ +public: + explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_) + { + if (!function.arguments) + return; + + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return; + + arguments = &expr_list->children; + switch (function.kind) + { + case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; + case ASTFunction::Kind::WINDOW_FUNCTION: break; + case ASTFunction::Kind::LAMBDA_FUNCTION: break; + case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; + case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; + case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; + } + } + + FunctionSecretArgumentsFinder::Result getResult() const { return result; } + +private: + const ASTFunction & function; + const ASTs * arguments = nullptr; + FunctionSecretArgumentsFinder::Result result; + + void markSecretArgument(size_t index, bool argument_is_named = false) + { + if (index >= arguments->size()) + return; + if (!result.count) + { + result.start = index; + result.are_named = argument_is_named; + } + chassert(index >= result.start); /// We always check arguments consecutively + result.count = index + 1 - result.start; + if (!argument_is_named) + result.are_named = false; + } + + void findOrdinaryFunctionSecretArguments() + { + if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) + /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) + findMySQLFunctionSecretArguments(); + } + else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") || + (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg")) + { + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (function.name == "s3Cluster") + { + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ true); + } + else if ((function.name == "remote") || (function.name == "remoteSecure")) + { + /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) + findRemoteFunctionSecretArguments(); + } + else if ((function.name == "encrypt") || (function.name == "decrypt") || + (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || + (function.name == "tryDecrypt")) + { + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) + findEncryptionFunctionSecretArguments(); + } + else if (function.name == "url") + { + findURLSecretArguments(); + } + } + + void findMySQLFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// mysql(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + markSecretArgument(4); + } + } + + /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should + /// always be at the end). Marks "headers" as secret, if found. 
+ size_t excludeS3OrURLNestedMaps() + { + size_t count = arguments->size(); + while (count > 0) + { + const ASTFunction * f = arguments->at(count - 1)->as(); + if (!f) + break; + if (f->name == "headers") + result.nested_maps.push_back(f->name); + else if (f->name != "extra_credentials") + break; + count -= 1; + } + return count; + } + + void findS3FunctionSecretArguments(bool is_cluster_function) + { + /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) + findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (url_arg_idx + 2 < count) + markSecretArgument(url_arg_idx + 2); + } + + void findURLSecretArguments() + { + if (!isNamedCollectionName(0)) + excludeS3OrURLNestedMaps(); + } + + bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const + { + if (arg_idx >= arguments->size()) + return false; + + return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); + } + + static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) + { + if (const auto * literal = argument.as()) + { + if (literal->value.getType() != Field::Types::String) + return false; + if (res) + *res = literal->value.safeGet(); + return true; + } + + if (allow_identifier) + { + if (const auto * id = argument.as()) + { + if (res) + *res = id->name(); + return true; + } + } + + return false; + } + + void findRemoteFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// remote(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + return; + } + + /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: + /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) + + /// But we should check the number of arguments first because we don't need to do any replacements in case of + /// remote('addresses_expr', db.table) + if (arguments->size() < 3) + return; + + size_t arg_num = 1; + + /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. 
+ const auto * table_function = (*arguments)[arg_num]->as(); + if (table_function && KnownTableFunctionNames::instance().exists(table_function->name)) + { + ++arg_num; + } + else + { + std::optional database; + std::optional qualified_table_name; + if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) + { + /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. + /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' + /// before the argument 'password'. So it's safer to wipe two arguments just in case. + /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `user`. + markSecretArgument(arg_num + 2); + } + if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `sharding_key`. + markSecretArgument(arg_num + 3); + } + return; + } + + /// Skip the current argument (which is either a database name or a qualified table name). + ++arg_num; + if (database) + { + /// Skip the 'table' argument if the previous argument was a database name. + ++arg_num; + } + } + + /// Skip username. + ++arg_num; + + /// Do our replacement: + /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) + /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); + if (can_be_password) + markSecretArgument(arg_num); + } + + /// Tries to get either a database name or a qualified table name from an argument. + /// Empty string is also allowed (it means the default database). + /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password. 
+ bool tryGetDatabaseNameOrQualifiedTableName( + size_t arg_idx, + std::optional & res_database, + std::optional & res_qualified_table_name) const + { + res_database.reset(); + res_qualified_table_name.reset(); + + String str; + if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) + return false; + + if (str.empty()) + { + res_database = ""; + return true; + } + + auto qualified_table_name = QualifiedTableName::tryParseFromString(str); + if (!qualified_table_name) + return false; + + if (qualified_table_name->database.empty()) + res_database = std::move(qualified_table_name->table); + else + res_qualified_table_name = std::move(qualified_table_name); + return true; + } + + void findEncryptionFunctionSecretArguments() + { + if (arguments->empty()) + return; + + /// We replace all arguments after 'mode' with '[HIDDEN]': + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') + result.start = 1; + result.count = arguments->size() - 1; + } + + void findTableEngineSecretArguments() + { + const String & engine_name = function.name; + if (engine_name == "ExternalDistributed") + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + findExternalDistributedTableEngineSecretArguments(); + } + else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || + (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) + { + /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) + /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) + findMySQLFunctionSecretArguments(); + } + else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") || + (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue")) + { + /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) + findS3TableEngineSecretArguments(); + } + else if (engine_name == "URL") + { + findURLSecretArguments(); + } + } + + void findExternalDistributedTableEngineSecretArguments() + { + if (isNamedCollectionName(1)) + { + /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 2); + } + else + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + markSecretArgument(5); + } + } + + void findS3TableEngineSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// S3(named_collection, ..., secret_access_key = 'secret_access_key') + findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((3 <= count) && (count <= 4)) + { + String second_arg; + if (tryGetStringFromArgument(1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (count == 3) + { + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: S3('url', 'format', ...) 
+ } + } + } + + /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (2 < count) + markSecretArgument(2); + } + + void findDatabaseEngineSecretArguments() + { + const String & engine_name = function.name; + if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || + (engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") || + (engine_name == "MaterializedPostgreSQL")) + { + /// MySQL('host:port', 'database', 'user', 'password') + /// PostgreSQL('host:port', 'database', 'user', 'password') + findMySQLDatabaseSecretArguments(); + } + else if (engine_name == "S3") + { + /// S3('url', 'access_key_id', 'secret_access_key') + findS3DatabaseSecretArguments(); + } + } + + void findMySQLDatabaseSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// MySQL(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// MySQL('host:port', 'database', 'user', 'password') + markSecretArgument(3); + } + } + + void findS3DatabaseSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// S3(named_collection, ..., secret_access_key = 'password', ...) + findSecretNamedArgument("secret_access_key", 1); + } + else + { + /// S3('url', 'access_key_id', 'secret_access_key') + markSecretArgument(2); + } + } + + void findBackupNameSecretArguments() + { + const String & engine_name = function.name; + if (engine_name == "S3") + { + /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key]) + markSecretArgument(2); + } + } + + /// Whether a specified argument can be the name of a named collection? + bool isNamedCollectionName(size_t arg_idx) const + { + if (arguments->size() <= arg_idx) + return false; + + const auto * identifier = (*arguments)[arg_idx]->as(); + return identifier != nullptr; + } + + /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. 
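+    /// For example (placeholder values), findSecretNamedArgument("password", 1) applied to
+    ///     mysql(named_collection, host = 'some_host', password = 'secret')
+    /// marks the value of the `password = 'secret'` argument.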
+ void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + for (size_t i = start; i < arguments->size(); ++i) + { + const auto & argument = (*arguments)[i]; + const auto * equals_func = argument->as(); + if (!equals_func || (equals_func->name != "equals")) + continue; + + const auto * expr_list = equals_func->arguments->as(); + if (!expr_list) + continue; + + const auto & equal_args = expr_list->children; + if (equal_args.size() != 2) + continue; + + String found_key; + if (!tryGetStringFromArgument(*equal_args[0], &found_key)) + continue; + + if (found_key == key) + markSecretArgument(i, /* argument_is_named= */ true); + } + } +}; + +} From d4b71748fc4aa54bc233d37f14ad54225eaf3267 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 27 Feb 2024 22:38:41 +0000 Subject: [PATCH 068/356] fix style --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f21df6b5ccc..5046a879fa5 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5135,9 +5135,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi { if (auto * constant = argument_nodes[n]->as()) { - auto [mask, _] = scope.projection_mask_map->insert( {constant->getTreeHash(), scope.projection_mask_map->size() + 1} ); - constant->setMaskId(mask->second); - arguments_projection_names[n] = "[HIDDEN id: " + std::to_string(mask->second) + "]"; + auto mask = scope.projection_mask_map->insert( {constant->getTreeHash(), scope.projection_mask_map->size() + 1} ).first->second; + constant->setMaskId(mask); + arguments_projection_names[n] = "[HIDDEN id: " + std::to_string(mask) + "]"; } } } From ad7715b2fa2f63183b6989bfedd2a741a4df30cd Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 27 Feb 2024 23:12:29 +0000 Subject: [PATCH 069/356] fix style --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 5046a879fa5..819a3758e26 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5135,7 +5135,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi { if (auto * constant = argument_nodes[n]->as()) { - auto mask = scope.projection_mask_map->insert( {constant->getTreeHash(), scope.projection_mask_map->size() + 1} ).first->second; + auto mask = scope.projection_mask_map->insert({constant->getTreeHash(), scope.projection_mask_map->size() + 1}).first->second; constant->setMaskId(mask); arguments_projection_names[n] = "[HIDDEN id: " + std::to_string(mask) + "]"; } From cca154b6b9a443d29bc3609a77c8b14d33a00317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B1=AA=E8=82=A5=E8=82=A5?= Date: Wed, 28 Feb 2024 12:16:34 +0800 Subject: [PATCH 070/356] fix option ambiguous --- programs/local/LocalServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 867e7a69a6d..68f0e52ce08 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -841,7 +841,7 @@ void LocalServer::addOptions(OptionsDescription & options_description) /// If structure argument is omitted then initial query is not generated ("structure,S", po::value(), "structure of the initial table (list of 
column and type names)") - ("file,f", po::value(), "path to file with data of the initial table (stdin if not specified)") + ("file,F", po::value(), "path to file with data of the initial table (stdin if not specified)") ("input-format", po::value(), "input format of the initial table data") ("output-format", po::value(), "default output format") From 4f2ab335af2df8a6e5d4c45810231c31f1bc573c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Feb 2024 17:07:49 +0800 Subject: [PATCH 071/356] fix some bugs --- src/Functions/multiIf.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 0477e99dfcd..68febd54eaf 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -157,6 +157,10 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const override { + /// Fast path when data is empty + if (input_rows_count == 0) + return result_type->createColumn(); + ColumnsWithTypeAndName arguments = args; executeShortCircuitArguments(arguments); /** We will gather values from columns in branches to result column, @@ -257,7 +261,7 @@ public: } const WhichDataType which(removeNullable(result_type)); - bool execute_multiif_columnar = settings.allow_execute_multiif_columnar && !contains_short + bool execute_multiif_columnar = allow_execute_multiif_columnar && !contains_short && instructions.size() <= std::numeric_limits::max() && (which.isInt() || which.isUInt() || which.isFloat() || which.isDecimal() || which.isDateOrDate32OrDateTimeOrDateTime64() || which.isEnum() || which.isIPv4() || which.isIPv6()); @@ -266,6 +270,7 @@ public: if (!execute_multiif_columnar) { MutableColumnPtr res = return_type->createColumn(); + res->reserve(rows); executeInstructions(instructions, rows, res); return std::move(res); } @@ -367,7 +372,7 @@ private: template static void calculateInserts(std::vector & instructions, size_t rows, PaddedPODArray & inserts) { - for (S i = static_cast(instructions.size() - 1); i >= 0; --i) + for (S i = instructions.size() - 1; i != static_cast(-1); --i) { auto & instruction = instructions[i]; if (instruction.condition_always_true) @@ -450,6 +455,7 @@ private: data_cols[i] = assert_cast &>(*instructions[i].source).getData().data(); } } + for (size_t row_i = 0; row_i < rows; ++row_i) { S insert = inserts[row_i]; From dcafa2a3b3733cc612d21baf58d7998fe31a1fb9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 28 Feb 2024 10:56:28 +0000 Subject: [PATCH 072/356] Fix: test_parallel_replicas_custom_key_load_balancing --- .../test.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py b/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py index b9d4d029703..d5e17103296 100644 --- a/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py +++ b/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py @@ -109,10 +109,13 @@ def test_parallel_replicas_custom_key_load_balancing( == "subqueries\t4\n" ) - # check queries per node - assert ( - node1.query( - f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" + # With enabled hedged requests, we can't guarantee exact query distribution among 
nodes + # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice + if use_hedged_requests == 0: + # check queries per node + assert ( + node1.query( + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" + ) + == "n1\t2\nn2\t1\nn3\t1\nn4\t1\n" ) - == "n1\t2\nn2\t1\nn3\t1\nn4\t1\n" - ) From c707820c3c996c09da3811c15dc5d55068e16e79 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 28 Feb 2024 12:38:43 +0000 Subject: [PATCH 073/356] Fixing test --- tests/queries/0_stateless/02116_tuple_element.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index 97f6c049705..a2df6fadde0 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -19,7 +19,7 @@ SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, 10 } SELECT t2.1 FROM t_tuple_element; EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; @@ -31,7 +31,7 @@ SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, 10 } DROP TABLE t_tuple_element; From 4beb6fdcb037abce135b7d189b990737a824b2fe Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Feb 2024 22:39:49 +0800 Subject: [PATCH 074/356] fix code stule --- src/Functions/multiIf.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 68febd54eaf..b0a344be96e 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -28,7 +28,6 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; } namespace @@ -370,7 +369,7 @@ private: /// We should read source from which instruction on each row? 
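    /// I.e. inserts[row] holds the index of the instruction (branch) whose source column provides the value
    /// for that row; e.g. inserts[row] == 2 means the value is taken from instructions[2].source.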
template - static void calculateInserts(std::vector & instructions, size_t rows, PaddedPODArray & inserts) + static NO_INLINE void calculateInserts(std::vector & instructions, size_t rows, PaddedPODArray & inserts) { for (S i = instructions.size() - 1; i != static_cast(-1); --i) { @@ -411,7 +410,7 @@ private: } template - static void executeInstructionsColumnar( + static NO_INLINE void executeInstructionsColumnar( std::vector & instructions, size_t rows, PaddedPODArray & res_data, From f86213ecb615aa0949b878c3ac58ec63560dd886 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Feb 2024 16:29:24 +0100 Subject: [PATCH 075/356] Cancel PipelineExecutor properly in case of exception in spawnThreads --- src/Processors/Executors/PipelineExecutor.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index a06bacd7d3b..c3fbe6788c6 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -399,7 +399,18 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) if (num_threads > 1) { - spawnThreads(); // start at least one thread + try + { + spawnThreads(); // start at least one thread + } + catch (...) + { + /// spawnThreads can throw an exception, for example CANNOT_SCHEDULE_TASK. + /// We should cancel execution properly before rethrow. + cancel(); + throw; + } + tasks.processAsyncTasks(); pool->wait(); } From b847baa8c619fb321f9fb19354493ae67d4403ab Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 28 Feb 2024 15:42:50 +0000 Subject: [PATCH 076/356] Fixing tests. --- .../0_stateless/00597_push_down_predicate_long.reference | 2 +- tests/queries/0_stateless/02116_tuple_element.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00597_push_down_predicate_long.reference b/tests/queries/0_stateless/00597_push_down_predicate_long.reference index 04b0432c0a9..2c46edc98bf 100644 --- a/tests/queries/0_stateless/00597_push_down_predicate_long.reference +++ b/tests/queries/0_stateless/00597_push_down_predicate_long.reference @@ -114,7 +114,7 @@ FROM ( SELECT 1 AS id, - identity(_CAST(1, \'Nullable(UInt8)\')) AS subquery + __scalarSubqueryResult(_CAST(1, \'Nullable(UInt8)\')) AS subquery WHERE subquery = 1 ) WHERE subquery = 1 diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index a2df6fadde0..64d9b9db331 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -19,7 +19,7 @@ SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, 10 } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } SELECT t2.1 FROM t_tuple_element; EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; @@ -31,7 +31,7 @@ SELECT tupleElement(t2) FROM t_tuple_element; 
-- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, 10 } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } DROP TABLE t_tuple_element; From aef5818216fddaf9e2c13674e5e43632a3cc8cb8 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 28 Feb 2024 15:45:33 +0000 Subject: [PATCH 077/356] fix test --- .../0_stateless/02998_analyzer_secret_args_tree_node.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql index 7ac718de98e..f1d916eca80 100644 --- a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest: encrypt function doesn't exist in the fastest build + -- { echoOn } SET allow_experimental_analyzer = 1; From 0fa22abb95c4848b785edab868439143f024b1eb Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 28 Feb 2024 16:53:55 +0000 Subject: [PATCH 078/356] refactoring of virtual columns --- .../Transforms/buildPushingToViewsChain.cpp | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 35 +++++----- src/Storages/FileLog/StorageFileLog.h | 5 +- src/Storages/Hive/StorageHive.cpp | 11 ++-- src/Storages/Hive/StorageHive.h | 2 - src/Storages/IStorage.cpp | 7 ++ src/Storages/IStorage.h | 4 +- src/Storages/Kafka/KafkaSource.cpp | 2 +- src/Storages/Kafka/StorageKafka.cpp | 66 ++++++++----------- src/Storages/Kafka/StorageKafka.h | 5 +- src/Storages/LiveView/StorageLiveView.cpp | 12 ++-- src/Storages/LiveView/StorageLiveView.h | 2 - .../MergeTree/StorageFromMergeTreeDataPart.h | 6 +- src/Storages/NATS/StorageNATS.cpp | 32 ++++----- src/Storages/NATS/StorageNATS.h | 2 +- .../StorageMaterializedPostgreSQL.cpp | 19 +++--- .../StorageMaterializedPostgreSQL.h | 5 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 45 +++++++------ src/Storages/RabbitMQ/StorageRabbitMQ.h | 4 +- src/Storages/StorageKeeperMap.cpp | 11 ++-- src/Storages/StorageKeeperMap.h | 2 - src/Storages/StorageMaterializedMySQL.cpp | 13 +--- src/Storages/StorageMaterializedMySQL.h | 1 - src/Storages/StorageMaterializedView.cpp | 7 +- src/Storages/StorageMaterializedView.h | 3 - src/Storages/StorageMerge.h | 3 + src/Storages/StorageProxy.h | 1 - src/Storages/StorageValues.cpp | 5 +- src/Storages/StorageValues.h | 16 ++--- .../System/StorageSystemDictionaries.cpp | 17 +++-- .../System/StorageSystemDictionaries.h | 5 +- .../System/StorageSystemPartsBase.cpp | 11 ++-- src/Storages/System/StorageSystemPartsBase.h | 2 - 33 files changed, 170 insertions(+), 193 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index c7a03c0bc6d..c4b707a0ce0 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -574,7 +574,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat views_data.source_storage_id, 
views_data.source_metadata_snapshot->getColumns(), std::move(block), - views_data.source_storage->getVirtuals())); + *views_data.source_storage->getVirtualsDescription())); QueryPipelineBuilder pipeline; diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index b0bac081d38..23e869bfa89 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -27,6 +27,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include @@ -148,6 +149,9 @@ StorageFileLog::StorageFileLog( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + auto virtuals = createVirtuals(filelog_settings->handle_error_mode); + setVirtuals(virtuals); + if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) { if (LoadingStrictnessLevel::ATTACH <= mode) @@ -203,6 +207,22 @@ StorageFileLog::StorageFileLog( } } +VirtualColumnsDescription StorageFileLog::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_filename", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_offset", std::make_shared(), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_record", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_error", std::make_shared(std::make_shared()), ""); + } + + return desc; +} + void StorageFileLog::loadMetaFiles(bool attach) { /// Attach table @@ -1009,19 +1029,4 @@ bool StorageFileLog::updateFileInfos() return events.empty() || file_infos.file_names.empty(); } -NamesAndTypesList StorageFileLog::getVirtuals() const -{ - auto virtuals = NamesAndTypesList{ - {"_filename", std::make_shared(std::make_shared())}, - {"_offset", std::make_shared()}}; - - if (filelog_settings->handle_error_mode == StreamingHandleErrorMode::STREAM) - { - virtuals.push_back({"_raw_record", std::make_shared(std::make_shared())}); - virtuals.push_back({"_error", std::make_shared(std::make_shared())}); - } - - return virtuals; -} - } diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index bdcf85ad6e5..ede36600f92 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -9,6 +9,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include #include @@ -102,8 +103,6 @@ public: String getFullMetaPath(const String & file_name) const { return std::filesystem::path(metadata_base_path) / file_name; } String getFullDataPath(const String & file_name) const { return std::filesystem::path(root_data_path) / file_name; } - NamesAndTypesList getVirtuals() const override; - static UInt64 getInode(const String & file_name); void openFilesAndSetPos(); @@ -212,6 +211,8 @@ private: UInt64 inode = 0; }; ReadMetadataResult readMetadata(const String & filename) const; + + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); }; } diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 183a4532281..f8c10ea4249 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -45,6 +45,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -444,6 +445,9 @@ StorageHive::StorageHive( storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext()); setInMemoryMetadata(storage_metadata); + + auto virtuals = 
VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); + setVirtuals(virtuals); } void StorageHive::lazyInitialize() @@ -1020,13 +1024,6 @@ SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetad throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for StorageHive"); } -NamesAndTypesList StorageHive::getVirtuals() const -{ - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; -} - std::optional StorageHive::totalRows(const Settings & settings) const { /// query_info is not used when prune_level == PruneLevel::None diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 07440097f7a..67ef153af0e 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -54,8 +54,6 @@ public: SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/, bool async_insert) override; - NamesAndTypesList getVirtuals() const override; - bool supportsSubsetOfColumns() const; std::optional totalRows(const Settings & settings) const override; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 9e2c7bd58ee..27593bfe7a8 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -28,6 +28,13 @@ namespace ErrorCodes extern const int CANNOT_RESTORE_TABLE; } +IStorage::IStorage(StorageID storage_id_) + : storage_id(std::move(storage_id_)) + , metadata(std::make_unique()) + , virtuals(std::make_unique()) +{ +} + bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const { /// Virtual column maybe overridden by real column diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 1ce70d3c84c..bd64447a00e 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -99,9 +99,7 @@ class IStorage : public std::enable_shared_from_this, public TypePromo public: IStorage() = delete; /// Storage metadata can be set separately in setInMemoryMetadata method - explicit IStorage(StorageID storage_id_) - : storage_id(std::move(storage_id_)) - , metadata(std::make_unique()) {} + explicit IStorage(StorageID storage_id_); IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index dc62c13f633..9c68107872e 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -45,7 +45,7 @@ KafkaSource::KafkaSource( , max_block_size(max_block_size_) , commit_in_suffix(commit_in_suffix_) , non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized()) - , virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtualColumnNames())) + , virtual_header(storage.getVirtualsHeader()) , handle_error_mode(storage.getStreamingHandleErrorMode()) { } diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index aa347fc719d..3656b599788 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -52,6 +52,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include #if USE_KRB5 @@ -344,6 +345,10 @@ StorageKafka::StorageKafka( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + + auto virtuals = createVirtuals(kafka_settings->kafka_handle_error_mode); + 
setVirtuals(virtuals); + auto task_count = thread_per_consumer ? num_consumers : 1; for (size_t i = 0; i < task_count; ++i) { @@ -365,6 +370,28 @@ StorageKafka::StorageKafka( }); } +VirtualColumnsDescription StorageKafka::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_topic", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_key", std::make_shared(), ""); + desc.addEphemeral("_offset", std::make_shared(), ""); + desc.addEphemeral("_partition", std::make_shared(), ""); + desc.addEphemeral("_timestamp", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_timestamp_ms", std::make_shared(std::make_shared(3)), ""); + desc.addEphemeral("_headers.name", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_headers.value", std::make_shared(std::make_shared()), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared(), ""); + desc.addEphemeral("_error", std::make_shared(), ""); + } + + return desc; +} + SettingsChanges StorageKafka::createSettingsAdjustments() { SettingsChanges result; @@ -1187,43 +1214,4 @@ void registerStorageKafka(StorageFactory & factory) factory.registerStorage("Kafka", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); } -NamesAndTypesList StorageKafka::getVirtuals() const -{ - auto result = NamesAndTypesList{ - {"_topic", std::make_shared(std::make_shared())}, - {"_key", std::make_shared()}, - {"_offset", std::make_shared()}, - {"_partition", std::make_shared()}, - {"_timestamp", std::make_shared(std::make_shared())}, - {"_timestamp_ms", std::make_shared(std::make_shared(3))}, - {"_headers.name", std::make_shared(std::make_shared())}, - {"_headers.value", std::make_shared(std::make_shared())}}; - if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - result.push_back({"_raw_message", std::make_shared()}); - result.push_back({"_error", std::make_shared()}); - } - return result; -} - -Names StorageKafka::getVirtualColumnNames() const -{ - auto result = Names { - "_topic", - "_key", - "_offset", - "_partition", - "_timestamp", - "_timestamp_ms", - "_headers.name", - "_headers.value", - }; - if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - result.push_back({"_raw_message"}); - result.push_back({"_error"}); - } - return result; -} - } diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index f9a1e3ff6f3..6a14bd64cc6 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -7,6 +7,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include @@ -74,8 +75,6 @@ public: const auto & getFormatName() const { return format_name; } - NamesAndTypesList getVirtuals() const override; - Names getVirtualColumnNames() const; StreamingHandleErrorMode getStreamingHandleErrorMode() const { return kafka_settings->kafka_handle_error_mode; } struct SafeConsumers @@ -159,6 +158,8 @@ private: bool checkDependencies(const StorageID & table_id); void cleanConsumers(); + + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); }; } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 2f011567b90..631c88a4dad 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -27,6 +27,7 @@ limitations under the 
License. */ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include #include @@ -218,6 +219,10 @@ StorageLiveView::StorageLiveView( setInMemoryMetadata(storage_metadata); + VirtualColumnsDescription virtuals; + virtuals.addEphemeral("_version", std::make_shared(), ""); + setVirtuals(virtuals); + if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); @@ -256,13 +261,6 @@ StorageLiveView::~StorageLiveView() shutdown(false); } -NamesAndTypesList StorageLiveView::getVirtuals() const -{ - return NamesAndTypesList{ - NameAndTypePair("_version", std::make_shared()) - }; -} - void StorageLiveView::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { auto table_id = getStorageID(); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 6b8780cb81b..08f6ed4e38f 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -73,8 +73,6 @@ public: bool supportsFinal() const override { return true; } - NamesAndTypesList getVirtuals() const override; - void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; void drop() override; diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index bbb38346f38..da4405dfd40 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -34,6 +34,7 @@ public: , partition_id(part_->info.partition_id) { setInMemoryMetadata(storage.getInMemoryMetadata()); + setVirtuals(*storage.getVirtualsDescription()); } /// Used in queries with projection. @@ -90,11 +91,6 @@ public: bool supportsSubcolumns() const override { return true; } - NamesAndTypesList getVirtuals() const override - { - return storage.getVirtuals(); - } - String getPartitionId() const { return partition_id; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index c7a5d0b8d0a..a06221e1b26 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -89,6 +89,9 @@ StorageNATS::StorageNATS( storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + auto virtuals = createVirtuals(nats_settings->nats_handle_error_mode); + setVirtuals(virtuals); + nats_context = addSettings(getContext()); nats_context->makeQueryContext(); @@ -131,6 +134,19 @@ StorageNATS::StorageNATS( connection_task->deactivate(); } +VirtualColumnsDescription StorageNATS::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + desc.addEphemeral("_subject", std::make_shared(), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_error", std::make_shared(std::make_shared()), ""); + } + + return desc; +} Names StorageNATS::parseList(const String & list, char delim) { @@ -746,20 +762,4 @@ void registerStorageNATS(StorageFactory & factory) factory.registerStorage("NATS", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); } - -NamesAndTypesList StorageNATS::getVirtuals() const -{ - auto virtuals = NamesAndTypesList{ - {"_subject", std::make_shared()} - }; - - if (nats_settings->nats_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - virtuals.push_back({"_raw_message", std::make_shared(std::make_shared())}); - 
virtuals.push_back({"_error", std::make_shared(std::make_shared())}); - } - - return virtuals; -} - } diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 94f955ccdae..41d77acfde6 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -61,7 +61,6 @@ public: NATSConsumerPtr popConsumer(std::chrono::milliseconds timeout); const String & getFormatName() const { return format_name; } - NamesAndTypesList getVirtuals() const override; void incrementReader(); void decrementReader(); @@ -137,6 +136,7 @@ private: static Names parseList(const String & list, char delim); static String getTableBasedName(String name, const StorageID & table_id); + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); ContextMutablePtr addSettings(ContextPtr context) const; size_t getMaxBlockSize() const; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 61d83750c31..2914c17b117 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -1,4 +1,5 @@ #include "StorageMaterializedPostgreSQL.h" +#include "Storages/VirtualColumnsDescription.h" #if USE_LIBPQXX #include @@ -72,6 +73,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MaterializedPostgreSQL is allowed only for Atomic database"); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); replication_settings->materialized_postgresql_tables_list = remote_table_name_; @@ -127,8 +129,16 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( , nested_table_id(nested_storage_->getStorageID()) { setInMemoryMetadata(nested_storage_->getInMemoryMetadata()); + setVirtuals(*nested_storage_->getVirtualsDescription()); } +VirtualColumnsDescription StorageMaterializedPostgreSQL::createVirtuals() +{ + VirtualColumnsDescription desc; + desc.addEphemeral("_sign", std::make_shared(), ""); + desc.addEphemeral("_version", std::make_shared(), ""); + return desc; +} /// A temporary clone table might be created for current table in order to update its schema and reload /// all data in the background while current table will still handle read requests. @@ -254,15 +264,6 @@ void StorageMaterializedPostgreSQL::dropInnerTableIfAny(bool sync, ContextPtr lo } -NamesAndTypesList StorageMaterializedPostgreSQL::getVirtuals() const -{ - return NamesAndTypesList{ - {"_sign", std::make_shared()}, - {"_version", std::make_shared()} - }; -} - - bool StorageMaterializedPostgreSQL::needRewriteQueryWithFinal(const Names & column_names) const { return needRewriteQueryWithFinalForStorage(column_names, getNested()); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index 2d5c1efae7c..41f72ea79e1 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -1,5 +1,6 @@ #pragma once +#include "Storages/VirtualColumnsDescription.h" #include "config.h" #if USE_LIBPQXX @@ -89,8 +90,6 @@ public: /// Used only for single MaterializedPostgreSQL storage. 
void dropInnerTableIfAny(bool sync, ContextPtr local_context) override; - NamesAndTypesList getVirtuals() const override; - bool needRewriteQueryWithFinal(const Names & column_names) const override; void read( @@ -138,6 +137,8 @@ private: static std::shared_ptr getMaterializedColumnsDeclaration( String name, String type, UInt64 default_value); + static VirtualColumnsDescription createVirtuals(); + ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const; String getNestedTableName() const; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index e2ef23193c5..fb7b9c10d7f 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -30,6 +30,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" namespace DB { @@ -137,6 +138,9 @@ StorageRabbitMQ::StorageRabbitMQ( storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + auto virtuals = createVirtuals(rabbitmq_settings->rabbitmq_handle_error_mode); + setVirtuals(virtuals); + rabbitmq_context = addSettings(getContext()); rabbitmq_context->makeQueryContext(); @@ -191,6 +195,26 @@ StorageRabbitMQ::StorageRabbitMQ( init_task->deactivate(); } +VirtualColumnsDescription StorageRabbitMQ::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_exchange_name", std::make_shared(), ""); + desc.addEphemeral("_channel_id", std::make_shared(), ""); + desc.addEphemeral("_delivery_tag", std::make_shared(), ""); + desc.addEphemeral("_redelivered", std::make_shared(), ""); + desc.addEphemeral("_message_id", std::make_shared(), ""); + desc.addEphemeral("_timestamp", std::make_shared(), ""); + + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_error", std::make_shared(std::make_shared()), ""); + } + + return desc; +} Names StorageRabbitMQ::parseSettings(String settings_list) { @@ -1213,25 +1237,4 @@ void registerStorageRabbitMQ(StorageFactory & factory) factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); } - -NamesAndTypesList StorageRabbitMQ::getVirtuals() const -{ - auto virtuals = NamesAndTypesList{ - {"_exchange_name", std::make_shared()}, - {"_channel_id", std::make_shared()}, - {"_delivery_tag", std::make_shared()}, - {"_redelivered", std::make_shared()}, - {"_message_id", std::make_shared()}, - {"_timestamp", std::make_shared()} - }; - - if (rabbitmq_settings->rabbitmq_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - virtuals.push_back({"_raw_message", std::make_shared(std::make_shared())}); - virtuals.push_back({"_error", std::make_shared(std::make_shared())}); - } - - return virtuals; -} - } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index b3a0b53cde5..603a1f1d68f 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -9,6 +9,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include #include #include @@ -68,7 +69,6 @@ public: RabbitMQConsumerPtr popConsumer(std::chrono::milliseconds timeout); const String & getFormatName() const { return format_name; } - NamesAndTypesList getVirtuals() const override; String getExchange() const { return exchange_name; } void unbindExchange(); @@ -191,6 +191,8 @@ private: bool tryStreamToViews(); 
bool hasDependencies(const StorageID & table_id); + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); + static String getRandomName() { std::uniform_int_distribution distribution('a', 'z'); diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 5113dccda5b..6c7a8e06cc3 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -41,6 +41,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include #include @@ -331,6 +332,10 @@ StorageKeeperMap::StorageKeeperMap( setInMemoryMetadata(metadata); + VirtualColumnsDescription virtuals; + virtuals.addEphemeral(String(version_column_name), std::make_shared(), ""); + setVirtuals(virtuals); + WriteBufferFromOwnString out; out << "KeeperMap metadata format version: 1\n" << "columns: " << metadata.columns.toString() @@ -634,12 +639,6 @@ void StorageKeeperMap::drop() dropTable(client, metadata_drop_lock); } -NamesAndTypesList StorageKeeperMap::getVirtuals() const -{ - return NamesAndTypesList{ - {std::string{version_column_name}, std::make_shared()}}; -} - namespace { diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index 9dca96a24a3..d1ed282bac5 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -50,8 +50,6 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override; void drop() override; - NamesAndTypesList getVirtuals() const override; - std::string getName() const override { return "KeeperMap"; } Names getPrimaryKey() const override { return {primary_key}; } diff --git a/src/Storages/StorageMaterializedMySQL.cpp b/src/Storages/StorageMaterializedMySQL.cpp index 0dc0b1bff0b..1651b0499ad 100644 --- a/src/Storages/StorageMaterializedMySQL.cpp +++ b/src/Storages/StorageMaterializedMySQL.cpp @@ -22,9 +22,8 @@ namespace DB StorageMaterializedMySQL::StorageMaterializedMySQL(const StoragePtr & nested_storage_, const IDatabase * database_) : StorageProxy(nested_storage_->getStorageID()), nested_storage(nested_storage_), database(database_) { - StorageInMemoryMetadata in_memory_metadata; - in_memory_metadata = nested_storage->getInMemoryMetadata(); - setInMemoryMetadata(in_memory_metadata); + setInMemoryMetadata(nested_storage->getInMemoryMetadata()); + setVirtuals(*nested_storage->getVirtualsDescription()); } bool StorageMaterializedMySQL::needRewriteQueryWithFinal(const Names & column_names) const @@ -49,14 +48,6 @@ void StorageMaterializedMySQL::read( query_info, context, processed_stage, max_block_size, num_streams); } -NamesAndTypesList StorageMaterializedMySQL::getVirtuals() const -{ - if (const auto * db = typeid_cast(database)) - db->rethrowExceptionIfNeeded(); - - return nested_storage->getVirtuals(); -} - IStorage::ColumnSizeByName StorageMaterializedMySQL::getColumnSizes() const { auto sizes = nested_storage->getColumnSizes(); diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 2cd589bfd75..9f5d157ce3b 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -34,7 +34,6 @@ public: SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, bool) override { throwNotAllowed(); } - NamesAndTypesList getVirtuals() const override; ColumnSizeByName getColumnSizes() const override; StoragePtr getNested() const override { return nested_storage; } diff --git 
a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 34edc5482f4..9e623001520 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -153,6 +153,8 @@ StorageMaterializedView::StorageMaterializedView( *query.refresh_strategy); refresh_on_start = mode < LoadingStrictnessLevel::ATTACH && !query.is_create_empty; } + + setVirtuals(*getTargetTable()->getVirtualsDescription()); } QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( @@ -503,11 +505,6 @@ StoragePtr StorageMaterializedView::tryGetTargetTable() const return DatabaseCatalog::instance().tryGetTable(getTargetTableId(), getContext()); } -NamesAndTypesList StorageMaterializedView::getVirtuals() const -{ - return getTargetTable()->getVirtuals(); -} - Strings StorageMaterializedView::getDataPaths() const { if (auto table = tryGetTargetTable()) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 4d574a821ec..d716527b5ad 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -75,9 +75,6 @@ public: StoragePtr tryGetTargetTable() const; StorageID getTargetTableId() const; - /// Get the virtual column of the target table; - NamesAndTypesList getVirtuals() const override; - ActionLock getActionLock(StorageActionBlockType type) override; void onActionLockRemove(StorageActionBlockType action_type) override; diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 8b062a392d4..6959545430c 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -5,6 +5,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" namespace DB @@ -281,6 +282,8 @@ private: ContextPtr query_context, bool filter_by_database_virtual_column, bool filter_by_table_virtual_column) const; + + // static VirtualColumnsDescription createVirtuals(StoragePtr first_table); }; } diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 18a1f9086ae..a8e7fd528dd 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -30,7 +30,6 @@ public: bool hasEvenlyDistributedRead() const override { return getNested()->hasEvenlyDistributedRead(); } ColumnSizeByName getColumnSizes() const override { return getNested()->getColumnSizes(); } - NamesAndTypesList getVirtuals() const override { return getNested()->getVirtuals(); } QueryProcessingStage::Enum getQueryProcessingStage( ContextPtr context, diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 300b11b7346..191cdab1a40 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -12,12 +12,13 @@ StorageValues::StorageValues( const StorageID & table_id_, const ColumnsDescription & columns_, const Block & res_block_, - const NamesAndTypesList & virtuals_) - : IStorage(table_id_), res_block(res_block_), virtuals(virtuals_) + const VirtualColumnsDescription & virtuals_) + : IStorage(table_id_), res_block(res_block_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + setVirtuals(virtuals_); } Pipe StorageValues::read( diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index af1c134077b..0db3ecec888 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -11,8 +11,14 @@ namespace DB class StorageValues final : public IStorage { public: + /// Why we may have virtual columns in the 
storage from a single block? + /// Because it used as tmp storage for pushing blocks into views, and some + /// views may contain virtual columns from original storage. StorageValues( - const StorageID & table_id_, const ColumnsDescription & columns_, const Block & res_block_, const NamesAndTypesList & virtuals_ = {}); + const StorageID & table_id_, + const ColumnsDescription & columns_, + const Block & res_block_, + const VirtualColumnsDescription & virtuals_ = {}); std::string getName() const override { return "Values"; } @@ -25,13 +31,6 @@ public: size_t max_block_size, size_t num_streams) override; - /// Why we may have virtual columns in the storage from a single block? - /// Because it used as tmp storage for pushing blocks into views, and some - /// views may contain virtual columns from original storage. - NamesAndTypesList getVirtuals() const override - { - return virtuals; - } /// FIXME probably it should return false, but StorageValues is used in ExecutingInnerQueryFromViewTransform (whatever it is) bool supportsTransactions() const override { return true; } @@ -40,7 +39,6 @@ public: private: Block res_block; - NamesAndTypesList virtuals; }; } diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index c2ed35c5510..353c61e6347 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -16,6 +16,8 @@ #include #include +#include "Storages/System/IStorageSystemOneBlock.h" +#include "Storages/VirtualColumnsDescription.h" #include namespace DB @@ -52,6 +54,14 @@ catch (const DB::Exception &) } +StorageSystemDictionaries::StorageSystemDictionaries(const StorageID & storage_id_) + : DB::IStorageSystemOneBlock(storage_id_) +{ + VirtualColumnsDescription virtuals; + virtuals.addEphemeral("key", std::make_shared(), ""); + setVirtuals(virtuals); +} + ColumnsDescription StorageSystemDictionaries::getColumnsDescription() { return ColumnsDescription @@ -92,13 +102,6 @@ ColumnsDescription StorageSystemDictionaries::getColumnsDescription() }; } -NamesAndTypesList StorageSystemDictionaries::getVirtuals() const -{ - return { - {"key", std::make_shared()} - }; -} - void StorageSystemDictionaries::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & /*query_info*/) const { const auto access = context->getAccess(); diff --git a/src/Storages/System/StorageSystemDictionaries.h b/src/Storages/System/StorageSystemDictionaries.h index 792b3c0dd30..e4f07e3c4bf 100644 --- a/src/Storages/System/StorageSystemDictionaries.h +++ b/src/Storages/System/StorageSystemDictionaries.h @@ -1,6 +1,7 @@ #pragma once #include +#include "Interpreters/StorageID.h" namespace DB @@ -12,12 +13,12 @@ class Context; class StorageSystemDictionaries final : public IStorageSystemOneBlock { public: + explicit StorageSystemDictionaries(const StorageID & storage_id_); + std::string getName() const override { return "SystemDictionaries"; } static ColumnsDescription getColumnsDescription(); - NamesAndTypesList getVirtuals() const override; - protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 48dab8c4777..b4900d26470 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -2,6 +2,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include #include #include @@ -259,12 
+260,10 @@ StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, Colu StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); setInMemoryMetadata(storage_metadata); + + VirtualColumnsDescription virtuals; + virtuals.addEphemeral("_state", std::make_shared(), ""); + setVirtuals(virtuals); } -NamesAndTypesList StorageSystemPartsBase::getVirtuals() const -{ - return NamesAndTypesList{ - NameAndTypePair("_state", std::make_shared()) - }; -} } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 0a45d0f9dfe..204f093e46e 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -131,8 +131,6 @@ public: size_t max_block_size, size_t num_streams) override; - NamesAndTypesList getVirtuals() const override; - bool isSystemStorage() const override { return true; } private: From 0c902f8d648789f8cf1117c975f5db2bba1898b4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Feb 2024 18:28:54 +0100 Subject: [PATCH 079/356] Fix style --- src/Processors/Executors/PipelineExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index c3fbe6788c6..8477e011763 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -410,7 +410,7 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) cancel(); throw; } - + tasks.processAsyncTasks(); pool->wait(); } From 53714e6f7a2cb22ceb703f4cb7a14345b86a7fe9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 28 Feb 2024 19:10:00 +0000 Subject: [PATCH 080/356] Refactor StorageSystemOneBlock --- src/Interpreters/InterpreterSystemQuery.cpp | 1 + src/Storages/RocksDB/StorageSystemRocksDB.cpp | 4 +- src/Storages/RocksDB/StorageSystemRocksDB.h | 4 +- src/Storages/System/IStorageSystemOneBlock.h | 44 ++++----------- ...rageSystemAggregateFunctionCombinators.cpp | 2 +- ...torageSystemAggregateFunctionCombinators.h | 4 +- .../System/StorageSystemAsyncLoader.cpp | 2 +- .../System/StorageSystemAsyncLoader.h | 4 +- .../StorageSystemAsynchronousInserts.cpp | 2 +- .../System/StorageSystemAsynchronousInserts.h | 4 +- .../StorageSystemAsynchronousMetrics.cpp | 4 +- .../System/StorageSystemAsynchronousMetrics.h | 4 +- src/Storages/System/StorageSystemBackups.cpp | 2 +- src/Storages/System/StorageSystemBackups.h | 4 +- .../System/StorageSystemBuildOptions.cpp | 2 +- .../System/StorageSystemBuildOptions.h | 4 +- .../System/StorageSystemCertificates.cpp | 2 +- .../System/StorageSystemCertificates.h | 4 +- src/Storages/System/StorageSystemClusters.cpp | 2 +- src/Storages/System/StorageSystemClusters.h | 4 +- .../System/StorageSystemCollations.cpp | 4 +- src/Storages/System/StorageSystemCollations.h | 4 +- .../System/StorageSystemContributors.cpp | 2 +- .../System/StorageSystemContributors.h | 4 +- .../System/StorageSystemCurrentRoles.cpp | 2 +- .../System/StorageSystemCurrentRoles.h | 4 +- .../System/StorageSystemDDLWorkerQueue.cpp | 2 +- .../System/StorageSystemDDLWorkerQueue.h | 4 +- .../System/StorageSystemDashboards.cpp | 2 +- src/Storages/System/StorageSystemDashboards.h | 4 +- .../System/StorageSystemDataTypeFamilies.cpp | 2 +- .../System/StorageSystemDataTypeFamilies.h | 4 +- .../System/StorageSystemDatabaseEngines.cpp | 2 +- .../System/StorageSystemDatabaseEngines.h | 4 +- 
.../System/StorageSystemDatabases.cpp | 9 ++- src/Storages/System/StorageSystemDatabases.h | 4 +- .../System/StorageSystemDictionaries.cpp | 2 +- .../System/StorageSystemDictionaries.h | 4 +- .../System/StorageSystemDistributionQueue.cpp | 4 +- .../System/StorageSystemDistributionQueue.h | 4 +- .../System/StorageSystemDroppedTables.cpp | 2 +- .../System/StorageSystemDroppedTables.h | 4 +- .../System/StorageSystemEnabledRoles.cpp | 2 +- .../System/StorageSystemEnabledRoles.h | 4 +- src/Storages/System/StorageSystemErrors.cpp | 2 +- src/Storages/System/StorageSystemErrors.h | 4 +- src/Storages/System/StorageSystemEvents.cpp | 2 +- src/Storages/System/StorageSystemEvents.h | 4 +- .../System/StorageSystemFilesystemCache.cpp | 4 +- .../System/StorageSystemFilesystemCache.h | 4 +- src/Storages/System/StorageSystemFormats.cpp | 2 +- src/Storages/System/StorageSystemFormats.h | 4 +- .../System/StorageSystemFunctions.cpp | 2 +- src/Storages/System/StorageSystemFunctions.h | 4 +- src/Storages/System/StorageSystemGrants.cpp | 2 +- src/Storages/System/StorageSystemGrants.h | 4 +- src/Storages/System/StorageSystemGraphite.cpp | 2 +- src/Storages/System/StorageSystemGraphite.h | 4 +- .../System/StorageSystemKafkaConsumers.cpp | 2 +- .../System/StorageSystemKafkaConsumers.h | 4 +- src/Storages/System/StorageSystemLicenses.cpp | 2 +- src/Storages/System/StorageSystemLicenses.h | 4 +- src/Storages/System/StorageSystemMacros.cpp | 2 +- src/Storages/System/StorageSystemMacros.h | 4 +- .../System/StorageSystemMergeTreeSettings.cpp | 2 +- .../System/StorageSystemMergeTreeSettings.h | 6 +- src/Storages/System/StorageSystemMerges.cpp | 2 +- src/Storages/System/StorageSystemMerges.h | 4 +- src/Storages/System/StorageSystemMetrics.cpp | 2 +- src/Storages/System/StorageSystemMetrics.h | 4 +- src/Storages/System/StorageSystemModels.cpp | 2 +- src/Storages/System/StorageSystemModels.h | 4 +- src/Storages/System/StorageSystemMoves.cpp | 2 +- src/Storages/System/StorageSystemMoves.h | 4 +- .../System/StorageSystemMutations.cpp | 4 +- src/Storages/System/StorageSystemMutations.h | 4 +- .../System/StorageSystemNamedCollections.cpp | 4 +- .../System/StorageSystemNamedCollections.h | 4 +- .../StorageSystemPartMovesBetweenShards.cpp | 4 +- .../StorageSystemPartMovesBetweenShards.h | 4 +- .../System/StorageSystemPrivileges.cpp | 2 +- src/Storages/System/StorageSystemPrivileges.h | 4 +- .../System/StorageSystemProcesses.cpp | 2 +- src/Storages/System/StorageSystemProcesses.h | 4 +- .../System/StorageSystemQueryCache.cpp | 4 +- src/Storages/System/StorageSystemQueryCache.h | 4 +- .../System/StorageSystemQuotaLimits.cpp | 2 +- .../System/StorageSystemQuotaLimits.h | 4 +- .../System/StorageSystemQuotaUsage.cpp | 2 +- src/Storages/System/StorageSystemQuotaUsage.h | 4 +- src/Storages/System/StorageSystemQuotas.cpp | 2 +- src/Storages/System/StorageSystemQuotas.h | 4 +- .../System/StorageSystemQuotasUsage.cpp | 2 +- .../System/StorageSystemQuotasUsage.h | 4 +- .../System/StorageSystemRemoteDataPaths.cpp | 1 + .../System/StorageSystemReplicatedFetches.cpp | 2 +- .../System/StorageSystemReplicatedFetches.h | 4 +- .../System/StorageSystemReplicationQueue.cpp | 4 +- .../System/StorageSystemReplicationQueue.h | 4 +- .../System/StorageSystemRoleGrants.cpp | 2 +- src/Storages/System/StorageSystemRoleGrants.h | 4 +- src/Storages/System/StorageSystemRoles.cpp | 2 +- src/Storages/System/StorageSystemRoles.h | 4 +- .../System/StorageSystemRowPolicies.cpp | 2 +- .../System/StorageSystemRowPolicies.h | 4 +- 
src/Storages/System/StorageSystemS3Queue.cpp | 4 +- src/Storages/System/StorageSystemS3Queue.h | 4 +- .../System/StorageSystemScheduler.cpp | 2 +- src/Storages/System/StorageSystemScheduler.h | 4 +- .../StorageSystemSchemaInferenceCache.cpp | 2 +- .../StorageSystemSchemaInferenceCache.h | 4 +- .../System/StorageSystemServerSettings.cpp | 2 +- .../System/StorageSystemServerSettings.h | 4 +- src/Storages/System/StorageSystemSettings.cpp | 2 +- src/Storages/System/StorageSystemSettings.h | 4 +- .../System/StorageSystemSettingsChanges.cpp | 2 +- .../System/StorageSystemSettingsChanges.h | 4 +- .../StorageSystemSettingsProfileElements.cpp | 2 +- .../StorageSystemSettingsProfileElements.h | 4 +- .../System/StorageSystemSettingsProfiles.cpp | 2 +- .../System/StorageSystemSettingsProfiles.h | 4 +- .../System/StorageSystemTableEngines.cpp | 2 +- .../System/StorageSystemTableEngines.h | 4 +- .../System/StorageSystemTableFunctions.cpp | 2 +- .../System/StorageSystemTableFunctions.h | 4 +- .../System/StorageSystemTimeZones.cpp | 2 +- src/Storages/System/StorageSystemTimeZones.h | 4 +- .../System/StorageSystemTransactions.cpp | 2 +- .../System/StorageSystemTransactions.h | 4 +- .../System/StorageSystemUserDirectories.cpp | 2 +- .../System/StorageSystemUserDirectories.h | 4 +- .../System/StorageSystemUserProcesses.cpp | 2 +- .../System/StorageSystemUserProcesses.h | 4 +- src/Storages/System/StorageSystemUsers.cpp | 2 +- src/Storages/System/StorageSystemUsers.h | 4 +- .../System/StorageSystemViewRefreshes.cpp | 2 +- .../System/StorageSystemViewRefreshes.h | 4 +- src/Storages/System/StorageSystemWarnings.cpp | 4 +- src/Storages/System/StorageSystemWarnings.h | 4 +- .../StorageSystemZooKeeperConnection.cpp | 2 +- .../System/StorageSystemZooKeeperConnection.h | 4 +- src/Storages/System/attachSystemTables.cpp | 56 +++++++++---------- src/Storages/System/attachSystemTablesImpl.h | 27 +++++++-- 143 files changed, 288 insertions(+), 288 deletions(-) diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 266b2ac9fc9..d2c5eea9914 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index d0533b5ba0c..eec2f53381f 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -39,7 +39,7 @@ ColumnsDescription StorageSystemRocksDB::getColumnsDescription() } -void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -87,7 +87,7 @@ void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr con { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.h b/src/Storages/RocksDB/StorageSystemRocksDB.h index c1f10a7722d..ec351c75446 100644 --- 
a/src/Storages/RocksDB/StorageSystemRocksDB.h +++ b/src/Storages/RocksDB/StorageSystemRocksDB.h @@ -11,7 +11,7 @@ class Context; /** Implements the `rocksdb` system table, which expose various rocksdb metrics. */ -class StorageSystemRocksDB final : public IStorageSystemOneBlock +class StorageSystemRocksDB final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemRocksDB"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; }; } diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 3b2807965a4..a20434fd97e 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -1,13 +1,6 @@ #pragma once -#include -#include -#include #include -#include -#include -#include -#include namespace DB { @@ -17,8 +10,8 @@ class Context; /** IStorageSystemOneBlock is base class for system tables whose all columns can be synchronously fetched. * - * Client class need to provide static method static NamesAndTypesList getNamesAndTypes() that will return list of column names and - * their types. IStorageSystemOneBlock during read will create result columns in same order as result of getNamesAndTypes + * Client class need to provide columns_description. + * IStorageSystemOneBlock during read will create result columns in same order as in columns_description * and pass it with fillData method. * * Client also must override fillData and fill result columns. @@ -26,49 +19,32 @@ class Context; * If subclass want to support virtual columns, it should override getVirtuals method of IStorage interface. * IStorageSystemOneBlock will add virtuals columns at the end of result columns of fillData method. 
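For readers following the refactoring, a minimal sketch of what a subclass looks like against the new non-template base may help. This is a hypothetical illustration, not part of the patch: the table name, its single column, and the UInt8 element type of the columns mask are assumptions, while the constructor and fillData() signatures follow the hunks in this commit.

#include <Storages/System/IStorageSystemOneBlock.h>
#include <DataTypes/DataTypeString.h>

namespace DB
{

/// Hypothetical system table built on the refactored, non-template base class (sketch only).
class StorageSystemExample final : public IStorageSystemOneBlock
{
public:
    /// The base class now receives the columns description explicitly instead of
    /// resolving a static Self::getColumnsDescription() through a template parameter.
    explicit StorageSystemExample(const StorageID & table_id_)
        : IStorageSystemOneBlock(table_id_, getColumnsDescription())
    {
    }

    std::string getName() const override { return "SystemExample"; }

    static ColumnsDescription getColumnsDescription()
    {
        return ColumnsDescription
        {
            {"name", std::make_shared<DataTypeString>(), "An illustrative column."},
        };
    }

protected:
    /// fillData() now receives the pushed-down predicate and the mask of requested
    /// columns instead of the whole SelectQueryInfo.
    void fillData(MutableColumns & res_columns, ContextPtr /*context*/,
                  const ActionsDAG::Node * /*predicate*/, std::vector<UInt8> /*columns_mask*/) const override
    {
        res_columns[0]->insert(String("example"));
    }
};

}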
*/ -template class IStorageSystemOneBlock : public IStorage { protected: - virtual void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const = 0; + virtual void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector columns_mask) const = 0; virtual bool supportsColumnsMask() const { return false; } + friend class ReadFromSystemOneBlock; + public: - explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) + explicit IStorageSystemOneBlock(const StorageID & table_id_, ColumnsDescription columns_description) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(Self::getColumnsDescription()); + storage_metadata.setColumns(std::move(columns_description)); setInMemoryMetadata(storage_metadata); } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - size_t /*num_streams*/) override - { - storage_snapshot->check(column_names); - Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); - - if (supportsColumnsMask()) - { - auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); - query_info.columns_mask = std::move(columns_mask); - sample_block = std::move(header); - } - - MutableColumns res_columns = sample_block.cloneEmptyColumns(); - fillData(res_columns, context, query_info); - - UInt64 num_rows = res_columns.at(0)->size(); - Chunk chunk(std::move(res_columns), num_rows); - - return Pipe(std::make_shared(sample_block, std::move(chunk))); - } + size_t /*num_streams*/) override; bool isSystemStorage() const override { return true; } diff --git a/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp b/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp index 8e32a137fcb..7f3994528a9 100644 --- a/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp +++ b/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp @@ -13,7 +13,7 @@ ColumnsDescription StorageSystemAggregateFunctionCombinators::getColumnsDescript }; } -void StorageSystemAggregateFunctionCombinators::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemAggregateFunctionCombinators::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { const auto & combinators = AggregateFunctionCombinatorFactory::instance().getAllAggregateFunctionCombinators(); for (const auto & pair : combinators) diff --git a/src/Storages/System/StorageSystemAggregateFunctionCombinators.h b/src/Storages/System/StorageSystemAggregateFunctionCombinators.h index 6f3f90b58af..45036043636 100644 --- a/src/Storages/System/StorageSystemAggregateFunctionCombinators.h +++ b/src/Storages/System/StorageSystemAggregateFunctionCombinators.h @@ -6,10 +6,10 @@ namespace DB { -class StorageSystemAggregateFunctionCombinators final : public IStorageSystemOneBlock +class StorageSystemAggregateFunctionCombinators final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using 
IStorageSystemOneBlock::IStorageSystemOneBlock; public: diff --git a/src/Storages/System/StorageSystemAsyncLoader.cpp b/src/Storages/System/StorageSystemAsyncLoader.cpp index c56a3c3ce78..a7ffa282429 100644 --- a/src/Storages/System/StorageSystemAsyncLoader.cpp +++ b/src/Storages/System/StorageSystemAsyncLoader.cpp @@ -74,7 +74,7 @@ ColumnsDescription StorageSystemAsyncLoader::getColumnsDescription() }; } -void StorageSystemAsyncLoader::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemAsyncLoader::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { TimePoint now = std::chrono::system_clock::now(); diff --git a/src/Storages/System/StorageSystemAsyncLoader.h b/src/Storages/System/StorageSystemAsyncLoader.h index fa0ce11efe3..685db264a10 100644 --- a/src/Storages/System/StorageSystemAsyncLoader.h +++ b/src/Storages/System/StorageSystemAsyncLoader.h @@ -10,7 +10,7 @@ namespace DB class Context; /// system.asynchronous_loader table. Takes data from context.getAsyncLoader() -class StorageSystemAsyncLoader final : public IStorageSystemOneBlock +class StorageSystemAsyncLoader final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemAsyncLoader"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index 20ba4d1cdfb..b5f413f2e20 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -30,7 +30,7 @@ ColumnsDescription StorageSystemAsynchronousInserts::getColumnsDescription() }; } -void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { using namespace std::chrono; diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.h b/src/Storages/System/StorageSystemAsynchronousInserts.h index 891494ffbeb..748937cffb8 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.h +++ b/src/Storages/System/StorageSystemAsynchronousInserts.h @@ -8,7 +8,7 @@ namespace DB /** Implements the system table `asynhronous_inserts`, * which contains information about pending asynchronous inserts in queue. 
*/ -class StorageSystemAsynchronousInserts final : public IStorageSystemOneBlock +class StorageSystemAsynchronousInserts final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemAsynchronousInserts"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemAsynchronousMetrics.cpp b/src/Storages/System/StorageSystemAsynchronousMetrics.cpp index 58940a7e52e..a54b577d72b 100644 --- a/src/Storages/System/StorageSystemAsynchronousMetrics.cpp +++ b/src/Storages/System/StorageSystemAsynchronousMetrics.cpp @@ -19,11 +19,11 @@ ColumnsDescription StorageSystemAsynchronousMetrics::getColumnsDescription() StorageSystemAsynchronousMetrics::StorageSystemAsynchronousMetrics(const StorageID & table_id_, const AsynchronousMetrics & async_metrics_) - : IStorageSystemOneBlock(table_id_), async_metrics(async_metrics_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()), async_metrics(async_metrics_) { } -void StorageSystemAsynchronousMetrics::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemAsynchronousMetrics::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { auto async_metrics_values = async_metrics.getValues(); for (const auto & name_value : async_metrics_values) diff --git a/src/Storages/System/StorageSystemAsynchronousMetrics.h b/src/Storages/System/StorageSystemAsynchronousMetrics.h index 026377c77a0..3543b7684d7 100644 --- a/src/Storages/System/StorageSystemAsynchronousMetrics.h +++ b/src/Storages/System/StorageSystemAsynchronousMetrics.h @@ -11,7 +11,7 @@ class Context; /** Implements system table asynchronous_metrics, which allows to get values of periodically (asynchronously) updated metrics. 
*/ -class StorageSystemAsynchronousMetrics final : public IStorageSystemOneBlock +class StorageSystemAsynchronousMetrics final : public IStorageSystemOneBlock { public: StorageSystemAsynchronousMetrics(const StorageID & table_id_, const AsynchronousMetrics & async_metrics_); @@ -24,7 +24,7 @@ private: const AsynchronousMetrics & async_metrics; protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index 0063d9e308f..fec92229556 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -39,7 +39,7 @@ ColumnsDescription StorageSystemBackups::getColumnsDescription() } -void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { size_t column_index = 0; auto & column_id = assert_cast(*res_columns[column_index++]); diff --git a/src/Storages/System/StorageSystemBackups.h b/src/Storages/System/StorageSystemBackups.h index a081bd52b4c..48e03d12e18 100644 --- a/src/Storages/System/StorageSystemBackups.h +++ b/src/Storages/System/StorageSystemBackups.h @@ -7,7 +7,7 @@ namespace DB { /// Implements `grants` system table, which allows you to get information about grants. -class StorageSystemBackups final : public IStorageSystemOneBlock +class StorageSystemBackups final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemBackups"; } @@ -15,7 +15,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp b/src/Storages/System/StorageSystemBuildOptions.cpp index c12935078af..a7144299eca 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp +++ b/src/Storages/System/StorageSystemBuildOptions.cpp @@ -17,7 +17,7 @@ ColumnsDescription StorageSystemBuildOptions::getColumnsDescription() }; } -void StorageSystemBuildOptions::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemBuildOptions::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (auto * it = auto_config_build; *it; it += 2) { diff --git a/src/Storages/System/StorageSystemBuildOptions.h b/src/Storages/System/StorageSystemBuildOptions.h index 7c0bbf6b5fd..dbe651d7513 100644 --- a/src/Storages/System/StorageSystemBuildOptions.h +++ b/src/Storages/System/StorageSystemBuildOptions.h @@ -11,10 +11,10 @@ class Context; /** System table "build_options" with many params used for clickhouse building */ -class StorageSystemBuildOptions final : public IStorageSystemOneBlock +class StorageSystemBuildOptions final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const 
override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp index c372e4b32b0..0e4c5648b74 100644 --- a/src/Storages/System/StorageSystemCertificates.cpp +++ b/src/Storages/System/StorageSystemCertificates.cpp @@ -169,7 +169,7 @@ static void enumCertificates(const std::string & dir, bool def, MutableColumns & #endif -void StorageSystemCertificates::fillData([[maybe_unused]] MutableColumns & res_columns, ContextPtr/* context*/, const SelectQueryInfo &) const +void StorageSystemCertificates::fillData([[maybe_unused]] MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { #if USE_SSL const auto & ca_paths = Poco::Net::SSLManager::instance().defaultServerContext()->getCAPaths(); diff --git a/src/Storages/System/StorageSystemCertificates.h b/src/Storages/System/StorageSystemCertificates.h index f8c8477c998..bc3fef83853 100644 --- a/src/Storages/System/StorageSystemCertificates.h +++ b/src/Storages/System/StorageSystemCertificates.h @@ -13,7 +13,7 @@ class Cluster; * that allows to obtain information about available certificates * and their sources. */ -class StorageSystemCertificates final : public IStorageSystemOneBlock +class StorageSystemCertificates final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemCertificates"; } @@ -23,7 +23,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 6172d4329cd..3c01b4717cc 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -39,7 +39,7 @@ ColumnsDescription StorageSystemClusters::getColumnsDescription() return description; } -void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { for (const auto & name_and_cluster : context->getClusters()) writeCluster(res_columns, name_and_cluster, {}); diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index 7b568641cb2..0f7c792261d 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -15,7 +15,7 @@ class Cluster; * that allows to obtain information about available clusters * (which may be specified in Distributed tables). 
*/ -class StorageSystemClusters final : public IStorageSystemOneBlock +class StorageSystemClusters final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemClusters"; } @@ -26,7 +26,7 @@ protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; using NameAndCluster = std::pair>; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, const std::vector & is_active); }; diff --git a/src/Storages/System/StorageSystemCollations.cpp b/src/Storages/System/StorageSystemCollations.cpp index 2e5d11943af..5ef075e53be 100644 --- a/src/Storages/System/StorageSystemCollations.cpp +++ b/src/Storages/System/StorageSystemCollations.cpp @@ -1,6 +1,8 @@ #include #include +#include #include +#include namespace DB { @@ -14,7 +16,7 @@ ColumnsDescription StorageSystemCollations::getColumnsDescription() }; } -void StorageSystemCollations::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemCollations::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto & [locale, lang]: AvailableCollationLocales::instance().getAvailableCollations()) { diff --git a/src/Storages/System/StorageSystemCollations.h b/src/Storages/System/StorageSystemCollations.h index 1fc0ff0e024..449e0d8ac06 100644 --- a/src/Storages/System/StorageSystemCollations.h +++ b/src/Storages/System/StorageSystemCollations.h @@ -5,10 +5,10 @@ namespace DB { -class StorageSystemCollations final : public IStorageSystemOneBlock +class StorageSystemCollations final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemContributors.cpp b/src/Storages/System/StorageSystemContributors.cpp index 860a96c4388..f928337e1bd 100644 --- a/src/Storages/System/StorageSystemContributors.cpp +++ b/src/Storages/System/StorageSystemContributors.cpp @@ -17,7 +17,7 @@ ColumnsDescription StorageSystemContributors::getColumnsDescription() }; } -void StorageSystemContributors::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemContributors::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { std::vector contributors; for (auto * it = auto_contributors; *it; ++it) diff --git a/src/Storages/System/StorageSystemContributors.h b/src/Storages/System/StorageSystemContributors.h index ed983c5e61f..f8d082542d2 100644 --- a/src/Storages/System/StorageSystemContributors.h +++ b/src/Storages/System/StorageSystemContributors.h @@ -9,10 +9,10 @@ class Context; /** System table "contributors" with list of clickhouse contributors */ -class StorageSystemContributors final : public IStorageSystemOneBlock +class StorageSystemContributors final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, 
ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemCurrentRoles.cpp b/src/Storages/System/StorageSystemCurrentRoles.cpp index 88bdf088175..bfa3a7c2e18 100644 --- a/src/Storages/System/StorageSystemCurrentRoles.cpp +++ b/src/Storages/System/StorageSystemCurrentRoles.cpp @@ -22,7 +22,7 @@ ColumnsDescription StorageSystemCurrentRoles::getColumnsDescription() } -void StorageSystemCurrentRoles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemCurrentRoles::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto roles_info = context->getRolesInfo(); auto user = context->getUser(); diff --git a/src/Storages/System/StorageSystemCurrentRoles.h b/src/Storages/System/StorageSystemCurrentRoles.h index 4cc9b11d3f4..db1245e0ea7 100644 --- a/src/Storages/System/StorageSystemCurrentRoles.h +++ b/src/Storages/System/StorageSystemCurrentRoles.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `current_roles` system table, which allows you to get information about current roles. -class StorageSystemCurrentRoles final : public IStorageSystemOneBlock +class StorageSystemCurrentRoles final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemCurrentRoles"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index c133a1b597c..ac5dd6c05d0 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -204,7 +204,7 @@ static void fillStatusColumns(MutableColumns & res_columns, size_t & col, } -void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto& ddl_worker = context->getDDLWorker(); fs::path ddl_zookeeper_path = ddl_worker.getQueueDir(); diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.h b/src/Storages/System/StorageSystemDDLWorkerQueue.h index 871bb706f94..eaffb488322 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.h +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.h @@ -11,10 +11,10 @@ class Context; /** System table "distributed_ddl_queue" with list of queries that are currently in the DDL worker queue. 
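A second pattern that recurs in this series is the removal of per-table getVirtuals() overrides in favour of a VirtualColumnsDescription registered at construction time, as in the StorageKeeperMap, StorageSystemDictionaries and StorageSystemPartsBase hunks earlier in the patch set. A rough sketch of that pattern, written as an out-of-line variant of the constructor from the hypothetical StorageSystemExample above; the "_state" column mirrors the StorageSystemPartsBase hunk, and the addEphemeral(name, type, comment) signature is taken from those hunks.

/// Sketch: virtual columns are declared once in the constructor instead of being
/// rebuilt on every getVirtuals() call.
StorageSystemExample::StorageSystemExample(const StorageID & table_id_)
    : IStorageSystemOneBlock(table_id_, getColumnsDescription())
{
    VirtualColumnsDescription virtuals;
    /// Ephemeral virtual columns are not persisted; the last argument is the column comment.
    virtuals.addEphemeral("_state", std::make_shared<DataTypeString>(), "");
    setVirtuals(virtuals);
}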
*/ -class StorageSystemDDLWorkerQueue final : public IStorageSystemOneBlock +class StorageSystemDDLWorkerQueue final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemDashboards.cpp b/src/Storages/System/StorageSystemDashboards.cpp index 7c9e8b73519..23d8fcfc481 100644 --- a/src/Storages/System/StorageSystemDashboards.cpp +++ b/src/Storages/System/StorageSystemDashboards.cpp @@ -22,7 +22,7 @@ String trim(const char * text) return String(view); } -void StorageSystemDashboards::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemDashboards::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { static const std::vector> dashboards { diff --git a/src/Storages/System/StorageSystemDashboards.h b/src/Storages/System/StorageSystemDashboards.h index 83a8664ad27..f3e957e06c5 100644 --- a/src/Storages/System/StorageSystemDashboards.h +++ b/src/Storages/System/StorageSystemDashboards.h @@ -12,7 +12,7 @@ namespace DB class Context; -class StorageSystemDashboards final : public IStorageSystemOneBlock +class StorageSystemDashboards final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemDashboards"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemDataTypeFamilies.cpp b/src/Storages/System/StorageSystemDataTypeFamilies.cpp index 821a43ed530..da607ade410 100644 --- a/src/Storages/System/StorageSystemDataTypeFamilies.cpp +++ b/src/Storages/System/StorageSystemDataTypeFamilies.cpp @@ -17,7 +17,7 @@ ColumnsDescription StorageSystemDataTypeFamilies::getColumnsDescription() }; } -void StorageSystemDataTypeFamilies::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemDataTypeFamilies::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { const auto & factory = DataTypeFactory::instance(); auto names = factory.getAllRegisteredNames(); diff --git a/src/Storages/System/StorageSystemDataTypeFamilies.h b/src/Storages/System/StorageSystemDataTypeFamilies.h index 2cb834f6931..6322c3bcfdd 100644 --- a/src/Storages/System/StorageSystemDataTypeFamilies.h +++ b/src/Storages/System/StorageSystemDataTypeFamilies.h @@ -5,10 +5,10 @@ namespace DB { -class StorageSystemDataTypeFamilies final : public IStorageSystemOneBlock +class StorageSystemDataTypeFamilies final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemDatabaseEngines.cpp b/src/Storages/System/StorageSystemDatabaseEngines.cpp index 514ca6d0ab3..59ba60fa871 100644 --- 
a/src/Storages/System/StorageSystemDatabaseEngines.cpp +++ b/src/Storages/System/StorageSystemDatabaseEngines.cpp @@ -14,7 +14,7 @@ ColumnsDescription StorageSystemDatabaseEngines::getColumnsDescription() }; } -void StorageSystemDatabaseEngines::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemDatabaseEngines::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto & [engine, _] : DatabaseFactory::instance().getDatabaseEngines()) { diff --git a/src/Storages/System/StorageSystemDatabaseEngines.h b/src/Storages/System/StorageSystemDatabaseEngines.h index 16b517c91e6..3af13598c17 100644 --- a/src/Storages/System/StorageSystemDatabaseEngines.h +++ b/src/Storages/System/StorageSystemDatabaseEngines.h @@ -6,10 +6,10 @@ namespace DB { -class StorageSystemDatabaseEngines final : public IStorageSystemOneBlock +class StorageSystemDatabaseEngines final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 51ecb8f17ca..f5537b508ba 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -72,7 +72,7 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database return engine_full; } -static ColumnPtr getFilteredDatabases(const Databases & databases, const SelectQueryInfo & query_info, ContextPtr context) +static ColumnPtr getFilteredDatabases(const Databases & databases, const ActionsDAG::Node * predicate, ContextPtr context) { MutableColumnPtr name_column = ColumnString::create(); MutableColumnPtr engine_column = ColumnString::create(); @@ -94,17 +94,17 @@ static ColumnPtr getFilteredDatabases(const Databases & databases, const SelectQ ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine"), ColumnWithTypeAndName(std::move(uuid_column), std::make_shared(), "uuid") }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); return block.getByPosition(0).column; } -void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector columns_mask) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES); const auto databases = DatabaseCatalog::instance().getDatabases(); - ColumnPtr filtered_databases_column = getFilteredDatabases(databases, query_info, context); + ColumnPtr filtered_databases_column = getFilteredDatabases(databases, predicate, context); for (size_t i = 0; i < filtered_databases_column->size(); ++i) { @@ -120,7 +120,6 @@ void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr c size_t src_index = 0; size_t res_index = 0; - const auto & columns_mask = query_info.columns_mask; if (columns_mask[src_index++]) res_columns[res_index++]->insert(database_name); if (columns_mask[src_index++]) diff --git 
a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index 2fd9ccdc970..fa55f0aea32 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -11,7 +11,7 @@ class Context; /** Implements `databases` system table, which allows you to get information about all databases. */ -class StorageSystemDatabases final : public IStorageSystemOneBlock +class StorageSystemDatabases final : public IStorageSystemOneBlock { public: std::string getName() const override @@ -26,7 +26,7 @@ protected: bool supportsColumnsMask() const override { return true; } - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector columns_mask) const override; }; } diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index c2ed35c5510..b3a60e16d05 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -99,7 +99,7 @@ NamesAndTypesList StorageSystemDictionaries::getVirtuals() const }; } -void StorageSystemDictionaries::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & /*query_info*/) const +void StorageSystemDictionaries::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_dictionaries = access->isGranted(AccessType::SHOW_DICTIONARIES); diff --git a/src/Storages/System/StorageSystemDictionaries.h b/src/Storages/System/StorageSystemDictionaries.h index 792b3c0dd30..1a071fda872 100644 --- a/src/Storages/System/StorageSystemDictionaries.h +++ b/src/Storages/System/StorageSystemDictionaries.h @@ -9,7 +9,7 @@ namespace DB class Context; -class StorageSystemDictionaries final : public IStorageSystemOneBlock +class StorageSystemDictionaries final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemDictionaries"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index f0247275a8b..50c6436f316 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -107,7 +107,7 @@ ColumnsDescription StorageSystemDistributionQueue::getColumnsDescription() } -void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -159,7 +159,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, Cont { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, 
context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemDistributionQueue.h b/src/Storages/System/StorageSystemDistributionQueue.h index 477a9d6e245..159a86bf082 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.h +++ b/src/Storages/System/StorageSystemDistributionQueue.h @@ -11,7 +11,7 @@ class Context; /** Implements the `distribution_queue` system table, which allows you to view the INSERT queues for the Distributed tables. */ -class StorageSystemDistributionQueue final : public IStorageSystemOneBlock +class StorageSystemDistributionQueue final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemDistributionQueue"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemDroppedTables.cpp b/src/Storages/System/StorageSystemDroppedTables.cpp index a95127847f8..528f4d8995d 100644 --- a/src/Storages/System/StorageSystemDroppedTables.cpp +++ b/src/Storages/System/StorageSystemDroppedTables.cpp @@ -29,7 +29,7 @@ ColumnsDescription StorageSystemDroppedTables::getColumnsDescription() } -void StorageSystemDroppedTables::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemDroppedTables::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); diff --git a/src/Storages/System/StorageSystemDroppedTables.h b/src/Storages/System/StorageSystemDroppedTables.h index d7c3569eb62..4fc620ab962 100644 --- a/src/Storages/System/StorageSystemDroppedTables.h +++ b/src/Storages/System/StorageSystemDroppedTables.h @@ -6,7 +6,7 @@ namespace DB { -class StorageSystemDroppedTables final : public IStorageSystemOneBlock +class StorageSystemDroppedTables final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMarkedDroppedTables"; } @@ -14,7 +14,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemEnabledRoles.cpp b/src/Storages/System/StorageSystemEnabledRoles.cpp index 6dbb6f18488..42806a832cf 100644 --- a/src/Storages/System/StorageSystemEnabledRoles.cpp +++ b/src/Storages/System/StorageSystemEnabledRoles.cpp @@ -23,7 +23,7 @@ ColumnsDescription StorageSystemEnabledRoles::getColumnsDescription() } -void StorageSystemEnabledRoles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemEnabledRoles::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto roles_info = context->getRolesInfo(); auto user = context->getUser(); diff --git a/src/Storages/System/StorageSystemEnabledRoles.h b/src/Storages/System/StorageSystemEnabledRoles.h index 5367b2ccbea..1b11d025367 100644 --- 
a/src/Storages/System/StorageSystemEnabledRoles.h +++ b/src/Storages/System/StorageSystemEnabledRoles.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `enabled_roles` system table, which allows you to get information about enabled roles. -class StorageSystemEnabledRoles final : public IStorageSystemOneBlock +class StorageSystemEnabledRoles final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemEnabledRoles"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemErrors.cpp b/src/Storages/System/StorageSystemErrors.cpp index 730e4cf05da..2da268305f8 100644 --- a/src/Storages/System/StorageSystemErrors.cpp +++ b/src/Storages/System/StorageSystemErrors.cpp @@ -25,7 +25,7 @@ ColumnsDescription StorageSystemErrors::getColumnsDescription() } -void StorageSystemErrors::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemErrors::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto add_row = [&](std::string_view name, size_t code, const auto & error, bool remote) { diff --git a/src/Storages/System/StorageSystemErrors.h b/src/Storages/System/StorageSystemErrors.h index 9e8ec628bac..bc86c085ff1 100644 --- a/src/Storages/System/StorageSystemErrors.h +++ b/src/Storages/System/StorageSystemErrors.h @@ -13,7 +13,7 @@ class Context; * Implements the `errors` system table, which shows the error code and the number of times it happens * (i.e. Exception with this code had been thrown). */ -class StorageSystemErrors final : public IStorageSystemOneBlock +class StorageSystemErrors final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemErrors"; } @@ -23,7 +23,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemEvents.cpp b/src/Storages/System/StorageSystemEvents.cpp index 5d9bc3a773a..822d5c77788 100644 --- a/src/Storages/System/StorageSystemEvents.cpp +++ b/src/Storages/System/StorageSystemEvents.cpp @@ -23,7 +23,7 @@ ColumnsDescription StorageSystemEvents::getColumnsDescription() return description; } -void StorageSystemEvents::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemEvents::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { diff --git a/src/Storages/System/StorageSystemEvents.h b/src/Storages/System/StorageSystemEvents.h index cbd92f90d7a..9217fdfb53e 100644 --- a/src/Storages/System/StorageSystemEvents.h +++ b/src/Storages/System/StorageSystemEvents.h @@ -10,7 +10,7 @@ class Context; /** Implements `events` system table, which allows you to obtain information for profiling. 
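Finally, tables that used to filter candidate rows through query_info.query now do the same through the pushed-down predicate, as the system.rocksdb and system.databases hunks show. Below is a condensed sketch of that flow, written as a variant of fillData() for the hypothetical StorageSystemExample introduced earlier; it assumes supportsColumnsMask() returns true so that columns_mask is populated, and the column names are illustrative.

void StorageSystemExample::fillData(MutableColumns & res_columns, ContextPtr context,
                                    const ActionsDAG::Node * predicate, std::vector<UInt8> columns_mask) const
{
    /// Build a small block containing only the columns the predicate may reference.
    MutableColumnPtr database_column = ColumnString::create();
    database_column->insert(String("some_database"));

    Block filter_block
    {
        ColumnWithTypeAndName(std::move(database_column), std::make_shared<DataTypeString>(), "database"),
    };

    /// Rows rejected by the WHERE/PREWHERE predicate are dropped before any expensive work.
    VirtualColumnUtils::filterBlockWithPredicate(predicate, filter_block, context);
    if (!filter_block.rows())
        return;

    const auto & filtered_databases = filter_block.getByPosition(0).column;
    for (size_t i = 0; i < filtered_databases->size(); ++i)
    {
        size_t src_index = 0;
        size_t res_index = 0;

        /// Only the columns the query actually requested are materialized.
        if (columns_mask[src_index++])
            res_columns[res_index++]->insert((*filtered_databases)[i]);
    }
}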
*/ -class StorageSystemEvents final : public IStorageSystemOneBlock +class StorageSystemEvents final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemEvents"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index d2bd085f934..53cd76e4219 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -36,11 +36,11 @@ ColumnsDescription StorageSystemFilesystemCache::getColumnsDescription() } StorageSystemFilesystemCache::StorageSystemFilesystemCache(const StorageID & table_id_) - : IStorageSystemOneBlock(table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) { } -void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { auto caches = FileCacheFactory::instance().getAll(); diff --git a/src/Storages/System/StorageSystemFilesystemCache.h b/src/Storages/System/StorageSystemFilesystemCache.h index 4b13b375f95..ea49fd16ba2 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.h +++ b/src/Storages/System/StorageSystemFilesystemCache.h @@ -29,7 +29,7 @@ namespace DB * FORMAT Vertical */ -class StorageSystemFilesystemCache final : public IStorageSystemOneBlock +class StorageSystemFilesystemCache final : public IStorageSystemOneBlock { public: explicit StorageSystemFilesystemCache(const StorageID & table_id_); @@ -39,7 +39,7 @@ public: static ColumnsDescription getColumnsDescription(); protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemFormats.cpp b/src/Storages/System/StorageSystemFormats.cpp index a360971e1f7..0d8a5f8bd47 100644 --- a/src/Storages/System/StorageSystemFormats.cpp +++ b/src/Storages/System/StorageSystemFormats.cpp @@ -18,7 +18,7 @@ ColumnsDescription StorageSystemFormats::getColumnsDescription() }; } -void StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { const auto & formats = FormatFactory::instance().getAllFormats(); for (const auto & pair : formats) diff --git a/src/Storages/System/StorageSystemFormats.h b/src/Storages/System/StorageSystemFormats.h index 9f9d1df1bde..f93641ee8a8 100644 --- a/src/Storages/System/StorageSystemFormats.h +++ b/src/Storages/System/StorageSystemFormats.h @@ -4,10 +4,10 @@ namespace DB { -class StorageSystemFormats final : public IStorageSystemOneBlock +class StorageSystemFormats final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const 
ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index 45c00e6de27..1184ef2c6bf 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -133,7 +133,7 @@ ColumnsDescription StorageSystemFunctions::getColumnsDescription() }; } -void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto & functions_factory = FunctionFactory::instance(); const auto & function_names = functions_factory.getAllRegisteredNames(); diff --git a/src/Storages/System/StorageSystemFunctions.h b/src/Storages/System/StorageSystemFunctions.h index ac1129e8127..c6f85d436fc 100644 --- a/src/Storages/System/StorageSystemFunctions.h +++ b/src/Storages/System/StorageSystemFunctions.h @@ -12,7 +12,7 @@ class Context; /** Implements `functions`system table, which allows you to get a list * all normal and aggregate functions. */ -class StorageSystemFunctions final : public IStorageSystemOneBlock +class StorageSystemFunctions final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemFunctions"; } @@ -25,7 +25,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemGrants.cpp b/src/Storages/System/StorageSystemGrants.cpp index f5f3fa07e53..afa49536983 100644 --- a/src/Storages/System/StorageSystemGrants.cpp +++ b/src/Storages/System/StorageSystemGrants.cpp @@ -38,7 +38,7 @@ ColumnsDescription StorageSystemGrants::getColumnsDescription() } -void StorageSystemGrants::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemGrants::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemGrants.h b/src/Storages/System/StorageSystemGrants.h index 2202b52ad5f..6bf3793c3dc 100644 --- a/src/Storages/System/StorageSystemGrants.h +++ b/src/Storages/System/StorageSystemGrants.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `grants` system table, which allows you to get information about grants. 
-class StorageSystemGrants final : public IStorageSystemOneBlock +class StorageSystemGrants final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemGrants"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index ffeb1b6c890..a638a08fac7 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -75,7 +75,7 @@ static StorageSystemGraphite::Configs getConfigs(ContextPtr context) return graphite_configs; } -void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { Configs graphite_configs = getConfigs(context); diff --git a/src/Storages/System/StorageSystemGraphite.h b/src/Storages/System/StorageSystemGraphite.h index be101181cf7..78379afac9d 100644 --- a/src/Storages/System/StorageSystemGraphite.h +++ b/src/Storages/System/StorageSystemGraphite.h @@ -10,7 +10,7 @@ namespace DB { /// Provides information about Graphite configuration. -class StorageSystemGraphite final : public IStorageSystemOneBlock +class StorageSystemGraphite final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemGraphite"; } @@ -30,7 +30,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index 95962d8de8b..6c9b1681c8b 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -47,7 +47,7 @@ ColumnsDescription StorageSystemKafkaConsumers::getColumnsDescription() }; } -void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); diff --git a/src/Storages/System/StorageSystemKafkaConsumers.h b/src/Storages/System/StorageSystemKafkaConsumers.h index ae2c726849d..8d1fd504810 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.h +++ b/src/Storages/System/StorageSystemKafkaConsumers.h @@ -11,7 +11,7 @@ namespace DB { -class StorageSystemKafkaConsumers final : public IStorageSystemOneBlock +class StorageSystemKafkaConsumers final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemKafkaConsumers"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, 
ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemLicenses.cpp b/src/Storages/System/StorageSystemLicenses.cpp index c5c04b4eb94..2faf772aca4 100644 --- a/src/Storages/System/StorageSystemLicenses.cpp +++ b/src/Storages/System/StorageSystemLicenses.cpp @@ -19,7 +19,7 @@ ColumnsDescription StorageSystemLicenses::getColumnsDescription() }; } -void StorageSystemLicenses::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemLicenses::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto * it = library_licenses; *it; it += 4) { diff --git a/src/Storages/System/StorageSystemLicenses.h b/src/Storages/System/StorageSystemLicenses.h index 57a3ff201a2..ab74a590dea 100644 --- a/src/Storages/System/StorageSystemLicenses.h +++ b/src/Storages/System/StorageSystemLicenses.h @@ -10,10 +10,10 @@ class Context; /** System table "licenses" with list of licenses of 3rd party libraries */ -class StorageSystemLicenses final : public IStorageSystemOneBlock +class StorageSystemLicenses final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemMacros.cpp b/src/Storages/System/StorageSystemMacros.cpp index 6c1a24d152a..82408fd5a7e 100644 --- a/src/Storages/System/StorageSystemMacros.cpp +++ b/src/Storages/System/StorageSystemMacros.cpp @@ -15,7 +15,7 @@ ColumnsDescription StorageSystemMacros::getColumnsDescription() }; } -void StorageSystemMacros::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemMacros::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto macros = context->getMacros(); diff --git a/src/Storages/System/StorageSystemMacros.h b/src/Storages/System/StorageSystemMacros.h index ffbeb70796e..c272985e978 100644 --- a/src/Storages/System/StorageSystemMacros.h +++ b/src/Storages/System/StorageSystemMacros.h @@ -12,7 +12,7 @@ class Context; /** Information about macros for introspection. 
*/ -class StorageSystemMacros final : public IStorageSystemOneBlock<StorageSystemMacros> +class StorageSystemMacros final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMacros"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const override; }; } diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.cpp b/src/Storages/System/StorageSystemMergeTreeSettings.cpp index 85caa572edd..7781e3789a4 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.cpp +++ b/src/Storages/System/StorageSystemMergeTreeSettings.cpp @@ -31,7 +31,7 @@ ColumnsDescription SystemMergeTreeSettings<replicated>::getColumnsDescription() } template <bool replicated> -void SystemMergeTreeSettings<replicated>::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void SystemMergeTreeSettings<replicated>::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const { const auto & settings = replicated ? context->getReplicatedMergeTreeSettings() : context->getMergeTreeSettings(); auto constraints_and_current_profiles = context->getSettingsConstraintsAndCurrentProfiles(); diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.h b/src/Storages/System/StorageSystemMergeTreeSettings.h index 48e83f0a880..e2913a7e55b 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.h +++ b/src/Storages/System/StorageSystemMergeTreeSettings.h @@ -14,7 +14,7 @@ class Context; * which allows to get information about the current MergeTree settings. */ template <bool replicated> -class SystemMergeTreeSettings final : public IStorageSystemOneBlock<SystemMergeTreeSettings<replicated>> +class SystemMergeTreeSettings final : public IStorageSystemOneBlock { public: std::string getName() const override { return replicated ?
"SystemReplicatedMergeTreeSettings" : "SystemMergeTreeSettings"; } @@ -22,9 +22,9 @@ public: static ColumnsDescription getColumnsDescription(); protected: - using IStorageSystemOneBlock<SystemMergeTreeSettings<replicated>>::IStorageSystemOneBlock; + using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const override; }; } diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index 4129e4c235b..fac653e524e 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -39,7 +39,7 @@ ColumnsDescription StorageSystemMerges::getColumnsDescription() } -void StorageSystemMerges::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemMerges::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const { const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); diff --git a/src/Storages/System/StorageSystemMerges.h b/src/Storages/System/StorageSystemMerges.h index 961d28daf9a..fd9077c56d5 100644 --- a/src/Storages/System/StorageSystemMerges.h +++ b/src/Storages/System/StorageSystemMerges.h @@ -12,7 +12,7 @@ namespace DB class Context; -class StorageSystemMerges final : public IStorageSystemOneBlock<StorageSystemMerges> +class StorageSystemMerges final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMerges"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const override; }; } diff --git a/src/Storages/System/StorageSystemMetrics.cpp b/src/Storages/System/StorageSystemMetrics.cpp index 30544d66070..ae34a04cd87 100644 --- a/src/Storages/System/StorageSystemMetrics.cpp +++ b/src/Storages/System/StorageSystemMetrics.cpp @@ -25,7 +25,7 @@ ColumnsDescription StorageSystemMetrics::getColumnsDescription() return description; } -void StorageSystemMetrics::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemMetrics::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector<UInt8>) const { for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) { diff --git a/src/Storages/System/StorageSystemMetrics.h b/src/Storages/System/StorageSystemMetrics.h index ec0c67cf6b7..829fc231a79 100644 --- a/src/Storages/System/StorageSystemMetrics.h +++ b/src/Storages/System/StorageSystemMetrics.h @@ -11,7 +11,7 @@ class Context; /** Implements `metrics` system table, which provides information about the operation of the server.
*/ -class StorageSystemMetrics final : public IStorageSystemOneBlock<StorageSystemMetrics> +class StorageSystemMetrics final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMetrics"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector<UInt8>) const override; }; } diff --git a/src/Storages/System/StorageSystemModels.cpp b/src/Storages/System/StorageSystemModels.cpp index 03b659d79bd..e715238ddd4 100644 --- a/src/Storages/System/StorageSystemModels.cpp +++ b/src/Storages/System/StorageSystemModels.cpp @@ -23,7 +23,7 @@ ColumnsDescription StorageSystemModels::getColumnsDescription() }; } -void StorageSystemModels::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemModels::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const { auto bridge_helper = std::make_unique<CatBoostLibraryBridgeHelper>(context); ExternalModelInfos infos = bridge_helper->listModels(); diff --git a/src/Storages/System/StorageSystemModels.h b/src/Storages/System/StorageSystemModels.h index 91fa3761743..419b623dac0 100644 --- a/src/Storages/System/StorageSystemModels.h +++ b/src/Storages/System/StorageSystemModels.h @@ -9,7 +9,7 @@ namespace DB class Context; -class StorageSystemModels final : public IStorageSystemOneBlock<StorageSystemModels> +class StorageSystemModels final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemModels"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const override; }; } diff --git a/src/Storages/System/StorageSystemMoves.cpp b/src/Storages/System/StorageSystemMoves.cpp index 9b749218283..2c43043d820 100644 --- a/src/Storages/System/StorageSystemMoves.cpp +++ b/src/Storages/System/StorageSystemMoves.cpp @@ -23,7 +23,7 @@ ColumnsDescription StorageSystemMoves::getColumnsDescription() } -void StorageSystemMoves::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemMoves::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const { const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); diff --git a/src/Storages/System/StorageSystemMoves.h b/src/Storages/System/StorageSystemMoves.h index acdd9642f8f..45b1e9c6121 100644 --- a/src/Storages/System/StorageSystemMoves.h +++ b/src/Storages/System/StorageSystemMoves.h @@ -12,7 +12,7 @@ namespace DB class Context; -class StorageSystemMoves final : public IStorageSystemOneBlock<StorageSystemMoves> +class StorageSystemMoves final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMoves"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const override; }; } diff --git
a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index 50545a55c7f..7d263d9468d 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -46,7 +46,7 @@ ColumnsDescription StorageSystemMutations::getColumnsDescription() } -void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -100,7 +100,7 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr c { col_table, std::make_shared<DataTypeString>(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemMutations.h b/src/Storages/System/StorageSystemMutations.h index 2db6e0c17f1..c60157cd853 100644 --- a/src/Storages/System/StorageSystemMutations.h +++ b/src/Storages/System/StorageSystemMutations.h @@ -11,7 +11,7 @@ class Context; /// Implements the `mutations` system table, which provides information about the status of mutations /// in the MergeTree tables. -class StorageSystemMutations final : public IStorageSystemOneBlock<StorageSystemMutations> +class StorageSystemMutations final : public IStorageSystemOneBlock { public: String getName() const override { return "SystemMutations"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override; }; } diff --git a/src/Storages/System/StorageSystemNamedCollections.cpp b/src/Storages/System/StorageSystemNamedCollections.cpp index 25401bb751b..156fa5e5a9b 100644 --- a/src/Storages/System/StorageSystemNamedCollections.cpp +++ b/src/Storages/System/StorageSystemNamedCollections.cpp @@ -25,11 +25,11 @@ ColumnsDescription StorageSystemNamedCollections::getColumnsDescription() } StorageSystemNamedCollections::StorageSystemNamedCollections(const StorageID & table_id_) - : IStorageSystemOneBlock(table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) { } -void StorageSystemNamedCollections::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemNamedCollections::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const { const auto & access = context->getAccess(); diff --git a/src/Storages/System/StorageSystemNamedCollections.h b/src/Storages/System/StorageSystemNamedCollections.h index 596df99be83..ab302b400fc 100644 --- a/src/Storages/System/StorageSystemNamedCollections.h +++ b/src/Storages/System/StorageSystemNamedCollections.h @@ -5,7 +5,7 @@ namespace DB { -class StorageSystemNamedCollections final : public IStorageSystemOneBlock<StorageSystemNamedCollections> +class StorageSystemNamedCollections final : public IStorageSystemOneBlock { public: explicit StorageSystemNamedCollections(const StorageID & table_id_); @@ -15,7 +15,7 @@ public: static ColumnsDescription getColumnsDescription();
protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp index 1a2646d3295..9dd2ba0b156 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp @@ -44,7 +44,7 @@ ColumnsDescription StorageSystemPartMovesBetweenShards::getColumnsDescription() } -void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -95,7 +95,7 @@ void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.h b/src/Storages/System/StorageSystemPartMovesBetweenShards.h index 93a26bcd1b7..6a859d4de80 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.h +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.h @@ -9,7 +9,7 @@ namespace DB class Context; -class StorageSystemPartMovesBetweenShards final : public IStorageSystemOneBlock +class StorageSystemPartMovesBetweenShards final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemShardMoves"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index f45f3c6ed01..0aca9921257 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -77,7 +77,7 @@ ColumnsDescription StorageSystemPrivileges::getColumnsDescription() } -void StorageSystemPrivileges::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemPrivileges::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { size_t column_index = 0; auto & column_access_type = assert_cast(*res_columns[column_index++]).getData(); diff --git a/src/Storages/System/StorageSystemPrivileges.h b/src/Storages/System/StorageSystemPrivileges.h index 4441cf78d5c..eaef7f0db6d 100644 --- a/src/Storages/System/StorageSystemPrivileges.h +++ b/src/Storages/System/StorageSystemPrivileges.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `privileges` system table, which allows you to get information about access types. 
-class StorageSystemPrivileges final : public IStorageSystemOneBlock +class StorageSystemPrivileges final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemPrivileges"; } @@ -17,7 +17,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index 6702e68b81e..b6fd1aabd45 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -81,7 +81,7 @@ ColumnsDescription StorageSystemProcesses::getColumnsDescription() return description; } -void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { ProcessList::Info info = context->getProcessList().getInfo(true, true, true); diff --git a/src/Storages/System/StorageSystemProcesses.h b/src/Storages/System/StorageSystemProcesses.h index 3017f9fd367..eb241d4b1ae 100644 --- a/src/Storages/System/StorageSystemProcesses.h +++ b/src/Storages/System/StorageSystemProcesses.h @@ -11,7 +11,7 @@ class Context; /** Implements `processes` system table, which allows you to get information about the queries that are currently executing. */ -class StorageSystemProcesses final : public IStorageSystemOneBlock +class StorageSystemProcesses final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemProcesses"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 03111755904..e454012da3a 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -24,11 +24,11 @@ ColumnsDescription StorageSystemQueryCache::getColumnsDescription() } StorageSystemQueryCache::StorageSystemQueryCache(const StorageID & table_id_) - : IStorageSystemOneBlock(table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) { } -void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { QueryCachePtr query_cache = context->getQueryCache(); diff --git a/src/Storages/System/StorageSystemQueryCache.h b/src/Storages/System/StorageSystemQueryCache.h index 08ad30afb81..22856c2b1bb 100644 --- a/src/Storages/System/StorageSystemQueryCache.h +++ b/src/Storages/System/StorageSystemQueryCache.h @@ -5,7 +5,7 @@ namespace DB { -class StorageSystemQueryCache final : public IStorageSystemOneBlock +class StorageSystemQueryCache final : public IStorageSystemOneBlock { public: explicit StorageSystemQueryCache(const StorageID & table_id_); @@ -15,7 +15,7 @@ public: static 
ColumnsDescription getColumnsDescription(); protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemQuotaLimits.cpp b/src/Storages/System/StorageSystemQuotaLimits.cpp index f125a990a88..dba449d4f1d 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.cpp +++ b/src/Storages/System/StorageSystemQuotaLimits.cpp @@ -75,7 +75,7 @@ ColumnsDescription StorageSystemQuotaLimits::getColumnsDescription() } -void StorageSystemQuotaLimits::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQuotaLimits::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemQuotaLimits.h b/src/Storages/System/StorageSystemQuotaLimits.h index acc977d0df7..a8385e878ca 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.h +++ b/src/Storages/System/StorageSystemQuotaLimits.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `quota_limits` system table, which allows you to get information about the limits set for quotas. -class StorageSystemQuotaLimits final : public IStorageSystemOneBlock +class StorageSystemQuotaLimits final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemQuotaLimits"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemQuotaUsage.cpp b/src/Storages/System/StorageSystemQuotaUsage.cpp index a91e8b7b2c1..2df36aee240 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.cpp +++ b/src/Storages/System/StorageSystemQuotaUsage.cpp @@ -89,7 +89,7 @@ ColumnsDescription StorageSystemQuotaUsage::getColumnsDescriptionImpl(bool add_c } -void StorageSystemQuotaUsage::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQuotaUsage::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemQuotaUsage.h b/src/Storages/System/StorageSystemQuotaUsage.h index a3109e9ca31..3100098fe87 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.h +++ b/src/Storages/System/StorageSystemQuotaUsage.h @@ -12,7 +12,7 @@ struct QuotaUsage; /** Implements the `quota_usage` system table, which allows you to get information about * how the current user uses the quota. 
*/ -class StorageSystemQuotaUsage final : public IStorageSystemOneBlock +class StorageSystemQuotaUsage final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemQuotaUsage"; } @@ -23,7 +23,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index ee302f2f163..641bbb319d5 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -66,7 +66,7 @@ ColumnsDescription StorageSystemQuotas::getColumnsDescription() } -void StorageSystemQuotas::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQuotas::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemQuotas.h b/src/Storages/System/StorageSystemQuotas.h index cafd8b921fa..76e6f1df536 100644 --- a/src/Storages/System/StorageSystemQuotas.h +++ b/src/Storages/System/StorageSystemQuotas.h @@ -9,7 +9,7 @@ class Context; /** Implements the `quotas` system tables, which allows you to get information about quotas. */ -class StorageSystemQuotas final : public IStorageSystemOneBlock +class StorageSystemQuotas final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemQuotas"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemQuotasUsage.cpp b/src/Storages/System/StorageSystemQuotasUsage.cpp index ed22f73dd50..1587048e7e7 100644 --- a/src/Storages/System/StorageSystemQuotasUsage.cpp +++ b/src/Storages/System/StorageSystemQuotasUsage.cpp @@ -13,7 +13,7 @@ ColumnsDescription StorageSystemQuotasUsage::getColumnsDescription() return StorageSystemQuotaUsage::getColumnsDescriptionImpl(/* add_column_is_current = */ true); } -void StorageSystemQuotasUsage::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQuotasUsage::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemQuotasUsage.h b/src/Storages/System/StorageSystemQuotasUsage.h index ecdc62865d1..516e722f7df 100644 --- a/src/Storages/System/StorageSystemQuotasUsage.h +++ b/src/Storages/System/StorageSystemQuotasUsage.h @@ -10,7 +10,7 @@ class Context; /** Implements the `quotas_usage` system table, which allows you to get information about * how all users use the quotas. 
*/ -class StorageSystemQuotasUsage final : public IStorageSystemOneBlock +class StorageSystemQuotasUsage final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemQuotasUsage"; } @@ -18,7 +18,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 87b7a84e8ba..55002d6d00f 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/System/StorageSystemReplicatedFetches.cpp b/src/Storages/System/StorageSystemReplicatedFetches.cpp index e643cb9f86a..6913665a8d2 100644 --- a/src/Storages/System/StorageSystemReplicatedFetches.cpp +++ b/src/Storages/System/StorageSystemReplicatedFetches.cpp @@ -34,7 +34,7 @@ ColumnsDescription StorageSystemReplicatedFetches::getColumnsDescription() }; } -void StorageSystemReplicatedFetches::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemReplicatedFetches::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); diff --git a/src/Storages/System/StorageSystemReplicatedFetches.h b/src/Storages/System/StorageSystemReplicatedFetches.h index a176912cac0..dba9124b39d 100644 --- a/src/Storages/System/StorageSystemReplicatedFetches.h +++ b/src/Storages/System/StorageSystemReplicatedFetches.h @@ -10,7 +10,7 @@ namespace DB class Context; /// system.replicated_fetches table. 
Takes data from context.getReplicatedFetchList() -class StorageSystemReplicatedFetches final : public IStorageSystemOneBlock +class StorageSystemReplicatedFetches final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemReplicatedFetches"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemReplicationQueue.cpp b/src/Storages/System/StorageSystemReplicationQueue.cpp index 194a2ae6fb8..14b641f46c7 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -62,7 +62,7 @@ ColumnsDescription StorageSystemReplicationQueue::getColumnsDescription() } -void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -113,7 +113,7 @@ void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, Conte { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemReplicationQueue.h b/src/Storages/System/StorageSystemReplicationQueue.h index 003e4eeb927..a9e57851be1 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.h +++ b/src/Storages/System/StorageSystemReplicationQueue.h @@ -11,7 +11,7 @@ class Context; /** Implements the `replication_queue` system table, which allows you to view the replication queues for the replicated tables. */ -class StorageSystemReplicationQueue final : public IStorageSystemOneBlock +class StorageSystemReplicationQueue final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemReplicationQueue"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemRoleGrants.cpp b/src/Storages/System/StorageSystemRoleGrants.cpp index 241481275c4..e5baeed4873 100644 --- a/src/Storages/System/StorageSystemRoleGrants.cpp +++ b/src/Storages/System/StorageSystemRoleGrants.cpp @@ -40,7 +40,7 @@ ColumnsDescription StorageSystemRoleGrants::getColumnsDescription() } -void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. 
const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemRoleGrants.h b/src/Storages/System/StorageSystemRoleGrants.h index 969f82f85d5..f82aece3f24 100644 --- a/src/Storages/System/StorageSystemRoleGrants.h +++ b/src/Storages/System/StorageSystemRoleGrants.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `role_grants` system table, which allows you to get information about granted roles. -class StorageSystemRoleGrants final : public IStorageSystemOneBlock +class StorageSystemRoleGrants final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemRoleGrants"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemRoles.cpp b/src/Storages/System/StorageSystemRoles.cpp index ec26a50db77..9bfddc25ebf 100644 --- a/src/Storages/System/StorageSystemRoles.cpp +++ b/src/Storages/System/StorageSystemRoles.cpp @@ -25,7 +25,7 @@ ColumnsDescription StorageSystemRoles::getColumnsDescription() } -void StorageSystemRoles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemRoles::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemRoles.h b/src/Storages/System/StorageSystemRoles.h index 2b4ae93a932..e96bb70d9cd 100644 --- a/src/Storages/System/StorageSystemRoles.h +++ b/src/Storages/System/StorageSystemRoles.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `roles` system table, which allows you to get information about roles. -class StorageSystemRoles final : public IStorageSystemOneBlock +class StorageSystemRoles final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemRoles"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index 5a959cdf9af..ea819e88993 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -61,7 +61,7 @@ ColumnsDescription StorageSystemRowPolicies::getColumnsDescription() } -void StorageSystemRowPolicies::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemRowPolicies::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. 
const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemRowPolicies.h b/src/Storages/System/StorageSystemRowPolicies.h index f8aa5618126..cdbc4731000 100644 --- a/src/Storages/System/StorageSystemRowPolicies.h +++ b/src/Storages/System/StorageSystemRowPolicies.h @@ -10,7 +10,7 @@ class Context; /// Implements `row_policies` system table, which allows you to get information about row policies. -class StorageSystemRowPolicies final : public IStorageSystemOneBlock +class StorageSystemRowPolicies final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemRowPolicies"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 3637734b225..557f0fd1208 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -37,11 +37,11 @@ ColumnsDescription StorageSystemS3Queue::getColumnsDescription() } StorageSystemS3Queue::StorageSystemS3Queue(const StorageID & table_id_) - : IStorageSystemOneBlock(table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) { } -void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto & [zookeeper_path, metadata] : S3QueueMetadataFactory::instance().getAll()) { diff --git a/src/Storages/System/StorageSystemS3Queue.h b/src/Storages/System/StorageSystemS3Queue.h index 1dc5c521941..c89e18f458f 100644 --- a/src/Storages/System/StorageSystemS3Queue.h +++ b/src/Storages/System/StorageSystemS3Queue.h @@ -7,7 +7,7 @@ namespace DB { -class StorageSystemS3Queue final : public IStorageSystemOneBlock +class StorageSystemS3Queue final : public IStorageSystemOneBlock { public: explicit StorageSystemS3Queue(const StorageID & table_id_); @@ -17,7 +17,7 @@ public: static ColumnsDescription getColumnsDescription(); protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index ba07d44dbf9..cae42011fc5 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -80,7 +80,7 @@ ColumnsDescription StorageSystemScheduler::getColumnsDescription() } -void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { context->getResourceManager()->forEachNode([&] (const String & resource, const String & path, const String & type, const SchedulerNodePtr & node) { diff --git a/src/Storages/System/StorageSystemScheduler.h b/src/Storages/System/StorageSystemScheduler.h index 1de72a85e9b..c6a259e5b51 100644 --- 
a/src/Storages/System/StorageSystemScheduler.h +++ b/src/Storages/System/StorageSystemScheduler.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `system.scheduler` table, which allows you to get information about scheduling nodes. -class StorageSystemScheduler final : public IStorageSystemOneBlock +class StorageSystemScheduler final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemScheduler"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 1426ea83800..634089bd1cd 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -72,7 +72,7 @@ static void fillDataImpl(MutableColumns & res_columns, SchemaCache & schema_cach } } -void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { fillDataImpl(res_columns, StorageFile::getSchemaCache(context), "File"); #if USE_AWS_S3 diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.h b/src/Storages/System/StorageSystemSchemaInferenceCache.h index e6d306f8252..3e12f4b850b 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.h +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.h @@ -6,7 +6,7 @@ namespace DB { -class StorageSystemSchemaInferenceCache final : public IStorageSystemOneBlock +class StorageSystemSchemaInferenceCache final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsChanges"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index f390985546b..b75f4280877 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -55,7 +55,7 @@ ColumnsDescription StorageSystemServerSettings::getColumnsDescription() }; } -void StorageSystemServerSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemServerSettings::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// When the server configuration file is periodically re-loaded from disk, the server components (e.g. memory tracking) are updated /// with new the setting values but the settings themselves are not stored between re-loads. 
As a result, if one wants to know the diff --git a/src/Storages/System/StorageSystemServerSettings.h b/src/Storages/System/StorageSystemServerSettings.h index 276f21d674b..03c363c9920 100644 --- a/src/Storages/System/StorageSystemServerSettings.h +++ b/src/Storages/System/StorageSystemServerSettings.h @@ -11,7 +11,7 @@ class Context; /** implements system table "settings", which allows to get information about the current settings. */ -class StorageSystemServerSettings final : public IStorageSystemOneBlock +class StorageSystemServerSettings final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemServerSettings"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index 43877582af6..b437108b00e 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -34,7 +34,7 @@ ColumnsDescription StorageSystemSettings::getColumnsDescription() }; } -void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const Settings & settings = context->getSettingsRef(); auto constraints_and_current_profiles = context->getSettingsConstraintsAndCurrentProfiles(); diff --git a/src/Storages/System/StorageSystemSettings.h b/src/Storages/System/StorageSystemSettings.h index 6749f9b20a4..fae0d69ada8 100644 --- a/src/Storages/System/StorageSystemSettings.h +++ b/src/Storages/System/StorageSystemSettings.h @@ -11,7 +11,7 @@ class Context; /** implements system table "settings", which allows to get information about the current settings. 
*/ -class StorageSystemSettings final : public IStorageSystemOneBlock +class StorageSystemSettings final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettings"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSettingsChanges.cpp b/src/Storages/System/StorageSystemSettingsChanges.cpp index b1942ea9ac6..ecac76b73a3 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.cpp +++ b/src/Storages/System/StorageSystemSettingsChanges.cpp @@ -24,7 +24,7 @@ ColumnsDescription StorageSystemSettingsChanges::getColumnsDescription() }; } -void StorageSystemSettingsChanges::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemSettingsChanges::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) { diff --git a/src/Storages/System/StorageSystemSettingsChanges.h b/src/Storages/System/StorageSystemSettingsChanges.h index 3a1a8ce23d1..9d8899797fe 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.h +++ b/src/Storages/System/StorageSystemSettingsChanges.h @@ -12,7 +12,7 @@ class Context; /** Implements system table "settings_changes", which allows to get information * about the settings changes through different ClickHouse versions. */ -class StorageSystemSettingsChanges final : public IStorageSystemOneBlock +class StorageSystemSettingsChanges final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsChanges"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.cpp b/src/Storages/System/StorageSystemSettingsProfileElements.cpp index a530bd2c1b3..6ac5d13a249 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.cpp +++ b/src/Storages/System/StorageSystemSettingsProfileElements.cpp @@ -51,7 +51,7 @@ ColumnsDescription StorageSystemSettingsProfileElements::getColumnsDescription() } -void StorageSystemSettingsProfileElements::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemSettingsProfileElements::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. 
const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.h b/src/Storages/System/StorageSystemSettingsProfileElements.h index 1dedd616c82..8b08c463071 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.h +++ b/src/Storages/System/StorageSystemSettingsProfileElements.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `settings_profile_elements` system table, which allows you to get information about elements of settings profiles. -class StorageSystemSettingsProfileElements final : public IStorageSystemOneBlock +class StorageSystemSettingsProfileElements final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsProfileElements"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSettingsProfiles.cpp b/src/Storages/System/StorageSystemSettingsProfiles.cpp index 01041bee445..795152e31f3 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.cpp +++ b/src/Storages/System/StorageSystemSettingsProfiles.cpp @@ -36,7 +36,7 @@ ColumnsDescription StorageSystemSettingsProfiles::getColumnsDescription() } -void StorageSystemSettingsProfiles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemSettingsProfiles::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemSettingsProfiles.h b/src/Storages/System/StorageSystemSettingsProfiles.h index b0c8fc8658c..056666ae4c7 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.h +++ b/src/Storages/System/StorageSystemSettingsProfiles.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `settings_profiles` system table, which allows you to get information about profiles. 
-class StorageSystemSettingsProfiles final : public IStorageSystemOneBlock +class StorageSystemSettingsProfiles final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsProfiles"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemTableEngines.cpp b/src/Storages/System/StorageSystemTableEngines.cpp index c0cf95423d9..e27546aa2a4 100644 --- a/src/Storages/System/StorageSystemTableEngines.cpp +++ b/src/Storages/System/StorageSystemTableEngines.cpp @@ -26,7 +26,7 @@ ColumnsDescription StorageSystemTableEngines::getColumnsDescription() }; } -void StorageSystemTableEngines::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemTableEngines::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto & pair : StorageFactory::instance().getAllStorages()) { diff --git a/src/Storages/System/StorageSystemTableEngines.h b/src/Storages/System/StorageSystemTableEngines.h index 258b9d210b1..d7af471bb2d 100644 --- a/src/Storages/System/StorageSystemTableEngines.h +++ b/src/Storages/System/StorageSystemTableEngines.h @@ -6,10 +6,10 @@ namespace DB { -class StorageSystemTableEngines final : public IStorageSystemOneBlock +class StorageSystemTableEngines final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemTableFunctions.cpp b/src/Storages/System/StorageSystemTableFunctions.cpp index 9fb8e11e4d1..94b7d73a67e 100644 --- a/src/Storages/System/StorageSystemTableFunctions.cpp +++ b/src/Storages/System/StorageSystemTableFunctions.cpp @@ -20,7 +20,7 @@ ColumnsDescription StorageSystemTableFunctions::getColumnsDescription() }; } -void StorageSystemTableFunctions::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemTableFunctions::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { const auto & factory = TableFunctionFactory::instance(); const auto & functions_names = factory.getAllRegisteredNames(); diff --git a/src/Storages/System/StorageSystemTableFunctions.h b/src/Storages/System/StorageSystemTableFunctions.h index 804c3b51940..07ef8857135 100644 --- a/src/Storages/System/StorageSystemTableFunctions.h +++ b/src/Storages/System/StorageSystemTableFunctions.h @@ -6,12 +6,12 @@ namespace DB { -class StorageSystemTableFunctions final : public IStorageSystemOneBlock +class StorageSystemTableFunctions final : public IStorageSystemOneBlock { protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; public: std::string getName() const override diff --git a/src/Storages/System/StorageSystemTimeZones.cpp 
b/src/Storages/System/StorageSystemTimeZones.cpp index 14f4ce0f5de..af997c6423f 100644 --- a/src/Storages/System/StorageSystemTimeZones.cpp +++ b/src/Storages/System/StorageSystemTimeZones.cpp @@ -16,7 +16,7 @@ ColumnsDescription StorageSystemTimeZones::getColumnsDescription() }; } -void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (auto * it = auto_time_zones; *it; ++it) res_columns[0]->insert(String(*it)); diff --git a/src/Storages/System/StorageSystemTimeZones.h b/src/Storages/System/StorageSystemTimeZones.h index f3743a1ef09..160e8854e3e 100644 --- a/src/Storages/System/StorageSystemTimeZones.h +++ b/src/Storages/System/StorageSystemTimeZones.h @@ -10,10 +10,10 @@ class Context; /** System table "time_zones" with list of timezones pulled from /contrib/cctz/testdata/zoneinfo */ -class StorageSystemTimeZones final : public IStorageSystemOneBlock +class StorageSystemTimeZones final : public IStorageSystemOneBlock { public: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemTransactions.cpp b/src/Storages/System/StorageSystemTransactions.cpp index 47e44688c14..edc3739e713 100644 --- a/src/Storages/System/StorageSystemTransactions.cpp +++ b/src/Storages/System/StorageSystemTransactions.cpp @@ -34,7 +34,7 @@ ColumnsDescription StorageSystemTransactions::getColumnsDescription() }; } -void StorageSystemTransactions::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemTransactions::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { auto list = TransactionLog::instance().getTransactionsList(); for (const auto & elem : list) diff --git a/src/Storages/System/StorageSystemTransactions.h b/src/Storages/System/StorageSystemTransactions.h index b5a538b7b55..1c400619c1a 100644 --- a/src/Storages/System/StorageSystemTransactions.h +++ b/src/Storages/System/StorageSystemTransactions.h @@ -8,7 +8,7 @@ namespace DB class Context; -class StorageSystemTransactions final : public IStorageSystemOneBlock +class StorageSystemTransactions final : public IStorageSystemOneBlock { public: String getName() const override { return "SystemTransactions"; } @@ -18,7 +18,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemUserDirectories.cpp b/src/Storages/System/StorageSystemUserDirectories.cpp index 620c9746a4a..7b6c1144ae1 100644 --- a/src/Storages/System/StorageSystemUserDirectories.cpp +++ b/src/Storages/System/StorageSystemUserDirectories.cpp @@ -22,7 +22,7 @@ ColumnsDescription StorageSystemUserDirectories::getColumnsDescription() } -void StorageSystemUserDirectories::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemUserDirectories::fillData(MutableColumns & res_columns, ContextPtr context, const 
ActionsDAG::Node *, std::vector) const { const auto & access_control = context->getAccessControl(); auto storages = access_control.getStorages(); diff --git a/src/Storages/System/StorageSystemUserDirectories.h b/src/Storages/System/StorageSystemUserDirectories.h index bca6a9b5aa6..3cdaa877b81 100644 --- a/src/Storages/System/StorageSystemUserDirectories.h +++ b/src/Storages/System/StorageSystemUserDirectories.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `users_directories` system table, which allows you to get information about user directories. -class StorageSystemUserDirectories final : public IStorageSystemOneBlock +class StorageSystemUserDirectories final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemUserDirectories"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemUserProcesses.cpp b/src/Storages/System/StorageSystemUserProcesses.cpp index 4fbbd7ab54d..d36129aea63 100644 --- a/src/Storages/System/StorageSystemUserProcesses.cpp +++ b/src/Storages/System/StorageSystemUserProcesses.cpp @@ -32,7 +32,7 @@ ColumnsDescription StorageSystemUserProcesses::getColumnsDescription() return description; } -void StorageSystemUserProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemUserProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto user_info = context->getProcessList().getUserInfo(true); diff --git a/src/Storages/System/StorageSystemUserProcesses.h b/src/Storages/System/StorageSystemUserProcesses.h index 6eb12e30559..3141eae9662 100644 --- a/src/Storages/System/StorageSystemUserProcesses.h +++ b/src/Storages/System/StorageSystemUserProcesses.h @@ -11,7 +11,7 @@ class Context; /** Implements `processes` system table, which allows you to get information about the queries that are currently executing. */ -class StorageSystemUserProcesses final : public IStorageSystemOneBlock +class StorageSystemUserProcesses final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemUserProcesses"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index cf114a85645..4734aeaaa82 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -77,7 +77,7 @@ ColumnsDescription StorageSystemUsers::getColumnsDescription() } -void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. 
const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemUsers.h b/src/Storages/System/StorageSystemUsers.h index cfa5947d370..a1a3d717b35 100644 --- a/src/Storages/System/StorageSystemUsers.h +++ b/src/Storages/System/StorageSystemUsers.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `users` system table, which allows you to get information about users. -class StorageSystemUsers final : public IStorageSystemOneBlock +class StorageSystemUsers final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemUsers"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index d2b933e65a8..a0ed1d3c3d3 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -49,7 +49,7 @@ ColumnsDescription StorageSystemViewRefreshes::getColumnsDescription() } void StorageSystemViewRefreshes::fillData( - MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const + MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto access = context->getAccess(); auto valid_access = AccessType::SHOW_TABLES; diff --git a/src/Storages/System/StorageSystemViewRefreshes.h b/src/Storages/System/StorageSystemViewRefreshes.h index 02d3a39dfff..5a29f3a3bc8 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.h +++ b/src/Storages/System/StorageSystemViewRefreshes.h @@ -10,7 +10,7 @@ namespace DB { -class StorageSystemViewRefreshes final : public IStorageSystemOneBlock +class StorageSystemViewRefreshes final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemViewRefreshes"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemWarnings.cpp b/src/Storages/System/StorageSystemWarnings.cpp index e2579372b4d..01e96e980a8 100644 --- a/src/Storages/System/StorageSystemWarnings.cpp +++ b/src/Storages/System/StorageSystemWarnings.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include namespace DB @@ -13,7 +15,7 @@ ColumnsDescription StorageSystemWarnings::getColumnsDescription() }; } -void StorageSystemWarnings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemWarnings::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { for (const auto & warning : context->getWarnings()) res_columns[0]->insert(warning); diff --git a/src/Storages/System/StorageSystemWarnings.h b/src/Storages/System/StorageSystemWarnings.h index 42948a765ea..685fb60b430 100644 --- a/src/Storages/System/StorageSystemWarnings.h +++ b/src/Storages/System/StorageSystemWarnings.h @@ -11,7 +11,7 @@ class Context; /** Implements system.warnings table that contains warnings about server 
configuration * to be displayed in clickhouse-client. */ -class StorageSystemWarnings final : public IStorageSystemOneBlock +class StorageSystemWarnings final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemWarnings"; } @@ -21,6 +21,6 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index c165bfa217d..8041370ee92 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -39,7 +39,7 @@ ColumnsDescription StorageSystemZooKeeperConnection::getColumnsDescription() } void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, ContextPtr context, - const SelectQueryInfo &) const + const ActionsDAG::Node *, std::vector) const { const auto add_enabled_feature_flags = [&](const auto & zookeeper) { diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.h b/src/Storages/System/StorageSystemZooKeeperConnection.h index 2b6d3b2e516..f8263e1f1bc 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.h +++ b/src/Storages/System/StorageSystemZooKeeperConnection.h @@ -11,7 +11,7 @@ class Context; /** Implements `zookeeper_connection` system table, which allows you to get information about the connected zookeeper info. */ -class StorageSystemZooKeeperConnection final : public IStorageSystemOneBlock +class StorageSystemZooKeeperConnection final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemZooKeeperConnection"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index bf898f57833..f995c21326d 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -117,14 +117,14 @@ namespace DB void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, bool has_zookeeper) { - attach(context, system_database, "one", "This table contains a single row with a single dummy UInt8 column containing the value 0. Used when the table is not specified explicitly, for example in queries like `SELECT 1`."); - attach(context, system_database, "numbers", "Generates all natural numbers, starting from 0 (to 2^64 - 1, and then again) in sorted order.", false); - attach(context, system_database, "numbers_mt", "Multithreaded version of `system.numbers`. Numbers order is not guaranteed.", true); - attach(context, system_database, "zeros", "Produces unlimited number of non-materialized zeros.", false); - attach(context, system_database, "zeros_mt", "Multithreaded version of system.zeros.", true); + attachNoDescription(context, system_database, "one", "This table contains a single row with a single dummy UInt8 column containing the value 0. 
Used when the table is not specified explicitly, for example in queries like `SELECT 1`."); + attachNoDescription(context, system_database, "numbers", "Generates all natural numbers, starting from 0 (to 2^64 - 1, and then again) in sorted order.", false); + attachNoDescription(context, system_database, "numbers_mt", "Multithreaded version of `system.numbers`. Numbers order is not guaranteed.", true); + attachNoDescription(context, system_database, "zeros", "Produces unlimited number of non-materialized zeros.", false); + attachNoDescription(context, system_database, "zeros_mt", "Multithreaded version of system.zeros.", true); attach(context, system_database, "databases", "Lists all databases of the current server."); - attach(context, system_database, "tables", "Lists all tables of the current server."); - attach(context, system_database, "columns", "Lists all columns from all tables of the current server."); + attachNoDescription(context, system_database, "tables", "Lists all tables of the current server."); + attachNoDescription(context, system_database, "columns", "Lists all columns from all tables of the current server."); attach(context, system_database, "functions", "Contains a list of all available ordinary and aggregate functions with their descriptions."); attach(context, system_database, "events", "Contains profiling events and their current value."); attach(context, system_database, "settings", "Contains a list of all user-level settings (which can be modified in a scope of query or session), their current and default values along with descriptions."); @@ -158,43 +158,43 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "privileges", "Contains a list of all available privileges that could be granted to a user or role."); attach(context, system_database, "errors", "Contains a list of all errors which have ever happened including the error code, last time and message with unsymbolized stacktrace."); attach(context, system_database, "warnings", "Contains warnings about server configuration to be displayed by clickhouse-client right after it connects to the server."); - attach(context, system_database, "data_skipping_indices", "Contains all the information about all the data skipping indices in tables, similar to system.columns."); + attachNoDescription(context, system_database, "data_skipping_indices", "Contains all the information about all the data skipping indices in tables, similar to system.columns."); attach(context, system_database, "licenses", "Contains licenses of third-party libraries that are located in the contrib directory of ClickHouse sources."); attach(context, system_database, "time_zones", "Contains a list of time zones that are supported by the ClickHouse server. This list of timezones might vary depending on the version of ClickHouse."); attach(context, system_database, "backups", "Contains a list of all BACKUP or RESTORE operations with their current states and other propertis. 
Note, that table is not persistent and it shows only operations executed after the last server restart."); attach(context, system_database, "schema_inference_cache", "Contains information about all cached file schemas."); attach(context, system_database, "dropped_tables", "Contains a list of tables which were dropped from Atomic databases but not completely removed yet."); - attach(context, system_database, "dropped_tables_parts", "Contains parts of system.dropped_tables tables "); + attachNoDescription(context, system_database, "dropped_tables_parts", "Contains parts of system.dropped_tables tables "); attach(context, system_database, "scheduler", "Contains information and status for scheduling nodes residing on the local server."); #if defined(__ELF__) && !defined(OS_FREEBSD) - attach(context, system_database, "symbols", "Contains information for introspection of ClickHouse binary. This table is only useful for C++ experts and ClickHouse engineers."); + attachNoDescription(context, system_database, "symbols", "Contains information for introspection of ClickHouse binary. This table is only useful for C++ experts and ClickHouse engineers."); #endif #if USE_RDKAFKA attach(context, system_database, "kafka_consumers", "Contains information about Kafka consumers. Applicable for Kafka table engine (native ClickHouse integration)."); #endif #ifdef OS_LINUX - attach(context, system_database, "stack_trace", "Allows to obtain an unsymbolized stacktrace from all the threads of the server process."); + attachNoDescription(context, system_database, "stack_trace", "Allows to obtain an unsymbolized stacktrace from all the threads of the server process."); #endif #if USE_ROCKSDB attach(context, system_database, "rocksdb", "Contains a list of metrics exposed from embedded RocksDB."); #endif #if USE_MYSQL - attach(context, system_database, "mysql_binlogs", "Shows a list of active binlogs for MaterializedMySQL."); + attachNoDescription(context, system_database, "mysql_binlogs", "Shows a list of active binlogs for MaterializedMySQL."); #endif - attach(context, system_database, "parts", "Contains a list of currently existing (both active and inactive) parts of all *-MergeTree tables. Each part is represented by a single row."); - attach(context, system_database, "projection_parts", "Contains a list of currently existing projection parts (a copy of some part containing aggregated data or just sorted in different order) created for all the projections for all tables within a cluster."); - attach(context, system_database, "detached_parts", "Contains a list of all parts which are being found in /detached directory along with a reason why it was detached. ClickHouse server doesn't use such parts anyhow."); - attach(context, system_database, "parts_columns", "Contains a list of columns of all currently existing parts of all MergeTree tables. Each column is represented by a single row."); - attach(context, system_database, "projection_parts_columns", "Contains a list of columns of all currently existing projection parts of all MergeTree tables. Each column is represented by a single row."); - attach(context, system_database, "disks", "Contains information about disks defined in the server configuration."); - attach(context, system_database, "storage_policies", "Contains information about storage policies and volumes defined in the server configuration."); + attachNoDescription(context, system_database, "parts", "Contains a list of currently existing (both active and inactive) parts of all *-MergeTree tables. 
Each part is represented by a single row."); + attachNoDescription(context, system_database, "projection_parts", "Contains a list of currently existing projection parts (a copy of some part containing aggregated data or just sorted in different order) created for all the projections for all tables within a cluster."); + attachNoDescription(context, system_database, "detached_parts", "Contains a list of all parts which are being found in /detached directory along with a reason why it was detached. ClickHouse server doesn't use such parts anyhow."); + attachNoDescription(context, system_database, "parts_columns", "Contains a list of columns of all currently existing parts of all MergeTree tables. Each column is represented by a single row."); + attachNoDescription(context, system_database, "projection_parts_columns", "Contains a list of columns of all currently existing projection parts of all MergeTree tables. Each column is represented by a single row."); + attachNoDescription(context, system_database, "disks", "Contains information about disks defined in the server configuration."); + attachNoDescription(context, system_database, "storage_policies", "Contains information about storage policies and volumes defined in the server configuration."); attach(context, system_database, "processes", "Contains a list of currently executing processes (queries) with their progress."); attach(context, system_database, "metrics", "Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date."); attach(context, system_database, "merges", "Contains a list of merges currently executing merges of MergeTree tables and their progress. Each merge operation is represented by a single row."); attach(context, system_database, "moves", "Contains information about in-progress data part moves of MergeTree tables. Each data part movement is represented by a single row."); attach(context, system_database, "mutations", "Contains a list of mutations and their progress. Each mutation command is represented by a single row."); - attach(context, system_database, "replicas", "Contains information and status of all table replicas on current server. Each replica is represented by a single row."); + attachNoDescription(context, system_database, "replicas", "Contains information and status of all table replicas on current server. Each replica is represented by a single row."); attach(context, system_database, "replication_queue", "Contains information about tasks from replication queues stored in ClickHouse Keeper, or ZooKeeper, for each table replica."); attach(context, system_database, "distributed_ddl_queue", "Contains information about distributed DDL queries (ON CLUSTER clause) that were executed on a cluster."); attach(context, system_database, "distribution_queue", "Contains information about local files that are in the queue to be sent to the shards. 
These local files contain new parts that are created by inserting new data into the Distributed table in asynchronous mode."); @@ -206,21 +206,21 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "replicated_fetches", "Contains information about currently running background fetches."); attach(context, system_database, "part_moves_between_shards", "Contains information about parts which are currently in a process of moving between shards and their progress."); attach(context, system_database, "asynchronous_inserts", "Contains information about pending asynchronous inserts in queue in server's memory."); - attach(context, system_database, "filesystem_cache", "Contains information about all entries inside filesystem cache for remote objects."); - attach(context, system_database, "query_cache", "Contains information about all entries inside query cache in server's memory."); - attach(context, system_database, "remote_data_paths", "Contains a mapping from a filename on local filesystem to a blob name inside object storage."); + attachNoDescription(context, system_database, "filesystem_cache", "Contains information about all entries inside filesystem cache for remote objects."); + attachNoDescription(context, system_database, "query_cache", "Contains information about all entries inside query cache in server's memory."); + attachNoDescription(context, system_database, "remote_data_paths", "Contains a mapping from a filename on local filesystem to a blob name inside object storage."); attach(context, system_database, "certificates", "Contains information about available certificates and their sources."); - attach(context, system_database, "named_collections", "Contains a list of all named collections which were created via SQL query or parsed from configuration file."); + attachNoDescription(context, system_database, "named_collections", "Contains a list of all named collections which were created via SQL query or parsed from configuration file."); attach(context, system_database, "asynchronous_loader", "Contains information and status for recent asynchronous jobs (e.g. for tables loading). The table contains a row for every job."); attach(context, system_database, "user_processes", "This system table can be used to get overview of memory usage and ProfileEvents of users."); - attach(context, system_database, "jemalloc_bins", "Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas. These statistics might not be absolutely accurate because of thread local caching in jemalloc."); - attach(context, system_database, "s3queue", "Contains in-memory state of S3Queue metadata and currently processed rows per file."); + attachNoDescription(context, system_database, "jemalloc_bins", "Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas. These statistics might not be absolutely accurate because of thread local caching in jemalloc."); + attachNoDescription(context, system_database, "s3queue", "Contains in-memory state of S3Queue metadata and currently processed rows per file."); attach(context, system_database, "dashboards", "Contains queries used by /dashboard page accessible though HTTP interface. This table can be useful for monitoring and troubleshooting. 
The table contains a row for every chart in a dashboard."); attach(context, system_database, "view_refreshes", "Lists all Refreshable Materialized Views of current server."); if (has_zookeeper) { - attach(context, system_database, "zookeeper", "Exposes data from the [Zoo]Keeper cluster defined in the config. Allow to get the list of children for a particular node or read the value written inside it."); + attachNoDescription(context, system_database, "zookeeper", "Exposes data from the [Zoo]Keeper cluster defined in the config. Allow to get the list of children for a particular node or read the value written inside it."); attach(context, system_database, "zookeeper_connection", "Shows the information about current connections to [Zoo]Keeper (including auxiliary [ZooKeepers)"); } @@ -230,7 +230,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b void attachSystemTablesAsync(ContextPtr context, IDatabase & system_database, AsynchronousMetrics & async_metrics) { - attach(context, system_database, "asynchronous_metrics", "Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.", async_metrics); + attachNoDescription(context, system_database, "asynchronous_metrics", "Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.", async_metrics); } } diff --git a/src/Storages/System/attachSystemTablesImpl.h b/src/Storages/System/attachSystemTablesImpl.h index 9f2c4e8016d..d9ab164f2b3 100644 --- a/src/Storages/System/attachSystemTablesImpl.h +++ b/src/Storages/System/attachSystemTablesImpl.h @@ -10,8 +10,8 @@ namespace DB template using StringLiteral = const char(&)[Length]; -template -void attach(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral comment, StorageArgs && ... args) +template +void attachImpl(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral comment, StorageArgs && ... args) { static_assert(CommentSize > 15, "The comment for a system table is too short or empty"); assert(system_database.getDatabaseName() == DatabaseCatalog::SYSTEM_DATABASE); @@ -21,7 +21,10 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl { /// Attach to Ordinary database. 
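The attachImpl helper introduced in this hunk dispatches on a compile-time flag to decide whether StorageT::getColumnsDescription() is passed to the storage constructor. Since the template parameter lists were stripped from this copy of the diff, the following is a reduced sketch of the presumed dispatch; the helper name makeSystemStorage, the parameter names, and their order are assumptions, not the patch's exact code:

    /// Reduced sketch of the compile-time dispatch on with_description.
    template <bool with_description, typename StorageT, typename... StorageArgs>
    StoragePtr makeSystemStorage(StorageID table_id, StorageArgs &&... args)
    {
        if constexpr (with_description)
            return std::make_shared<StorageT>(table_id, StorageT::getColumnsDescription(), std::forward<StorageArgs>(args)...);
        else
            return std::make_shared<StorageT>(table_id, std::forward<StorageArgs>(args)...);
    }

attach and attachNoDescription would then be thin wrappers that call attachImpl with the flag set to true and false respectively, as the surrounding hunk shows.
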
table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name); - system_database.attachTable(context, table_name, std::make_shared(table_id, std::forward(args)...)); + if constexpr (with_description) + system_database.attachTable(context, table_name, std::make_shared(table_id, StorageT::getColumnsDescription(), std::forward(args)...)); + else + system_database.attachTable(context, table_name, std::make_shared(table_id, std::forward(args)...)); } else { @@ -31,7 +34,10 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name, UUIDHelpers::generateV4()); DatabaseCatalog::instance().addUUIDMapping(table_id.uuid); String path = "store/" + DatabaseCatalog::getPathForUUID(table_id.uuid); - system_database.attachTable(context, table_name, std::make_shared(table_id, std::forward(args)...), path); + if constexpr (with_description) + system_database.attachTable(context, table_name, std::make_shared(table_id, StorageT::getColumnsDescription(), std::forward(args)...), path); + else + system_database.attachTable(context, table_name, std::make_shared(table_id, std::forward(args)...), path); } /// Set the comment @@ -42,4 +48,17 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl table->setInMemoryMetadata(metadata); } + +template +void attach(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral comment, StorageArgs && ... args) +{ + attachImpl(context, system_database, table_name, comment, std::forward(args)...); +} + +template +void attachNoDescription(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral comment, StorageArgs && ... args) +{ + attachImpl(context, system_database, table_name, comment, std::forward(args)...); +} + } From 0ceeb13b7ff58752b199482d3683a381d4af4b00 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 28 Feb 2024 19:58:55 +0000 Subject: [PATCH 081/356] refactoring of virtual columns --- src/Interpreters/InterpreterCreateQuery.cpp | 3 +- src/Interpreters/MutationsInterpreter.cpp | 194 ++++-------------- src/Interpreters/TreeRewriter.cpp | 49 ++--- src/Storages/AlterCommands.cpp | 46 ++--- src/Storages/IStorage.cpp | 2 +- src/Storages/LightweightDeleteDescription.cpp | 9 - src/Storages/LightweightDeleteDescription.h | 13 -- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 5 + src/Storages/MergeTree/IMergeTreeDataPart.h | 3 +- src/Storages/MergeTree/MergeTask.cpp | 17 +- src/Storages/MergeTree/MergeTreeData.cpp | 5 +- src/Storages/MergeTree/MergeTreeData.h | 4 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 6 - .../MergeTree/MergeTreePrefetchedReadPool.cpp | 3 +- src/Storages/MergeTree/MergeTreeReadTask.cpp | 3 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 2 +- .../MergeTree/MergeTreeSequentialSource.cpp | 10 +- src/Storages/MergeTree/MutateTask.cpp | 13 +- src/Storages/StorageDistributed.cpp | 28 ++- src/Storages/StorageDistributed.h | 5 +- src/Storages/StorageMerge.cpp | 53 +++-- src/Storages/StorageMerge.h | 5 +- src/Storages/StorageSnapshot.cpp | 11 +- src/Storages/StorageSnapshot.h | 5 + .../01848_partition_value_column.sql | 4 +- 25 files changed, 195 insertions(+), 303 deletions(-) delete mode 100644 src/Storages/LightweightDeleteDescription.cpp delete mode 100644 src/Storages/LightweightDeleteDescription.h diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 579bca216e9..a1b63960d40 100644 --- 
a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -907,8 +907,7 @@ void validateVirtualColumns(const IStorage & storage) auto virtual_columns = storage.getVirtualsDescription(); for (const auto & storage_column : storage.getInMemoryMetadataPtr()->getColumns()) { - auto virtual_desc = virtual_columns->tryGetDescription(storage_column.name); - if (virtual_desc && virtual_desc->kind == VirtualsKind::Persistent) + if (virtual_columns->tryGet(storage_column.name, VirtualsKind::Persistent)) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column '{}' for {} engines because it is reserved for persistent virtual column", diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index e88dc121a82..8ac25d13bf0 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -265,7 +264,7 @@ MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command) alter_command->partition = alter_command->children.emplace_back(command.partition).get(); auto row_exists_predicate = makeASTFunction("equals", - std::make_shared(LightweightDeleteDescription::FILTER_COLUMN.name), + std::make_shared(RowExistsColumn::name), std::make_shared(Field(0))); if (command.predicate) @@ -435,60 +434,54 @@ static NameSet getKeyColumns(const MutationsInterpreter::Source & source, const static void validateUpdateColumns( const MutationsInterpreter::Source & source, - const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns, - const std::unordered_map & column_to_affected_materialized) + const StorageMetadataPtr & metadata_snapshot, + const NameSet & updated_columns, + const std::unordered_map & column_to_affected_materialized, + const ContextPtr & context) { + auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context); NameSet key_columns = getKeyColumns(source, metadata_snapshot); - for (const String & column_name : updated_columns) + const auto & storage_columns = storage_snapshot->metadata->getColumns(); + const auto & virtual_columns = *storage_snapshot->virtual_columns; + + for (const auto & column_name : updated_columns) { - auto found = false; - for (const auto & col : metadata_snapshot->getColumns().getOrdinary()) - { - if (col.name == column_name) - { - found = true; - break; - } - } - - /// Allow to override value of lightweight delete filter virtual column - if (!found && column_name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - if (!source.supportsLightweightDelete()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); - found = true; - } - - /// Dont allow to override value of block number virtual column - if (!found && column_name == BlockNumberColumn::name) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name)); - } - - if (!found) - { - for (const auto & col : metadata_snapshot->getColumns().getMaterialized()) - { - if (col.name == column_name) - throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name)); - } - - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name)); - } - if (key_columns.contains(column_name)) throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE key column {}", 
backQuote(column_name)); + if (storage_columns.tryGetColumn(GetColumnsOptions::Materialized, column_name)) + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name)); + auto materialized_it = column_to_affected_materialized.find(column_name); if (materialized_it != column_to_affected_materialized.end()) { - for (const String & materialized : materialized_it->second) + for (const auto & materialized : materialized_it->second) { if (key_columns.contains(materialized)) + { throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Updated column {} affects MATERIALIZED column {}, which is a key column. " "Cannot UPDATE it.", backQuote(column_name), backQuote(materialized)); + } + } + } + + if (!storage_columns.tryGetColumn(GetColumnsOptions::Ordinary, column_name)) + { + /// Allow to override value of lightweight delete filter virtual column + if (column_name == RowExistsColumn::name) + { + if (!source.supportsLightweightDelete()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); + } + else if (virtual_columns.tryGet(column_name)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name)); + } + else + { + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name)); } } } @@ -546,8 +539,8 @@ void MutationsInterpreter::prepare(bool dry_run) /// Add _row_exists column if it is physically present in the part if (source.hasLightweightDeleteMask()) { - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); - available_columns_set.insert(LightweightDeleteDescription::FILTER_COLUMN.name); + all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type); + available_columns_set.insert(RowExistsColumn::name); } NameSet updated_columns; @@ -563,9 +556,7 @@ void MutationsInterpreter::prepare(bool dry_run) for (const auto & [name, _] : command.column_to_update_expression) { - if (!available_columns_set.contains(name) - && name != LightweightDeleteDescription::FILTER_COLUMN.name - && name != BlockNumberColumn::name) + if (!available_columns_set.contains(name) && name != RowExistsColumn::name) throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Column {} is updated but not requested to read", name); @@ -590,7 +581,7 @@ void MutationsInterpreter::prepare(bool dry_run) } } - validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized); + validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized, context); } StorageInMemoryMetadata::HasDependencyCallback has_dependency = @@ -666,15 +657,11 @@ void MutationsInterpreter::prepare(bool dry_run) { type = physical_column->type; } - else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + else if (column_name == RowExistsColumn::name) { - type = LightweightDeleteDescription::FILTER_COLUMN.type; + type = RowExistsColumn::type; deleted_mask_updated = true; } - else if (column_name == BlockNumberColumn::name) - { - type = BlockNumberColumn::type; - } else { throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column_name); @@ -1028,7 +1015,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s /// Add _row_exists column if it is present in the part if (source.hasLightweightDeleteMask() || deleted_mask_updated) - all_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN); + 
all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type); bool has_filters = false; /// Next, for each stage calculate columns changed by this and previous stages. @@ -1038,7 +1025,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s { for (const auto & column : all_columns) { - if (column.name == LightweightDeleteDescription::FILTER_COLUMN.name && !deleted_mask_updated) + if (column.name == RowExistsColumn::name && !deleted_mask_updated) continue; prepared_stages[i].output_columns.insert(column.name); @@ -1057,7 +1044,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s /// and so it is not in the list of AllPhysical columns. for (const auto & [column_name, _] : prepared_stages[i].column_to_updated) { - if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && has_filters && !deleted_mask_updated) + if (column_name == RowExistsColumn::name && has_filters && !deleted_mask_updated) continue; prepared_stages[i].output_columns.insert(column_name); @@ -1148,93 +1135,6 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s } } -/// This structure re-implements adding virtual columns while reading from MergeTree part. -/// It would be good to unify it with IMergeTreeSelectAlgorithm. -struct VirtualColumns -{ - struct ColumnAndPosition - { - ColumnWithTypeAndName column; - size_t position; - }; - - using Columns = std::vector; - - Columns virtuals; - Names columns_to_read; - - VirtualColumns(Names required_columns, const MergeTreeData::DataPartPtr & part) : columns_to_read(std::move(required_columns)) - { - for (size_t i = 0; i < columns_to_read.size(); ++i) - { - if (columns_to_read[i] == LightweightDeleteDescription::FILTER_COLUMN.name) - { - if (!part->getColumns().contains(LightweightDeleteDescription::FILTER_COLUMN.name)) - { - ColumnWithTypeAndName mask_column; - mask_column.type = LightweightDeleteDescription::FILTER_COLUMN.type; - mask_column.column = mask_column.type->createColumnConst(0, 1); - mask_column.name = std::move(columns_to_read[i]); - - virtuals.emplace_back(ColumnAndPosition{.column = std::move(mask_column), .position = i}); - } - } - else if (columns_to_read[i] == "_partition_id") - { - ColumnWithTypeAndName column; - column.type = std::make_shared(); - column.column = column.type->createColumnConst(0, part->info.partition_id); - column.name = std::move(columns_to_read[i]); - - virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i}); - } - else if (columns_to_read[i] == BlockNumberColumn::name) - { - if (!part->getColumns().contains(BlockNumberColumn::name)) - { - ColumnWithTypeAndName block_number_column; - block_number_column.type = BlockNumberColumn::type; - block_number_column.column = block_number_column.type->createColumnConst(0, part->info.min_block); - block_number_column.name = std::move(columns_to_read[i]); - - virtuals.emplace_back(ColumnAndPosition{.column = std::move(block_number_column), .position = i}); - } - } - } - - if (!virtuals.empty()) - { - Names columns_no_virtuals; - columns_no_virtuals.reserve(columns_to_read.size()); - size_t next_virtual = 0; - for (size_t i = 0; i < columns_to_read.size(); ++i) - { - if (next_virtual < virtuals.size() && i == virtuals[next_virtual].position) - ++next_virtual; - else - columns_no_virtuals.emplace_back(std::move(columns_to_read[i])); - } - - columns_to_read.swap(columns_no_virtuals); - } - } - - void addVirtuals(QueryPlan & plan) - { - auto dag = 
std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); - - for (auto & column : virtuals) - { - const auto & adding_const = dag->addColumn(std::move(column.column)); - auto & outputs = dag->getOutputs(); - outputs.insert(outputs.begin() + column.position, &adding_const); - } - - auto step = std::make_unique(plan.getCurrentDataStream(), std::move(dag)); - plan.addStep(std::move(step)); - } -}; - void MutationsInterpreter::Source::read( Stage & first_stage, QueryPlan & plan, @@ -1277,16 +1177,12 @@ void MutationsInterpreter::Source::read( filter = ActionsDAG::buildFilterActionsDAG(nodes); } - VirtualColumns virtual_columns(std::move(required_columns), part); - createReadFromPartStep( MergeTreeSequentialSourceType::Mutation, - plan, *data, storage_snapshot, part, - std::move(virtual_columns.columns_to_read), + plan, *data, storage_snapshot, + part, required_columns, apply_deleted_mask_, filter, context_, getLogger("MutationsInterpreter")); - - virtual_columns.addVirtuals(plan); } else { diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 960fad5dec6..1a32b885f4d 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -990,8 +991,8 @@ void TreeRewriterResult::collectSourceColumns(bool add_special) { auto options = GetColumnsOptions(add_special ? GetColumnsOptions::All : GetColumnsOptions::AllPhysical); options.withExtendedObjects(); - if (storage->supportsSubcolumns()) - options.withSubcolumns(); + options.withSubcolumns(storage->supportsSubcolumns()); + options.withVirtuals(); auto columns_from_storage = storage_snapshot->getColumns(options); @@ -1109,16 +1110,16 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select const auto & partition_desc = storage_snapshot->metadata->getPartitionKey(); if (partition_desc.expression) { - auto partition_source_columns = partition_desc.expression->getRequiredColumns(); - partition_source_columns.push_back("_part"); - partition_source_columns.push_back("_partition_id"); - partition_source_columns.push_back("_part_uuid"); - partition_source_columns.push_back("_partition_value"); + auto partition_columns = partition_desc.expression->getRequiredColumns(); + NameSet partition_columns_set(partition_columns.begin(), partition_columns.end()); + + const auto & parititon_virtuals = MergeTreeData::virtuals_useful_for_filter; + partition_columns_set.insert(parititon_virtuals.begin(), parititon_virtuals.end()); + optimize_trivial_count = true; for (const auto & required_column : required) { - if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column) - == partition_source_columns.end()) + if (partition_columns_set.contains(required_column)) { optimize_trivial_count = false; break; @@ -1129,7 +1130,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select NameSet unknown_required_source_columns = required; - for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();) + for (auto it = source_columns.begin(); it != source_columns.end();) { const String & column_name = it->name; unknown_required_source_columns.erase(column_name); @@ -1141,32 +1142,14 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } has_virtual_shard_num = false; - /// If there are virtual columns among the unknown columns. 
Remove them from the list of unknown and add - /// in columns list, so that when further processing they are also considered. - if (storage) + if (is_remote_storage) { - const auto storage_virtuals = storage->getVirtuals(); - for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) + for (const auto & column : *storage_snapshot->virtual_columns) { - auto column = storage_virtuals.tryGetByName(*it); - if (column) + if (column.name == "_shard_num" && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery())) { - source_columns.push_back(*column); - it = unknown_required_source_columns.erase(it); - } - else - ++it; - } - - if (is_remote_storage) - { - for (const auto & name_type : storage_virtuals) - { - if (name_type.name == "_shard_num" && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery())) - { - has_virtual_shard_num = true; - break; - } + has_virtual_shard_num = true; + break; } } } diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 792f942fcf1..acd95a2b8d7 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -955,8 +954,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada /// Drop alias is metadata alter, in other case mutation is required. if (type == DROP_COLUMN) - return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name) || - column_name == LightweightDeleteDescription::FILTER_COLUMN.name || column_name == BlockNumberColumn::name; + return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name); if (type != MODIFY_COLUMN || data_type == nullptr) return false; @@ -1246,7 +1244,9 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata) void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { - const StorageInMemoryMetadata & metadata = table->getInMemoryMetadata(); + const auto & metadata = table->getInMemoryMetadata(); + const auto & virtuals = *table->getVirtualsDescription(); + auto all_columns = metadata.columns; /// Default expression for all added/modified columns ASTPtr default_expr_list = std::make_shared(); @@ -1282,16 +1282,20 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const if (command.data_type->hasDynamicSubcolumns()) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. 
It has known bugs"); - if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: " - "this column name is reserved for lightweight delete feature", backQuote(column_name)); - - if (column_name == BlockNumberColumn::name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: " - "this column name is reserved for _block_number persisting feature", backQuote(column_name)); + if (virtuals.tryGet(column_name, VirtualsKind::Persistent)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot add column {}: this column name is reserved for persistent virtual column", backQuote(column_name)); if (command.codec) - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec, context->getSettingsRef().enable_zstd_qat_codec); + { + const auto & settings = context->getSettingsRef(); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( + command.codec, command.data_type, + !settings.allow_suspicious_codecs, + settings.allow_experimental_codecs, + settings.enable_deflate_qpl_codec, + settings.enable_zstd_qat_codec); + } all_columns.add(ColumnDescription(column_name, command.data_type)); } @@ -1405,9 +1409,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } else if (command.type == AlterCommand::DROP_COLUMN) { - if (all_columns.has(command.column_name) || - all_columns.hasNested(command.column_name) || - (command.clear && column_name == LightweightDeleteDescription::FILTER_COLUMN.name)) + if (all_columns.has(command.column_name) || all_columns.hasNested(command.column_name)) { if (!command.clear) /// CLEAR column is Ok even if there are dependencies. 
{ @@ -1491,16 +1493,12 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } if (all_columns.has(command.rename_to)) - throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Cannot rename to {}: " - "column with this name already exists", backQuote(command.rename_to)); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, + "Cannot rename to {}: column with this name already exists", backQuote(command.rename_to)); - if (command.rename_to == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: " - "this column name is reserved for lightweight delete feature", backQuote(command.rename_to)); - - if (command.rename_to == BlockNumberColumn::name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: " - "this column name is reserved for _block_number persisting feature", backQuote(command.rename_to)); + if (virtuals.tryGet(command.rename_to, VirtualsKind::Persistent)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot rename to {}: this column name is reserved for persistent virtual column", backQuote(command.rename_to)); if (modified_columns.contains(column_name)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename and modify the same column {} " diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 27593bfe7a8..18a934af767 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -38,7 +38,7 @@ IStorage::IStorage(StorageID storage_id_) bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const { /// Virtual column maybe overridden by real column - return !metadata_snapshot->getColumns().has(column_name) && getVirtuals().contains(column_name); + return !metadata_snapshot->getColumns().has(column_name) && virtuals.get()->has(column_name); } RWLockImpl::LockHolder IStorage::tryLockTimed( diff --git a/src/Storages/LightweightDeleteDescription.cpp b/src/Storages/LightweightDeleteDescription.cpp deleted file mode 100644 index ae5e68da9c2..00000000000 --- a/src/Storages/LightweightDeleteDescription.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include - -namespace DB -{ - -const NameAndTypePair LightweightDeleteDescription::FILTER_COLUMN {"_row_exists", std::make_shared()}; - -} diff --git a/src/Storages/LightweightDeleteDescription.h b/src/Storages/LightweightDeleteDescription.h deleted file mode 100644 index 45bde59ea71..00000000000 --- a/src/Storages/LightweightDeleteDescription.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once -#include -#include "Storages/TTLDescription.h" - -namespace DB -{ - -struct LightweightDeleteDescription -{ - static const NameAndTypePair FILTER_COLUMN; -}; - -} diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e06ea5e560c..fe4ba5fc052 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1451,6 +1451,11 @@ bool IMergeTreeDataPart::supportLightweightDeleteMutate() const parent_part == nullptr && projection_parts.empty(); } +bool IMergeTreeDataPart::hasLightweightDelete() const +{ + return columns.contains(RowExistsColumn::name); +} + void IMergeTreeDataPart::assertHasVersionMetadata(MergeTreeTransaction * txn) const { TransactionID expected_tid = txn ? 
txn->tid : Tx::PrehistoricTID; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index d0eafc42ddc..fba1e6ddbb1 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -498,7 +497,7 @@ public: bool supportLightweightDeleteMutate() const; /// True if here is lightweight deleted mask file in part. - bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); } + bool hasLightweightDelete() const; void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index df64ae33713..4621314cb98 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -1075,14 +1074,18 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() if (global_ctx->deduplicate) { - /// We don't want to deduplicate by block number column - /// so if deduplicate_by_columns is empty, add all columns except _block_number - if (supportsBlockNumberColumn(global_ctx) && global_ctx->deduplicate_by_columns.empty()) + const auto & virtuals = *global_ctx->data->getVirtualsDescription(); + + /// We don't want to deduplicate by virtual persistent column. + /// If deduplicate_by_columns is empty, add all columns except virtuals. + if (global_ctx->deduplicate_by_columns.empty()) { - for (const auto & col : global_ctx->merging_column_names) + for (const auto & column_name : global_ctx->merging_column_names) { - if (col != BlockNumberColumn::name) - global_ctx->deduplicate_by_columns.emplace_back(col); + if (virtuals.tryGet(column_name, VirtualsKind::Persistent)) + continue; + + global_ctx->deduplicate_by_columns.emplace_back(column_name); } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 539532bfaca..cb319348b60 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -450,7 +450,7 @@ VirtualColumnsDescription MergeTreeData::createVirtuals(const StorageInMemoryMet desc.addEphemeral("_partition_value", std::make_shared(std::move(partition_types)), ""); } - desc.addPersistent(LightweightDeleteDescription::FILTER_COLUMN.name, LightweightDeleteDescription::FILTER_COLUMN.type, nullptr, ""); + desc.addPersistent(RowExistsColumn::name, RowExistsColumn::type, nullptr, ""); desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, ""); return desc; @@ -3651,6 +3651,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts { auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & columns = metadata_snapshot->getColumns(); + const auto & virtuals = *getVirtualsDescription(); if (!hasDynamicSubcolumns(columns)) return; @@ -3658,7 +3659,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts const auto & part_columns = part->getColumns(); for (const auto & part_column : part_columns) { - if (part_column.name == LightweightDeleteDescription::FILTER_COLUMN.name || part_column.name == BlockNumberColumn::name) + if (virtuals.has(part_column.name)) continue; auto storage_column = columns.getPhysical(part_column.name); diff --git a/src/Storages/MergeTree/MergeTreeData.h 
b/src/Storages/MergeTree/MergeTreeData.h index 80ab1f337ee..f7bde252fb9 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1085,6 +1085,8 @@ public: bool initializeDiskOnConfigChange(const std::set & /*new_added_disks*/) override; + static VirtualColumnsDescription createVirtuals(const StorageInMemoryMetadata & metadata); + protected: friend class IMergeTreeDataPart; friend class MergeTreeDataMergerMutator; @@ -1675,8 +1677,6 @@ private: void checkColumnFilenamesForCollision(const StorageInMemoryMetadata & metadata, bool throw_on_error) const; void checkColumnFilenamesForCollision(const ColumnsDescription & columns, const MergeTreeSettings & settings, bool throw_on_error) const; - - static VirtualColumnsDescription createVirtuals(const StorageInMemoryMetadata & metadata); }; /// RAII struct to record big parts that are submerging or emerging. diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 8e3f2e07684..ee34a02b0b3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -76,12 +76,6 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); - /// _block_number column is not added by user, but is persisted in a part after merge - /// If _block_number is not present in the parts to be merged, then it won't have a position - /// So check if its not present and add it at the end - if (columns_list.contains(BlockNumberColumn::name) && !ordered_columns_list.contains(BlockNumberColumn::name)) - ordered_columns_list.emplace_back(NameAndTypePair{BlockNumberColumn::name, BlockNumberColumn::type}); - return std::make_unique( shared_from_this(), ordered_columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, getMarksFileExtension(), diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 7cb3d6012d5..059caebcfc8 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -375,7 +376,7 @@ void MergeTreePrefetchedReadPool::fillPerPartStatistics() update_stat_for_column(column.name); if (reader_settings.apply_deleted_mask && read_info.data_part->hasLightweightDelete()) - update_stat_for_column(LightweightDeleteDescription::FILTER_COLUMN.name); + update_stat_for_column(RowExistsColumn::name); for (const auto & pre_columns : read_info.task_columns.pre_columns) for (const auto & column : pre_columns) diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index 7bbabf6a18d..64fd37d14b1 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace DB @@ -57,7 +58,7 @@ MergeTreeReadTask::Readers MergeTreeReadTask::createReaders( /// Add lightweight delete filtering step if (extras.reader_settings.apply_deleted_mask && read_info->data_part->hasLightweightDelete()) - new_readers.prewhere.push_back(create_reader({LightweightDeleteDescription::FILTER_COLUMN})); + new_readers.prewhere.push_back(create_reader({{RowExistsColumn::name, RowExistsColumn::type}})); for (const 
auto & pre_columns_per_step : read_info->task_columns.pre_columns) new_readers.prewhere.push_back(create_reader(pre_columns_per_step)); diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 6b0c45c2431..e3900ccdd73 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -48,7 +48,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( { .type = PrewhereExprStep::Filter, .actions = nullptr, - .filter_column_name = LightweightDeleteDescription::FILTER_COLUMN.name, + .filter_column_name = RowExistsColumn::name, .remove_filter_column = true, .need_filter = true, .perform_alter_conversions = true, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 35f5782b95a..fb0bc617aa4 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -136,7 +136,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( { auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() - .withVirtuals(VirtualsKind::Persistent) + .withVirtuals() .withSubcolumns(storage.supportsSubcolumns()); columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); @@ -242,6 +242,7 @@ try if (rows_read) { fillBlockNumberColumns(columns, sample, data_part->info.min_block, current_row, rows_read); + reader->fillVirtualColumns(columns, rows_read); current_row += rows_read; current_mark += (rows_to_read == rows_read); @@ -315,14 +316,13 @@ Pipe createMergeTreeSequentialSource( bool quiet, std::shared_ptr> filtered_rows_count) { - const auto & filter_column = LightweightDeleteDescription::FILTER_COLUMN; /// The part might have some rows masked by lightweight deletes const bool need_to_filter_deleted_rows = apply_deleted_mask && data_part->hasLightweightDelete(); - const bool has_filter_column = std::ranges::find(columns_to_read, filter_column.name) != columns_to_read.end(); + const bool has_filter_column = std::ranges::find(columns_to_read, RowExistsColumn::name) != columns_to_read.end(); if (need_to_filter_deleted_rows && !has_filter_column) - columns_to_read.emplace_back(filter_column.name); + columns_to_read.emplace_back(RowExistsColumn::name); auto column_part_source = std::make_shared(type, storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), @@ -336,7 +336,7 @@ Pipe createMergeTreeSequentialSource( pipe.addSimpleTransform([filtered_rows_count, has_filter_column](const Block & header) { return std::make_shared( - header, nullptr, filter_column.name, !has_filter_column, false, filtered_rows_count); + header, nullptr, RowExistsColumn::name, !has_filter_column, false, filtered_rows_count); }); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 065a2d86296..b3c36f7180b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -283,7 +283,6 @@ getColumnsForNewDataPart( ColumnsDescription part_columns(source_part->getColumns()); NamesAndTypesList system_columns; - const auto & deleted_mask_column = LightweightDeleteDescription::FILTER_COLUMN; bool supports_lightweight_deletes = source_part->supportLightweightDeleteMutate(); bool deleted_mask_updated = false; @@ -299,9 +298,9 @@ getColumnsForNewDataPart( { for (const auto & [column_name, _] : command.column_to_update_expression) { - if (column_name 
== deleted_mask_column.name + if (column_name == RowExistsColumn::name && supports_lightweight_deletes - && !storage_columns_set.contains(deleted_mask_column.name)) + && !storage_columns_set.contains(RowExistsColumn::name)) deleted_mask_updated = true; } } @@ -323,12 +322,12 @@ getColumnsForNewDataPart( } } - if (!storage_columns_set.contains(deleted_mask_column.name)) + if (!storage_columns_set.contains(RowExistsColumn::name)) { - if (deleted_mask_updated || (part_columns.has(deleted_mask_column.name) && !has_delete_command)) + if (deleted_mask_updated || (part_columns.has(RowExistsColumn::name) && !has_delete_command)) { - storage_columns.push_back(deleted_mask_column); - storage_columns_set.insert(deleted_mask_column.name); + storage_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type); + storage_columns_set.insert(RowExistsColumn::name); } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1a34db86d4f..83eb3e55853 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -30,6 +31,7 @@ #include #include #include +#include "Storages/StorageInMemoryMetadata.h" #include #include @@ -108,7 +110,6 @@ #include #include -#include #include @@ -290,22 +291,18 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus StorageDistributed::~StorageDistributed() = default; -NamesAndTypesList StorageDistributed::getVirtuals() const +VirtualColumnsDescription StorageDistributed::createVirtuals() { - /// NOTE This is weird. Most of these virtual columns are part of MergeTree + /// NOTE: This is weird. + /// Most of these virtual columns are part of MergeTree /// tables info. But Distributed is general-purpose engine. 
- return NamesAndTypesList{ - NameAndTypePair("_table", std::make_shared(std::make_shared())), - NameAndTypePair("_part", std::make_shared(std::make_shared())), - NameAndTypePair("_part_index", std::make_shared()), - NameAndTypePair("_part_uuid", std::make_shared()), - NameAndTypePair("_partition_id", std::make_shared(std::make_shared())), - NameAndTypePair("_sample_factor", std::make_shared()), - NameAndTypePair("_part_offset", std::make_shared()), - NameAndTypePair("_row_exists", std::make_shared()), - NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type), - NameAndTypePair("_shard_num", std::make_shared()), /// deprecated - }; + StorageInMemoryMetadata metadata; + auto desc = MergeTreeData::createVirtuals(metadata); + + desc.addEphemeral("_table", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_shard_num", std::make_shared(), "deprecated"); + + return desc; } StorageDistributed::StorageDistributed( @@ -354,6 +351,7 @@ StorageDistributed::StorageDistributed( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); if (sharding_key_) { diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index c00dd8cea04..51e4ccd4da3 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -9,6 +9,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" #include #include @@ -146,8 +147,6 @@ public: ActionLock getActionLock(StorageActionBlockType type) override; - NamesAndTypesList getVirtuals() const override; - /// Used by InterpreterInsertQuery std::string getRemoteDatabaseName() const { return remote_database; } std::string getRemoteTableName() const { return remote_table; } @@ -234,6 +233,8 @@ private: std::optional distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context) const; std::optional distributedWriteBetweenDistributedTables(const StorageDistributed & src_distributed, const ASTInsertQuery & query, ContextPtr context) const; + static VirtualColumnsDescription createVirtuals(); + String remote_database; String remote_table; ASTPtr remote_table_function_ptr; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b827670bd4c..0ddfc2a6bb4 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,7 @@ #include #include #include +#include "Storages/StorageSnapshot.h" #include #include @@ -132,6 +134,7 @@ StorageMerge::StorageMerge( storage_metadata.setColumns(columns_.empty() ? getColumnsDescriptionFromSourceTables() : columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); } StorageMerge::StorageMerge( @@ -154,6 +157,7 @@ StorageMerge::StorageMerge( storage_metadata.setColumns(columns_.empty() ? getColumnsDescriptionFromSourceTables() : columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); } StorageMerge::DatabaseTablesIterators StorageMerge::getDatabaseIterators(ContextPtr context_) const @@ -306,6 +310,37 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( return selected_table_size == 1 ? 
stage_in_source_tables : std::min(stage_in_source_tables, QueryProcessingStage::WithMergeableState); } +VirtualColumnsDescription StorageMerge::createVirtuals() +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_database", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_table", std::make_shared(std::make_shared()), ""); + + return desc; +} + +StorageSnapshotPtr StorageMerge::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const +{ + static const auto common_virtuals = createVirtuals(); + + auto virtuals = common_virtuals; + if (auto first_table = getFirstTable([](auto && table) { return table; })) + { + auto table_virtuals = first_table->getVirtualsDescription(); + for (const auto & column : *table_virtuals) + { + if (virtuals.has(column.name)) + continue; + + virtuals.add(column); + } + } + + auto virtuals_ptr = std::make_shared(std::move(virtuals)); + return std::make_shared(*this, metadata_snapshot, std::move(virtuals_ptr)); +} + void StorageMerge::read( QueryPlan & query_plan, const Names & column_names, @@ -897,7 +932,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ column_node = std::make_shared(NameAndTypePair{column, storage_columns.getColumn(get_column_options, column).type }, modified_query_info.table_expression); } - PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); actions_visitor.visit(filter_actions_dag, column_node); } @@ -1375,6 +1409,7 @@ void StorageMerge::alter( params.apply(storage_metadata, local_context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, storage_metadata); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); } void ReadFromMerge::convertAndFilterSourceStream( @@ -1634,20 +1669,4 @@ void registerStorageMerge(StorageFactory & factory) }); } -NamesAndTypesList StorageMerge::getVirtuals() const -{ - NamesAndTypesList virtuals{ - {"_database", std::make_shared(std::make_shared())}, - {"_table", std::make_shared(std::make_shared())}}; - - auto first_table = getFirstTable([](auto && table) { return table; }); - if (first_table) - { - auto table_virtuals = first_table->getVirtuals(); - virtuals.insert(virtuals.end(), table_virtuals.begin(), table_virtuals.end()); - } - - return virtuals; -} - } diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 6959545430c..661750fb6dd 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -56,6 +56,8 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const override; + void read( QueryPlan & query_plan, const Names & column_names, @@ -117,11 +119,12 @@ private: template void forEachTable(F && func) const; - NamesAndTypesList getVirtuals() const override; ColumnSizeByName getColumnSizes() const override; ColumnsDescription getColumnsDescriptionFromSourceTables() const; + static VirtualColumnsDescription createVirtuals(); + bool tableSupportsPrewhere() const; template diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 73546093ff6..71d2809e18a 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -28,6 
+27,16 @@ StorageSnapshot::StorageSnapshot( { } +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + VirtualsDescriptionPtr virtual_columns_) + : storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(std::move(virtual_columns_)) +{ +} + StorageSnapshot::StorageSnapshot( const IStorage & storage_, StorageMetadataPtr metadata_, diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index e0af0525b26..390a5037780 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -37,6 +37,11 @@ struct StorageSnapshot const IStorage & storage_, StorageMetadataPtr metadata_); + StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + VirtualsDescriptionPtr virtual_columns_); + StorageSnapshot( const IStorage & storage_, StorageMetadataPtr metadata_, diff --git a/tests/queries/0_stateless/01848_partition_value_column.sql b/tests/queries/0_stateless/01848_partition_value_column.sql index 28d842af3e9..de5e766c92c 100644 --- a/tests/queries/0_stateless/01848_partition_value_column.sql +++ b/tests/queries/0_stateless/01848_partition_value_column.sql @@ -14,8 +14,8 @@ select count() from tbl where _partition_value.3 = 4 settings max_rows_to_read = create table tbl2(i int) engine MergeTree order by i; insert into tbl2 values (1); -select _partition_value from tbl2; -- { serverError 16 } -select _partition_value from tbl2 group by 1; -- { serverError 16 } +select _partition_value from tbl2; -- { serverError UNKNOWN_IDENTIFIER } +select _partition_value from tbl2 group by 1; -- { serverError UNKNOWN_IDENTIFIER } drop table tbl; drop table tbl2; From 2ccaf954f61576efbae4b2920aea792152fe134d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Feb 2024 20:46:37 +0000 Subject: [PATCH 082/356] Fix wacky primary key sorting in SHOW INDEX --- .../InterpreterShowIndexesQuery.cpp | 4 ++-- .../0_stateless/02724_show_indexes.reference | 24 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index e8005ead91e..5be72dc8ce6 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -37,7 +37,7 @@ FROM ( name AS table, 1 AS non_unique, 'PRIMARY' AS key_name, - row_number() over (order by column_name) AS seq_in_index, + row_number() over (order by null) AS seq_in_index, arrayJoin(splitByString(', ', primary_key)) AS column_name, 'A' AS collation, 0 AS cardinality, @@ -75,7 +75,7 @@ FROM ( database = '{0}' AND table = '{1}')) {2} -ORDER BY index_type, expression, column_name, seq_in_index;)", database, table, where_expression); +ORDER BY index_type, expression, seq_in_index;)", database, table, where_expression); /// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ...) is rejected) and 3. 
some diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index e41f2521f5c..ac0461fc506 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -2,33 +2,33 @@ tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- EXTENDED tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- WHERE --- Check with weird table names @@ -40,8 +40,8 @@ NULL 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- Equally named table in other database tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b From 7077499064538a43617e56b28b21411b9ee11828 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 28 Feb 2024 20:50:56 +0000 Subject: [PATCH 083/356] PullingAsyncPipelineExecutor cleanup lazy_format is used always --- .../PullingAsyncPipelineExecutor.cpp | 37 +++---------------- .../Executors/PushingAsyncPipelineExecutor.h | 1 - 2 files changed, 6 insertions(+), 32 deletions(-) diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 345bec395b2..d27002197d2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -23,7 +23,6 @@ struct PullingAsyncPipelineExecutor::Data std::atomic_bool is_finished = false; std::atomic_bool has_exception = false; ThreadFromGlobalPool thread; 
- Poco::Event finish_event; ~Data() { @@ -89,12 +88,10 @@ static void threadFunction( data.has_exception = true; /// Finish lazy format in case of exception. Otherwise thread.join() may hung. - if (data.lazy_format) - data.lazy_format->finalize(); + data.lazy_format->finalize(); } data.is_finished = true; - data.finish_event.set(); } @@ -129,20 +126,8 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) return false; } - if (lazy_format) - { - chunk = lazy_format->getChunk(milliseconds); - data->rethrowExceptionIfHas(); - return true; - } - - chunk.clear(); - - if (milliseconds) - data->finish_event.tryWait(milliseconds); - else - data->finish_event.wait(); - + chunk = lazy_format->getChunk(milliseconds); + data->rethrowExceptionIfHas(); return true; } @@ -230,14 +215,12 @@ void PullingAsyncPipelineExecutor::cancelWithExceptionHandling(CancelFunc && can Chunk PullingAsyncPipelineExecutor::getTotals() { - return lazy_format ? lazy_format->getTotals() - : Chunk(); + return lazy_format->getTotals(); } Chunk PullingAsyncPipelineExecutor::getExtremes() { - return lazy_format ? lazy_format->getExtremes() - : Chunk(); + return lazy_format->getExtremes(); } Block PullingAsyncPipelineExecutor::getTotalsBlock() @@ -264,15 +247,7 @@ Block PullingAsyncPipelineExecutor::getExtremesBlock() ProfileInfo & PullingAsyncPipelineExecutor::getProfileInfo() { - if (lazy_format) - return lazy_format->getProfileInfo(); - - static ProfileInfo profile_info; - static std::once_flag flag; - /// Calculate rows before limit here to avoid race. - std::call_once(flag, []() { profile_info.getRowsBeforeLimit(); }); - - return profile_info; + return lazy_format->getProfileInfo(); } } diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.h b/src/Processors/Executors/PushingAsyncPipelineExecutor.h index 4b4b83a90b5..f976cd4c339 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.h +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.h @@ -1,6 +1,5 @@ #pragma once #include -#include #include namespace DB From dc135294f0ecee44c3e56cd5127baf742674f04e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 25 Feb 2024 23:49:25 +0100 Subject: [PATCH 084/356] add seccomp=unconfined --- tests/ci/clickbench.py | 1 + tests/ci/fast_test_check.py | 3 ++- tests/ci/functional_test_check.py | 1 + tests/ci/libfuzzer_test_check.py | 1 + tests/ci/sqllogic_test.py | 1 + tests/ci/unit_tests_check.py | 1 + 6 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 50c7bb85d28..a1988abb1f5 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -42,6 +42,7 @@ def get_run_command( f"{ci_logs_args}" f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {env_str} {image}" ) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 5d528bb4c48..a624ee06e85 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -37,9 +37,10 @@ def get_fasttest_cmd( ) -> str: return ( f"docker run --cap-add=SYS_PTRACE --user={os.geteuid()}:{os.getegid()} " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls "--network=host " # required to get access to IAM credentials f"-e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output " - f"-e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE " + f"-e 
FASTTEST_SOURCE=/ClickHouse " f"-e FASTTEST_CMAKE_FLAGS='-DCOMPILER_CACHE=sccache' " f"-e PULL_REQUEST_NUMBER={pr_number} -e COMMIT_SHA={commit_sha} " f"-e COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 " diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index da2dea60fc1..160da202a1a 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -107,6 +107,7 @@ def get_run_command( f"{volume_with_broken_test}" f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" ) diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 5f41afe9fb6..465b9a6b3a7 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -81,6 +81,7 @@ def get_run_command( f"--volume={fuzzers_path}:/fuzzers " f"--volume={repo_path}/tests:/usr/share/clickhouse-test " f"--volume={result_path}:/test_output " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" ) diff --git a/tests/ci/sqllogic_test.py b/tests/ci/sqllogic_test.py index e9a109e425e..6ea6fa19d91 100755 --- a/tests/ci/sqllogic_test.py +++ b/tests/ci/sqllogic_test.py @@ -42,6 +42,7 @@ def get_run_command( f"--volume={repo_tests_path}:/clickhouse-tests " f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {image}" ) diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 41c52d53020..f64f114d3de 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -176,6 +176,7 @@ def main(): run_command = ( f"docker run --cap-add=SYS_PTRACE --volume={tests_binary}:/unit_tests_dbms " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--volume={test_output}:/test_output {docker_image}" ) From 27923ee61918ad3ffc7e42cc0f12f471dd11c8eb Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Feb 2024 19:04:25 +0100 Subject: [PATCH 085/356] revert revert --- src/Storages/StorageFile.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 595573b566d..7d674fea9ca 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -92,6 +94,7 @@ namespace ErrorCodes extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; extern const int CANNOT_DETECT_FORMAT; extern const int CANNOT_COMPILE_REGEXP; + extern const int UNSUPPORTED_METHOD; } namespace @@ -276,6 +279,22 @@ std::unique_ptr selectReadBuffer( ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); } + else if (read_method == LocalFSReadMethod::io_uring && !use_table_fd) + { +#if USE_LIBURING + auto & reader = context->getIOURingReader(); + if (!reader.isSupported()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); + + res = std::make_unique( + reader, + Priority{}, + current_path, + context->getSettingsRef().max_read_buffer_size); +#else + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Read method io_uring is only supported in Linux"); +#endif + } else { if (use_table_fd) From 
763bd227259c8f54c0babcb13916ac7dc6c8205a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2024 23:43:03 +0100 Subject: [PATCH 086/356] Synchronize metrics and Keeper --- src/Common/CurrentMetrics.cpp | 12 +++++- src/Common/ErrorCodes.cpp | 4 ++ src/Common/FailPoint.cpp | 8 ++++ src/Common/ProfileEvents.cpp | 48 +++++++++++++++++++++++- src/Common/SystemLogBase.cpp | 1 + src/Common/SystemLogBase.h | 1 + src/Common/ThreadStatus.cpp | 5 ++- src/Common/ZooKeeper/IKeeper.h | 5 +++ src/Common/ZooKeeper/TestKeeper.cpp | 11 ++++++ src/Common/ZooKeeper/TestKeeper.h | 4 ++ src/Common/ZooKeeper/ZooKeeper.cpp | 5 +++ src/Common/ZooKeeper/ZooKeeper.h | 1 + src/Common/ZooKeeper/ZooKeeperCommon.cpp | 10 +++++ src/Common/ZooKeeper/ZooKeeperCommon.h | 7 +--- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 7 ++++ src/Common/ZooKeeper/ZooKeeperImpl.h | 4 ++ src/Common/ZooKeeper/ZooKeeperRetries.h | 15 ++++++++ 17 files changed, 139 insertions(+), 9 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 6931001202d..82da4c4bbad 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -264,7 +264,17 @@ M(RefreshingViews, "Number of materialized views currently executing a refresh") \ M(StorageBufferFlushThreads, "Number of threads for background flushes in StorageBuffer") \ M(StorageBufferFlushThreadsActive, "Number of threads for background flushes in StorageBuffer running a task") \ - M(StorageBufferFlushThreadsScheduled, "Number of queued or active threads for background flushes in StorageBuffer") + M(StorageBufferFlushThreadsScheduled, "Number of queued or active threads for background flushes in StorageBuffer") \ + M(SharedMergeTreeThreads, "Number of threads in the thread pools in internals of SharedMergeTree") \ + M(SharedMergeTreeThreadsActive, "Number of threads in the thread pools in internals of SharedMergeTree running a task") \ + M(SharedMergeTreeThreadsScheduled, "Number of queued or active threads in the thread pools in internals of SharedMergeTree") \ + M(SharedMergeTreeFetch, "Number of fetches in progress") \ + M(CacheWarmerBytesInProgress, "Total size of remote file segments waiting to be asynchronously loaded into filesystem cache.") \ + M(DistrCacheOpenedConnections, "Number of open connections to Distributed Cache") \ + M(DistrCacheUsedConnections, "Number of currently used connections to Distributed Cache") \ + M(DistrCacheReadRequests, "Number of executed Read requests to Distributed Cache") \ + M(DistrCacheWriteRequests, "Number of executed Write requests to Distributed Cache") \ + M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache") #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ca00f2fd513..eca4db2307c 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -585,6 +585,10 @@ M(703, INVALID_IDENTIFIER) \ M(704, QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS) \ M(705, TABLE_NOT_EMPTY) \ + \ + M(900, DISTRIBUTED_CACHE_ERROR) \ + M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ + \ M(706, LIBSSH_ERROR) \ M(707, GCP_ERROR) \ M(708, ILLEGAL_STATISTIC) \ diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index a23133b7522..9e551c8f2cd 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -39,6 +39,14 @@ static struct InitFiu 
REGULAR(replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault) \ REGULAR(use_delayed_remote_source) \ REGULAR(cluster_discovery_faults) \ + ONCE(smt_commit_merge_mutate_zk_fail_after_op) \ + ONCE(smt_commit_merge_mutate_zk_fail_before_op) \ + ONCE(smt_commit_write_zk_fail_after_op) \ + ONCE(smt_commit_write_zk_fail_before_op) \ + ONCE(smt_commit_merge_change_version_before_op) \ + ONCE(smt_merge_mutate_intention_freeze_in_destructor) \ + ONCE(meta_in_keeper_create_metadata_failure) \ + REGULAR(cache_warmer_stall) \ REGULAR(check_table_query_delay_for_part) \ REGULAR(dummy_failpoint) \ REGULAR(prefetched_reader_pool_failpoint) \ diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d8ca1ab9e93..53da7901577 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -92,6 +92,8 @@ M(LocalWriteThrottlerBytes, "Bytes passed through 'max_local_write_bandwidth_for_server'/'max_local_write_bandwidth' throttler.") \ M(LocalWriteThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_local_write_bandwidth_for_server'/'max_local_write_bandwidth' throttling.") \ M(ThrottlerSleepMicroseconds, "Total time a query was sleeping to conform all throttling settings.") \ + M(PartsWithAppliedMutationsOnFly, "Total number of parts for which there was any mutation applied on fly") \ + M(MutationsAppliedOnFlyInAllParts, "The sum of number of applied mutations on-fly for part among all read parts") \ \ M(QueryMaskingRulesMatch, "Number of times query masking rules was successfully matched.") \ \ @@ -311,6 +313,12 @@ The server successfully detected this situation and will download merged part fr M(ParallelReplicasProcessingPartsMicroseconds, "Time spent processing data parts") \ M(ParallelReplicasStealingLeftoversMicroseconds, "Time spent collecting orphaned segments") \ M(ParallelReplicasCollectingOwnedSegmentsMicroseconds, "Time spent collecting segments meant by hash") \ + M(ParallelReplicasNumRequests, "Number of requests to the initiator.") \ + M(ParallelReplicasDeniedRequests, "Number of completely denied requests to the initiator") \ + M(CacheWarmerBytesDownloaded, "Amount of data fetched into filesystem cache by dedicated background threads.") \ + M(CacheWarmerDataPartsDownloaded, "Number of data parts that were fully fetched by CacheWarmer.") \ + M(IgnoredColdParts, "See setting ignore_cold_parts_seconds. Number of times read queries ignored very new parts that weren't pulled into cache by CacheWarmer yet.") \ + M(PreferredWarmedUnmergedParts, "See setting prefer_warmed_unmerged_parts_seconds. Number of times read queries used outdated pre-merge parts that are in cache instead of merged part that wasn't pulled into cache by CacheWarmer yet.") \ \ M(PerfCPUCycles, "Total cycles. Be wary of what happens during CPU frequency scaling.") \ M(PerfInstructions, "Retired instructions. 
Be careful, these can be affected by various issues, most notably hardware interrupt counts.") \ @@ -516,6 +524,21 @@ The server successfully detected this situation and will download merged part fr M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \ M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \ \ + M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \ + M(MetadataFromKeeperCacheMiss, "Number of times an object storage metadata request had to be answered from Keeper") \ + M(MetadataFromKeeperCacheUpdateMicroseconds, "Total time spent in updating the cache including waiting for responses from Keeper") \ + M(MetadataFromKeeperUpdateCacheOneLevel, "Number of times a cache update for one level of directory tree was done") \ + M(MetadataFromKeeperTransactionCommit, "Number of times metadata transaction commit was attempted") \ + M(MetadataFromKeeperTransactionCommitRetry, "Number of times metadata transaction commit was retried") \ + M(MetadataFromKeeperCleanupTransactionCommit, "Number of times metadata transaction commit for deleted objects cleanup was attempted") \ + M(MetadataFromKeeperCleanupTransactionCommitRetry, "Number of times metadata transaction commit for deleted objects cleanup was retried") \ + M(MetadataFromKeeperOperations, "Number of times a request was made to Keeper") \ + M(MetadataFromKeeperIndividualOperations, "Number of paths read or written by single or multi requests to Keeper") \ + M(MetadataFromKeeperReconnects, "Number of times a reconnect to Keeper was done") \ + M(MetadataFromKeeperBackgroundCleanupObjects, "Number of times a old deleted object clean up was performed by background task") \ + M(MetadataFromKeeperBackgroundCleanupTransactions, "Number of times old transaction idempotency token was cleaned up by background task") \ + M(MetadataFromKeeperBackgroundCleanupErrors, "Number of times an error was encountered in background cleanup task") \ + \ M(KafkaRebalanceRevocations, "Number of partition revocations (the first stage of consumer group rebalance)") \ M(KafkaRebalanceAssignments, "Number of partition assignments (the final stage of consumer group rebalance)") \ M(KafkaRebalanceErrors, "Number of failed consumer group rebalances") \ @@ -607,9 +630,32 @@ The server successfully detected this situation and will download merged part fr M(MergeTreeAllRangesAnnouncementsSentElapsedMicroseconds, "Time spent in sending the announcement from the remote server to the initiator server about the set of data parts (for MergeTree tables). 
Measured on the remote server side.") \ \ M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.") \ - \ M(AsyncLoaderWaitMicroseconds, "Total time a query was waiting for async loader jobs.") \ \ + M(DistrCacheServerSwitches, "Number of server switches between distributed cache servers in read/write-through cache") \ + M(DistrCacheReadMicroseconds, "Time spent reading from distributed cache") \ + M(DistrCacheFallbackReadMicroseconds, "Time spend reading from fallback buffer instead of distribted cache") \ + M(DistrCachePrecomputeRangesMicroseconds, "Time spent to precompute read ranges") \ + M(DistrCacheNextImplMicroseconds, "Time spend in ReadBufferFromDistributedCache::nextImpl") \ + M(DistrCacheOpenedConnections, "The number of open connections to distributed cache") \ + M(DistrCacheReusedConnections, "The number of reused connections to distributed cache") \ + M(DistrCacheHoldConnections, "The number of used connections to distributed cache") \ + \ + M(DistrCacheGetResponseMicroseconds, "Time spend to wait for response from distributed cache") \ + M(DistrCacheStartRangeMicroseconds, "Time spent to start a new read range with distributed cache") \ + M(DistrCacheLockRegistryMicroseconds, "Time spent to take DistributedCacheRegistry lock") \ + M(DistrCacheUnusedPackets, "Number of skipped unused packets from distributed cache") \ + M(DistrCachePackets, "Total number of packets received from distributed cache") \ + M(DistrCacheUnusedPacketsBytes, "The number of bytes in Data packets which were ignored") \ + M(DistrCacheRegistryUpdateMicroseconds, "Time spent updating distributed cache registry") \ + M(DistrCacheRegistryUpdates, "Number of distributed cache registry updates") \ + \ + M(DistrCacheConnectMicroseconds, "The time spent to connect to distributed cache") \ + M(DistrCacheConnectAttempts, "The number of connection attempts to distributed cache") \ + M(DistrCacheGetClient, "Number of client access times") \ + \ + M(DistrCacheServerProcessRequestMicroseconds, "Time spent processing request on DistributedCache server side") \ + \ M(LogTest, "Number of log messages with level Test") \ M(LogTrace, "Number of log messages with level Trace") \ M(LogDebug, "Number of log messages with level Debug") \ diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 4dee6d905d9..0e7287c59ac 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index a734c70f285..c509887cd28 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -29,6 +29,7 @@ M(TextLogElement) \ M(S3QueueLogElement) \ M(FilesystemCacheLogElement) \ + M(DistributedCacheLogElement) \ M(FilesystemReadPrefetchesLogElement) \ M(AsynchronousInsertLogElement) \ M(BackupLogElement) \ diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 05524a5d6b9..cf50d305e95 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -196,8 +196,9 @@ bool ThreadStatus::isQueryCanceled() const if (!thread_group) return false; - chassert(local_data.query_is_canceled_predicate); - return local_data.query_is_canceled_predicate(); + if (local_data.query_is_canceled_predicate) + return local_data.query_is_canceled_predicate(); + return false; } ThreadStatus::~ThreadStatus() diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 
76cdfe9f230..04f53ead066 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -8,6 +8,7 @@ #include #include #include +#include #include /** Generic interface for ZooKeeper-like services. @@ -622,6 +623,10 @@ public: int32_t version, ReconfigCallback callback) = 0; + virtual void multi( + std::span requests, + MultiCallback callback) = 0; + virtual void multi( const Requests & requests, MultiCallback callback) = 0; diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index a25329ad7c0..fce29a21e15 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -157,6 +157,10 @@ struct TestKeeperReconfigRequest final : ReconfigRequest, TestKeeperRequest struct TestKeeperMultiRequest final : MultiRequest, TestKeeperRequest { explicit TestKeeperMultiRequest(const Requests & generic_requests) + : TestKeeperMultiRequest(std::span(generic_requests)) + {} + + explicit TestKeeperMultiRequest(std::span generic_requests) { requests.reserve(generic_requests.size()); @@ -883,6 +887,13 @@ void TestKeeper::reconfig( void TestKeeper::multi( const Requests & requests, MultiCallback callback) +{ + return multi(std::span(requests), std::move(callback)); +} + +void TestKeeper::multi( + std::span requests, + MultiCallback callback) { TestKeeperMultiRequest request(requests); diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 36db5accff1..2774055652c 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -101,6 +101,10 @@ public: const Requests & requests, MultiCallback callback) override; + void multi( + std::span requests, + MultiCallback callback) override; + void finalize(const String & reason) override; bool isFeatureEnabled(DB::KeeperFeatureFlag) const override diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 93568909041..ca0a211c716 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -1266,6 +1266,11 @@ std::future ZooKeeper::asyncTryRemoveNoThrow(const } std::future ZooKeeper::asyncTryMultiNoThrow(const Coordination::Requests & ops) +{ + return asyncTryMultiNoThrow(std::span(ops)); +} + +std::future ZooKeeper::asyncTryMultiNoThrow(std::span ops) { auto promise = std::make_shared>(); auto future = promise->get_future(); diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index f1c333bb378..b2e159b0450 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -550,6 +550,7 @@ public: FutureMulti asyncMulti(const Coordination::Requests & ops); /// Like the previous one but don't throw any exceptions on future.get() FutureMulti asyncTryMultiNoThrow(const Coordination::Requests & ops); + FutureMulti asyncTryMultiNoThrow(std::span ops); using FutureSync = std::future; FutureSync asyncSync(const std::string & path); diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 660ae59e81e..4634eae7759 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -156,6 +156,12 @@ std::string ZooKeeperAuthRequest::toStringImpl() const void ZooKeeperCreateRequest::writeImpl(WriteBuffer & out) const { + /// See https://github.com/ClickHouse/clickhouse-private/issues/3029 + if (path.starts_with("/clickhouse/tables/") && path.find("/parts/") != std::string::npos) + { + LOG_TRACE(getLogger(__PRETTY_FUNCTION__), "Creating part at 
path {}", path); + } + Coordination::write(path, out); Coordination::write(data, out); Coordination::write(acls, out); @@ -480,6 +486,10 @@ OpNum ZooKeeperMultiRequest::getOpNum() const } ZooKeeperMultiRequest::ZooKeeperMultiRequest(const Requests & generic_requests, const ACLs & default_acls) + : ZooKeeperMultiRequest(std::span{generic_requests}, default_acls) +{} + +ZooKeeperMultiRequest::ZooKeeperMultiRequest(std::span generic_requests, const ACLs & default_acls) { /// Convert nested Requests to ZooKeeperRequests. /// Note that deep copy is required to avoid modifying path in presence of chroot prefix. diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 5289be7a816..a1bd9b582e9 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -7,17 +7,13 @@ #include #include #include -#include #include -#include -#include #include #include -#include -#include #include #include #include +#include namespace Coordination @@ -516,6 +512,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest ZooKeeperMultiRequest() = default; ZooKeeperMultiRequest(const Requests & generic_requests, const ACLs & default_acls); + ZooKeeperMultiRequest(std::span generic_requests, const ACLs & default_acls); void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 1fbadbd7616..8fd6e89dfd9 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1454,6 +1454,13 @@ void ZooKeeper::reconfig( void ZooKeeper::multi( const Requests & requests, MultiCallback callback) +{ + multi(std::span(requests), std::move(callback)); +} + +void ZooKeeper::multi( + std::span requests, + MultiCallback callback) { ZooKeeperMultiRequest request(requests, default_acls); diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index b63f67bf7a6..d089ab7cc04 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -194,6 +194,10 @@ public: int32_t version, ReconfigCallback callback) final; + void multi( + std::span requests, + MultiCallback callback) override; + void multi( const Requests & requests, MultiCallback callback) override; diff --git a/src/Common/ZooKeeper/ZooKeeperRetries.h b/src/Common/ZooKeeper/ZooKeeperRetries.h index ecef174c6c7..d411549346a 100644 --- a/src/Common/ZooKeeper/ZooKeeperRetries.h +++ b/src/Common/ZooKeeper/ZooKeeperRetries.h @@ -147,6 +147,11 @@ public: user_error = UserError{}; } + void setKeeperError(const zkutil::KeeperException & exception) + { + setKeeperError(std::make_exception_ptr(exception), exception.code, exception.message()); + } + void stopRetries() { stop_retries = true; } bool isLastRetry() const { return total_failures >= retries_info.max_retries; } @@ -180,6 +185,12 @@ private: bool canTry() { + if (unconditional_retry) + { + unconditional_retry = false; + return true; + } + if (iteration_succeeded) { if (logger && total_failures > 0) @@ -275,6 +286,10 @@ private: UInt64 current_iteration = 0; UInt64 current_backoff_ms = 0; + +public: + /// This is used in SharedMergeTree + bool unconditional_retry = false; }; } From 282c3b55f21e3b3da1cccf0570fd732097d98305 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2024 23:47:33 +0100 Subject: [PATCH 087/356] Synchronize small pieces --- src/Coordination/KeeperSnapshotManagerS3.cpp | 1 + 
src/Coordination/Standalone/Context.cpp | 5 +++++ src/Coordination/Standalone/Context.h | 3 +++ src/Core/MySQL/Authentication.cpp | 5 +++++ src/Core/SettingsEnums.cpp | 8 ++++++++ src/Core/SettingsEnums.h | 4 ++++ src/DataTypes/Serializations/ISerialization.cpp | 1 - 7 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 0337a564660..f2d861e8fd4 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -215,6 +215,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh } /// To avoid reference to binding + const auto & snapshot_path_ref = snapshot_path; SCOPE_EXIT( diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 374610769c4..264cf118501 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -382,4 +382,9 @@ std::shared_ptr Context::getZooKeeper() const throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); } +const ServerSettings & Context::getServerSettings() const +{ + return shared->server_settings; +} + } diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index 49ad2b568fe..adb9111185f 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -160,6 +161,8 @@ public: void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); zkutil::ZooKeeperPtr getZooKeeper() const; + + const ServerSettings & getServerSettings() const; }; } diff --git a/src/Core/MySQL/Authentication.cpp b/src/Core/MySQL/Authentication.cpp index ac6ed70dbb5..ac625e216cd 100644 --- a/src/Core/MySQL/Authentication.cpp +++ b/src/Core/MySQL/Authentication.cpp @@ -9,6 +9,11 @@ #include #include +#include +#include + + +using namespace std::literals; namespace DB { diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 04e1d0a18c8..ba41a4ed7e7 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -56,6 +56,14 @@ IMPLEMENT_SETTING_ENUM(OverflowMode, ErrorCodes::UNKNOWN_OVERFLOW_MODE, {{"throw", OverflowMode::THROW}, {"break", OverflowMode::BREAK}}) +IMPLEMENT_SETTING_ENUM(DistributedCacheLogMode, ErrorCodes::BAD_ARGUMENTS, + {{"nothing", DistributedCacheLogMode::LOG_NOTHING}, + {"on_error", DistributedCacheLogMode::LOG_ON_ERROR}, + {"all", DistributedCacheLogMode::LOG_ALL}}) + +IMPLEMENT_SETTING_ENUM(DistributedCachePoolBehaviourOnLimit, ErrorCodes::BAD_ARGUMENTS, + {{"wait", DistributedCachePoolBehaviourOnLimit::WAIT}, + {"allocate_bypassing_pool", DistributedCachePoolBehaviourOnLimit::ALLOCATE_NEW_BYPASSING_POOL}}); IMPLEMENT_SETTING_ENUM(OverflowModeGroupBy, ErrorCodes::UNKNOWN_OVERFLOW_MODE, {{"throw", OverflowMode::THROW}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 691eefbd4e6..db9842aaf86 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -141,6 +141,10 @@ enum class DefaultTableEngine DECLARE_SETTING_ENUM(DefaultTableEngine) +DECLARE_SETTING_ENUM(DistributedCacheLogMode) + +DECLARE_SETTING_ENUM(DistributedCachePoolBehaviourOnLimit) + enum class CleanDeletedRows { Never = 0, /// Disable. 
diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 7d57d72090b..a3a28f8091c 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -417,4 +417,3 @@ void ISerialization::throwUnexpectedDataAfterParsedValue(IColumn & column, ReadB } } - From 3b18eb4f17e5c85afff8d2a6f07ec77c0ab98129 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Feb 2024 22:47:34 +0000 Subject: [PATCH 088/356] Fix clang-tidy in some headers --- .clang-tidy | 2 +- programs/copier/Internals.h | 2 +- programs/odbc-bridge/ODBCPooledConnectionFactory.h | 3 +-- programs/server/Server.cpp | 2 +- src/Backups/registerBackupEngineS3.cpp | 6 ++---- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 3903911a277..0dacf813c7e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -10,7 +10,7 @@ # TODO Let clang-tidy check headers in further directories # --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$' -HeaderFilterRegex: '^.*/(base)/.*(h|hpp)$' +HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$' Checks: '*, -abseil-*, diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index 48f4b0fab09..27fedd5d9e8 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -102,7 +102,7 @@ struct TaskStateWithOwner return TaskStateWithOwner(state, owner).toString(); } - String toString() + String toString() const { WriteBufferFromOwnString wb; wb << static_cast(state) << "\n" << escape << owner; diff --git a/programs/odbc-bridge/ODBCPooledConnectionFactory.h b/programs/odbc-bridge/ODBCPooledConnectionFactory.h index b70e45f2b9d..c4e3d4c12c6 100644 --- a/programs/odbc-bridge/ODBCPooledConnectionFactory.h +++ b/programs/odbc-bridge/ODBCPooledConnectionFactory.h @@ -40,7 +40,6 @@ public: explicit ConnectionHolder(const String & connection_string_) : pool(nullptr) - , connection() , connection_string(connection_string_) { updateConnection(); @@ -143,7 +142,7 @@ public: { std::lock_guard lock(mutex); - if (!factory.count(connection_string)) + if (!factory.contains(connection_string)) factory.emplace(std::make_pair(connection_string, std::make_shared(pool_size))); auto & pool = factory[connection_string]; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 6dc33042a05..eb38c7e2ff5 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -184,7 +184,7 @@ static bool jemallocOptionEnabled(const char *name) return value; } #else -static bool jemallocOptionEnabled(const char *) { return 0; } +static bool jemallocOptionEnabled(const char *) { return false; } #endif int mainEntryClickHouseServer(int argc, char ** argv) diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index 86941040021..fed5c6b4d22 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -15,8 +15,6 @@ namespace DB { -namespace fs = std::filesystem; - namespace ErrorCodes { extern const int BAD_ARGUMENTS; @@ -65,13 +63,13 @@ void registerBackupEngineS3(BackupFactory & factory) secret_access_key = config.getString(config_prefix + ".secret_access_key", ""); if (config.has(config_prefix + ".filename")) - s3_uri = fs::path(s3_uri) / config.getString(config_prefix + ".filename"); + s3_uri = std::filesystem::path(s3_uri) / config.getString(config_prefix + ".filename"); if (args.size() > 1) throw 
Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup S3 requires 1 or 2 arguments: named_collection, [filename]"); if (args.size() == 1) - s3_uri = fs::path(s3_uri) / args[0].safeGet(); + s3_uri = std::filesystem::path(s3_uri) / args[0].safeGet(); } else { From 704b32fdcd7d0cb6171d8b6fdcb6d440e4f5f4ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2024 23:49:35 +0100 Subject: [PATCH 089/356] Fix build --- src/Core/SettingsEnums.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index db9842aaf86..0d0138e6246 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -141,9 +141,6 @@ enum class DefaultTableEngine DECLARE_SETTING_ENUM(DefaultTableEngine) -DECLARE_SETTING_ENUM(DistributedCacheLogMode) - -DECLARE_SETTING_ENUM(DistributedCachePoolBehaviourOnLimit) enum class CleanDeletedRows { From 9e826bb11ce9482dfa1cc1984618553b0a682c72 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2024 23:50:15 +0100 Subject: [PATCH 090/356] Fix build --- src/Common/SystemLogBase.cpp | 1 - src/Core/SettingsEnums.cpp | 5 ----- 2 files changed, 6 deletions(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 0e7287c59ac..4dee6d905d9 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index ba41a4ed7e7..64b10e52a85 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -56,11 +56,6 @@ IMPLEMENT_SETTING_ENUM(OverflowMode, ErrorCodes::UNKNOWN_OVERFLOW_MODE, {{"throw", OverflowMode::THROW}, {"break", OverflowMode::BREAK}}) -IMPLEMENT_SETTING_ENUM(DistributedCacheLogMode, ErrorCodes::BAD_ARGUMENTS, - {{"nothing", DistributedCacheLogMode::LOG_NOTHING}, - {"on_error", DistributedCacheLogMode::LOG_ON_ERROR}, - {"all", DistributedCacheLogMode::LOG_ALL}}) - IMPLEMENT_SETTING_ENUM(DistributedCachePoolBehaviourOnLimit, ErrorCodes::BAD_ARGUMENTS, {{"wait", DistributedCachePoolBehaviourOnLimit::WAIT}, {"allocate_bypassing_pool", DistributedCachePoolBehaviourOnLimit::ALLOCATE_NEW_BYPASSING_POOL}}); From c6d8cf2afabd99c26881816dd283fddd42b4171c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2024 23:51:31 +0100 Subject: [PATCH 091/356] Fix build --- src/Common/SystemLogBase.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 4dee6d905d9..aef4e19a70c 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -260,10 +260,4 @@ void SystemLogBase::add(LogElement element) template void SystemLogBase::notifyFlush(bool force) { queue->notifyFlush(force); } -#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase; -SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) - -#define INSTANTIATE_SYSTEM_LOG_QUEUE(ELEMENT) template class SystemLogQueue; -SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_QUEUE) - } From a46cb36368a5595e84937e24c427afc93756a54b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2024 23:51:48 +0100 Subject: [PATCH 092/356] Fix build --- src/Core/SettingsEnums.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 64b10e52a85..04e1d0a18c8 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -56,9 +56,6 @@ IMPLEMENT_SETTING_ENUM(OverflowMode, 
ErrorCodes::UNKNOWN_OVERFLOW_MODE, {{"throw", OverflowMode::THROW}, {"break", OverflowMode::BREAK}}) -IMPLEMENT_SETTING_ENUM(DistributedCachePoolBehaviourOnLimit, ErrorCodes::BAD_ARGUMENTS, - {{"wait", DistributedCachePoolBehaviourOnLimit::WAIT}, - {"allocate_bypassing_pool", DistributedCachePoolBehaviourOnLimit::ALLOCATE_NEW_BYPASSING_POOL}}); IMPLEMENT_SETTING_ENUM(OverflowModeGroupBy, ErrorCodes::UNKNOWN_OVERFLOW_MODE, {{"throw", OverflowMode::THROW}, From 1c9bd029ea427d6ecbe4eca0a2d645ecef305e08 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Feb 2024 00:08:36 +0100 Subject: [PATCH 093/356] Synchronize parsers --- src/Parsers/ASTSystemQuery.cpp | 8 ++++++++ src/Parsers/ASTSystemQuery.h | 5 +++++ src/Parsers/ParserSystemQuery.cpp | 20 ++++++++++++++------ 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 0713737af95..66f949ae3b5 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -172,6 +172,8 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s case Type::START_PULLING_REPLICATION_LOG: case Type::STOP_CLEANUP: case Type::START_CLEANUP: + case Type::START_VIRTUAL_PARTS_UPDATE: + case Type::STOP_VIRTUAL_PARTS_UPDATE: { if (table) { @@ -294,6 +296,12 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s } break; } + case Type::DROP_DISTRIBUTED_CACHE: + { + if (!distributed_cache_servive_id.empty()) + settings.ostr << (settings.hilite ? hilite_none : "") << " " << distributed_cache_servive_id; + break; + } case Type::UNFREEZE: { print_keyword(" WITH NAME "); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 9aa90f499d0..b6fa790315e 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -30,6 +30,7 @@ public: DROP_QUERY_CACHE, DROP_COMPILED_EXPRESSION_CACHE, DROP_FILESYSTEM_CACHE, + DROP_DISTRIBUTED_CACHE, DROP_DISK_METADATA_CACHE, DROP_SCHEMA_CACHE, DROP_FORMAT_SCHEMA_CACHE, @@ -98,6 +99,8 @@ public: STOP_VIEWS, CANCEL_VIEW, TEST_VIEW, + STOP_VIRTUAL_PARTS_UPDATE, + START_VIRTUAL_PARTS_UPDATE, END }; @@ -126,6 +129,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; + String distributed_cache_servive_id; + std::string key_to_drop; std::optional offset_to_drop; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index a50e65aa134..facf1f8b820 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -14,11 +14,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int SUPPORT_IS_DISABLED; -} - [[nodiscard]] static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, bool require_table, bool allow_string_literal) { @@ -397,6 +392,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_PULLING_REPLICATION_LOG: case Type::STOP_CLEANUP: case Type::START_CLEANUP: + case Type::STOP_VIRTUAL_PARTS_UPDATE: + case Type::START_VIRTUAL_PARTS_UPDATE: if (!parseQueryWithOnCluster(res, pos, expected)) return false; parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); @@ -470,6 +467,15 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; break; } + case Type::DROP_DISTRIBUTED_CACHE: + { + ParserLiteral parser; + ASTPtr ast; + if (!parser.parse(pos, ast, expected)) + return false; + res->distributed_cache_servive_id = 
ast->as()->value.safeGet(); + break; + } case Type::SYNC_FILESYSTEM_CACHE: { ParserLiteral path_parser; @@ -482,7 +488,9 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & } case Type::DROP_DISK_METADATA_CACHE: { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); + if (!parseQueryWithOnClusterAndTarget(res, pos, expected, SystemQueryTargetType::Disk)) + return false; + break; } case Type::DROP_SCHEMA_CACHE: { From fc58ccfa9ee0a37539152d11e401c979f63d4713 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 28 Feb 2024 20:15:54 -0800 Subject: [PATCH 094/356] Fix_max_query_size_for_kql_compound_operato: Fix the issue of max_query_size for kql compound operator like mv-expand this fix another use case for PR 59626 --- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 +++-- tests/queries/0_stateless/02366_kql_mvexpand.sql | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 29b26b4e3fe..a54a2b0eda9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -416,8 +416,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserToken s_dash(TokenType::Minus); if (s_dash.ignore(pos, expected)) { - String tmp_op(op_pos_begin->begin, pos->end); - kql_operator = tmp_op; + if (!isValidKQLPos(pos)) + return false; + kql_operator = String(op_pos_begin->begin, pos->end); } else --pos; diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.sql b/tests/queries/0_stateless/02366_kql_mvexpand.sql index e7798609646..ac1a6d9a8f4 100644 --- a/tests/queries/0_stateless/02366_kql_mvexpand.sql +++ b/tests/queries/0_stateless/02366_kql_mvexpand.sql @@ -33,3 +33,7 @@ print '-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bo mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool); print '-- mv_expand_test_table | mv-expand c to typeof(bool) --'; mv_expand_test_table | mv-expand c to typeof(bool); +SET max_query_size = 28; +SET dialect='kusto'; +mv_expand_test_table | mv-expand c, d; -- { serverError 62 } +SET max_query_size=262144; From f89cb57d02603f848e1d6108a35bae51d4dd636c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 29 Feb 2024 12:32:27 +0800 Subject: [PATCH 095/356] fix failed ut --- src/Functions/multiIf.cpp | 85 +++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index b0a344be96e..3555c195bf0 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -278,18 +278,17 @@ public: if (which.is##TYPE()) \ { \ MutableColumnPtr res = result_type->createColumn(); \ - res->reserve(rows); \ if (result_type->isNullable()) \ { \ auto & res_nullable = assert_cast(*res); \ auto & res_data = assert_cast &>(res_nullable.getNestedColumn()).getData(); \ auto & res_null_map = res_nullable.getNullMapData(); \ - executeInstructionsColumnar(instructions, rows, res_data, &res_null_map); \ + executeInstructionsColumnar(instructions, rows, res_data, &res_null_map); \ } \ else \ { \ auto & res_data = assert_cast &>(*res).getData(); \ - executeInstructionsColumnar(instructions, rows, res_data, nullptr); \ + executeInstructionsColumnar(instructions, rows, res_data, nullptr); \ } \ return std::move(res); \ } @@ -409,7 +408,7 @@ private: } } - template + template static NO_INLINE void executeInstructionsColumnar( std::vector & instructions, 
size_t rows, @@ -419,50 +418,56 @@ private: PaddedPODArray inserts(rows, static_cast(instructions.size())); calculateInserts(instructions, rows, inserts); - if (!res_null_map) + res_data.resize_exact(rows); + if (res_null_map) + res_null_map->resize_exact(rows); + + std::vector data_cols(instructions.size(), nullptr); + std::vector null_map_cols(instructions.size(), nullptr); + for (size_t i = 0; i < instructions.size(); ++i) { - for (size_t row_i = 0; row_i < rows; ++row_i) + if (instructions[i].source->isNullable()) { - auto & instruction = instructions[inserts[row_i]]; - auto ref = instruction.source->getDataAt(row_i); - res_data[row_i] = *reinterpret_cast(ref.data); - } - } - else - { - std::vector data_cols(instructions.size()); - std::vector null_map_cols(instructions.size()); - PaddedPODArray shared_null_map(rows, 0); - for (size_t i = 0; i < instructions.size(); ++i) - { - if (instructions[i].source->isNullable()) - { - const ColumnNullable * nullable_col; - if (!instructions[i].source_is_constant) - nullable_col = assert_cast(instructions[i].source.get()); - else - { - const ColumnPtr data_column = assert_cast(*instructions[i].source).getDataColumnPtr(); - nullable_col = assert_cast(data_column.get()); - } - null_map_cols[i] = assert_cast(*nullable_col->getNullMapColumnPtr()).getData().data(); - data_cols[i] = assert_cast &>(*nullable_col->getNestedColumnPtr()).getData().data(); - } + const ColumnNullable * nullable_col; + if (!instructions[i].source_is_constant) + nullable_col = assert_cast(instructions[i].source.get()); else { - null_map_cols[i] = shared_null_map.data(); - data_cols[i] = assert_cast &>(*instructions[i].source).getData().data(); + const ColumnPtr data_column = assert_cast(*instructions[i].source).getDataColumnPtr(); + nullable_col = assert_cast(data_column.get()); + } + null_map_cols[i] = assert_cast(*nullable_col->getNullMapColumnPtr()).getData().data(); + data_cols[i] = assert_cast &>(*nullable_col->getNestedColumnPtr()).getData().data(); + } + else + { + data_cols[i] = assert_cast &>(*instructions[i].source).getData().data(); + } + } + + std::unique_ptr> shared_null_map; + if constexpr (nullable_result) + { + for (auto & col : null_map_cols) + { + if (!col) + { + if (!shared_null_map) + shared_null_map = std::make_unique>(rows, 0); + + col = shared_null_map->data(); } } + } - for (size_t row_i = 0; row_i < rows; ++row_i) - { - S insert = inserts[row_i]; - auto & instruction = instructions[insert]; - size_t index = instruction.source_is_constant ? 0 : row_i; - res_data[row_i] = *(data_cols[insert] + index); + for (size_t row_i = 0; row_i < rows; ++row_i) + { + S insert = inserts[row_i]; + auto & instruction = instructions[insert]; + size_t index = instruction.source_is_constant ? 
0 : row_i; + res_data[row_i] = *(data_cols[insert] + index); + if constexpr (nullable_result) (*res_null_map)[row_i] = *(null_map_cols[insert] + index); - } } } From f7d173980806bb2219955013627dee3cd6dfdc42 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 29 Feb 2024 15:00:09 +0800 Subject: [PATCH 096/356] fix failed uts --- src/Functions/multiIf.cpp | 30 +++++++++++++----------------- tests/performance/multiif.xml | 8 ++++++++ 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 3555c195bf0..cbadbcc0e98 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -418,6 +418,11 @@ private: PaddedPODArray inserts(rows, static_cast(instructions.size())); calculateInserts(instructions, rows, inserts); + for (size_t i=0; i(inserts[i]) << std::endl; + } + res_data.resize_exact(rows); if (res_null_map) res_null_map->resize_exact(rows); @@ -426,23 +431,14 @@ private: std::vector null_map_cols(instructions.size(), nullptr); for (size_t i = 0; i < instructions.size(); ++i) { - if (instructions[i].source->isNullable()) - { - const ColumnNullable * nullable_col; - if (!instructions[i].source_is_constant) - nullable_col = assert_cast(instructions[i].source.get()); - else - { - const ColumnPtr data_column = assert_cast(*instructions[i].source).getDataColumnPtr(); - nullable_col = assert_cast(data_column.get()); - } - null_map_cols[i] = assert_cast(*nullable_col->getNullMapColumnPtr()).getData().data(); - data_cols[i] = assert_cast &>(*nullable_col->getNestedColumnPtr()).getData().data(); - } - else - { - data_cols[i] = assert_cast &>(*instructions[i].source).getData().data(); - } + auto & instruction = instructions[i]; + const IColumn * non_const_col = instructions[i].source_is_constant + ? &assert_cast(*instruction.source).getDataColumn() + : instruction.source.get(); + const ColumnNullable * nullable_col = checkAndGetColumn(non_const_col); + data_cols[i] = nullable_col ? assert_cast &>(nullable_col->getNestedColumn()).getData().data() + : assert_cast &>(*non_const_col).getData().data(); + null_map_cols[i] = nullable_col ? 
assert_cast(nullable_col->getNullMapColumn()).getData().data() : nullptr; } std::unique_ptr> shared_null_map; diff --git a/tests/performance/multiif.xml b/tests/performance/multiif.xml index ad56ab3f5f2..0c2d95cc553 100644 --- a/tests/performance/multiif.xml +++ b/tests/performance/multiif.xml @@ -5,4 +5,12 @@ select count(1) from test_multiif_t where multiIf(d > 2, d-2, d > 1, d-1, d >0, d, 0) > 1 SETTINGS max_threads=1 DROP TABLE IF EXISTS test_multiif_t + + + + SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf( rand(1) % 2 = 0, materialize(1::Nullable(Decimal256(3))), rand(2) % 2 = 0, materialize(2::Nullable(Decimal256(3))), rand(3) % 2 = 0, materialize(3::Nullable(Decimal256(3))), rand(4) % 2 = 0, materialize(4::Nullable(Decimal256(3))), rand(5) % 2 = 0, materialize(5::Nullable(Decimal256(3))), materialize(6::Nullable(Decimal256(3))))) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf( rand(1) % 2 = 0, materialize(1::Decimal256(3)), rand(2) % 2 = 0, materialize(2::Decimal256(3)), rand(3) % 2 = 0, materialize(3::Decimal256(3)), rand(4) % 2 = 0, materialize(4::Decimal256(3)), rand(5) % 2 = 0, materialize(5::Decimal256(3)), materialize(6::Decimal256(3)))) + + + SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf(rand() % 2 = 0, rand()+2, rand() % 3 = 0, rand()+3, rand() % 4 = 0, rand()+4, rand() % 5 = 0, rand() + 5, rand() % 6 = 0, rand() + 6, rand())) From 656412c93ee87c5ba0656833b47d4796bb793c25 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 29 Feb 2024 15:03:59 +0800 Subject: [PATCH 097/356] fix failed uts --- src/Functions/multiIf.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index cbadbcc0e98..a9584738a91 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -418,11 +418,6 @@ private: PaddedPODArray inserts(rows, static_cast(instructions.size())); calculateInserts(instructions, rows, inserts); - for (size_t i=0; i(inserts[i]) << std::endl; - } - res_data.resize_exact(rows); if (res_null_map) res_null_map->resize_exact(rows); From 46f9fddc4d9f823c8c2fa283aed20706f2d4af4d Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 29 Feb 2024 15:11:02 +0800 Subject: [PATCH 098/356] fix code style --- src/Functions/multiIf.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index a9584738a91..5bf9f2af420 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -368,11 +368,11 @@ private: /// We should read source from which instruction on each row? 
template - static NO_INLINE void calculateInserts(std::vector & instructions, size_t rows, PaddedPODArray & inserts) + static NO_INLINE void calculateInserts(const std::vector & instructions, size_t rows, PaddedPODArray & inserts) { for (S i = instructions.size() - 1; i != static_cast(-1); --i) { - auto & instruction = instructions[i]; + const auto & instruction = instructions[i]; if (instruction.condition_always_true) { for (size_t row_i = 0; row_i < rows; ++row_i) @@ -410,7 +410,7 @@ private: template static NO_INLINE void executeInstructionsColumnar( - std::vector & instructions, + const std::vector & instructions, size_t rows, PaddedPODArray & res_data, PaddedPODArray * res_null_map = nullptr) @@ -426,7 +426,7 @@ private: std::vector null_map_cols(instructions.size(), nullptr); for (size_t i = 0; i < instructions.size(); ++i) { - auto & instruction = instructions[i]; + const auto & instruction = instructions[i]; const IColumn * non_const_col = instructions[i].source_is_constant ? &assert_cast(*instruction.source).getDataColumn() : instruction.source.get(); @@ -454,7 +454,7 @@ private: for (size_t row_i = 0; row_i < rows; ++row_i) { S insert = inserts[row_i]; - auto & instruction = instructions[insert]; + const auto & instruction = instructions[insert]; size_t index = instruction.source_is_constant ? 0 : row_i; res_data[row_i] = *(data_cols[insert] + index); if constexpr (nullable_result) From 9b3a0273c720112c04b018b2a9aec0ac1765c1d1 Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Thu, 29 Feb 2024 17:11:08 +0800 Subject: [PATCH 099/356] Update settings.md by adding some missing settings. --- docs/zh/operations/settings/settings.md | 37 +++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 1874970ac95..69f76a35edc 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -649,11 +649,22 @@ log_query_threads=1 ## max_query_size {#settings-max_query_size} -查询的最大部分,å¯ä»¥è¢«å¸¦åˆ°RAM用于使用SQL解æžå™¨è¿›è¡Œè§£æžã€‚ -æ’入查询还包å«ç”±å•ç‹¬çš„æµè§£æžå™¨ï¼ˆæ¶ˆè€—O(1)RAM)处ç†çš„æ’入数æ®ï¼Œè¿™äº›æ•°æ®ä¸åŒ…å«åœ¨æ­¤é™åˆ¶ä¸­ã€‚ +SQL 解æžå™¨è§£æžçš„查询字符串的最大字节数。 INSERT 查询的 VALUES å­å¥ä¸­çš„æ•°æ®ç”±å•ç‹¬çš„æµè§£æžå™¨ï¼ˆæ¶ˆè€— O(1) RAM)处ç†ï¼Œå¹¶ä¸”ä¸å—æ­¤é™åˆ¶çš„å½±å“。 默认值:256KiB。 + +## max_parser_depth {#max_parser_depth} + +é™åˆ¶é€’归下é™è§£æžå™¨ä¸­çš„最大递归深度。å…许控制堆栈大å°ã€‚ + +å¯èƒ½çš„值: + +- 正整数。 +- 0 — 递归深度ä¸å—é™åˆ¶ã€‚ + +默认值:1000。 + ## interactive_delay {#interactive-delay} 以微秒为å•ä½çš„间隔,用于检查请求执行是å¦å·²è¢«å–消并å‘é€è¿›åº¦ã€‚ @@ -1064,6 +1075,28 @@ ClickHouse生æˆå¼‚常 默认值:0。 +## optimize_functions_to_subcolumns {#optimize_functions_to_subcolumns} + +å¯ç”¨æˆ–ç¦ç”¨é€šè¿‡å°†æŸäº›å‡½æ•°è½¬æ¢ä¸ºè¯»å–å­åˆ—的优化。这å‡å°‘了è¦è¯»å–çš„æ•°æ®é‡ã€‚ + +这些函数å¯ä»¥è½¬åŒ–为: + +- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 +- [empty](../../sql-reference/functions/array-functions.md/#function-empty) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 +- [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 +- [isNull](../../sql-reference/operators/index.md#operator-is-null) è¯»å– [null](../../sql-reference/data-types/nullable. 
md/#finding-null) å­åˆ—。 +- [isNotNull](../../sql-reference/operators/index.md#is-not-null) è¯»å– [null](../../sql-reference/data-types/nullable. md/#finding-null) å­åˆ—。 +- [count](../../sql-reference/aggregate-functions/reference/count.md) è¯»å– [null](../../sql-reference/data-types/nullable.md/ #finding-null) å­åˆ—。 +- [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) è¯»å– [keys](../../sql-reference/data-types/map.md/#map-subcolumns) å­åˆ—。 +- [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) è¯»å– [values](../../sql-reference/data-types/map.md/#map-subcolumns) å­åˆ—。 + +å¯èƒ½çš„值: + +- 0 — ç¦ç”¨ä¼˜åŒ–。 +- 1 — 优化已å¯ç”¨ã€‚ + +默认值:`0`。 + ## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} - 类型:秒 From 7116dd7dca60e05234b1edf04396e9ec492d0b08 Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Thu, 29 Feb 2024 17:22:25 +0800 Subject: [PATCH 100/356] Update array.md by adding `Array Size` part --- docs/zh/sql-reference/data-types/array.md | 26 ++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md index 46c40b889ad..41ed98fd053 100644 --- a/docs/zh/sql-reference/data-types/array.md +++ b/docs/zh/sql-reference/data-types/array.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/array --- -# 阵列(T) {#data-type-array} +# 数组(T) {#data-type-array} ç”± `T` 类型元素组æˆçš„数组。 @@ -66,3 +66,27 @@ SELECT array(1, 'a') Received exception from server (version 1.1.54388): Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. ``` + +## æ•°ç»„å¤§å° {#array-size} + +å¯ä»¥ä½¿ç”¨`size0`å­åˆ—找到数组的大å°ï¼Œè€Œæ— éœ€è¯»å–整个列。对于多维数组,您å¯ä»¥ä½¿ç”¨`sizeN-1`,其中`N`是所需的维度。 + +**例å­** + +SQL查询: + +```sql +CREATE TABLE t_arr (`arr` Array(Array(Array(UInt32)))) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_arr VALUES ([[[12, 13, 0, 1],[12]]]); + +SELECT arr.size0, arr.size1, arr.size2 FROM t_arr; +``` + +结果: + +``` text +┌─arr.size0─┬─arr.size1─┬─arr.size2─┠+│ 1 │ [2] │ [[4,1]] │ +└───────────┴───────────┴───────────┘ +``` From ea9ef507fcca531d470a62462e571051ec7bf633 Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Thu, 29 Feb 2024 17:29:37 +0800 Subject: [PATCH 101/356] Update nullable.md by adding #finding-null --- docs/zh/sql-reference/data-types/nullable.md | 28 ++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/zh/sql-reference/data-types/nullable.md b/docs/zh/sql-reference/data-types/nullable.md index 94311f8298a..7ecbc3d1f40 100644 --- a/docs/zh/sql-reference/data-types/nullable.md +++ b/docs/zh/sql-reference/data-types/nullable.md @@ -20,6 +20,34 @@ slug: /zh/sql-reference/data-types/nullable 掩ç æ–‡ä»¶ä¸­çš„æ¡ç›®å…许ClickHouse区分æ¯ä¸ªè¡¨è¡Œçš„对应数æ®ç±»åž‹çš„«NULL»和默认值由于有é¢å¤–的文件,«Nullable»列比普通列消耗更多的存储空间 +## nullå­åˆ— {#finding-null} + +It is possible to find `NULL` values in a column by using `null` subcolumn without reading the whole column. It returns `1` if the corresponding value is `NULL` and `0` otherwise. 
+通过使用`null`å­åˆ—å¯ä»¥åœ¨åˆ—中查找`NULL`值,而无需读å–整个列。如果对应的值为`NULL`,则返回`1`,å¦åˆ™è¿”回`0`。 + +**示例** + +SQL查询: + +``` sql +CREATE TABLE nullable (`n` Nullable(UInt32)) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO nullable VALUES (1) (NULL) (2) (NULL); + +SELECT n.null FROM nullable; +``` + +结果: + +``` text +┌─n.null─┠+│ 0 │ +│ 1 │ +│ 0 │ +│ 1 │ +└────────┘ +``` + ## 用法示例 {#yong-fa-shi-li} ``` sql From 6ea5fabac51ba5ab52276f799a69e7bfe0ddcc09 Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Thu, 29 Feb 2024 17:39:07 +0800 Subject: [PATCH 102/356] Update settings.md --- docs/zh/operations/settings/settings.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 69f76a35edc..307159aa5a1 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1082,11 +1082,11 @@ ClickHouse生æˆå¼‚常 这些函数å¯ä»¥è½¬åŒ–为: - [length](../../sql-reference/functions/array-functions.md/#array_functions-length) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 -- [empty](../../sql-reference/functions/array-functions.md/#function-empty) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 -- [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 +- [empty](../../sql-reference/functions/array-functions.md/#empty函数) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 +- [notEmpty](../../sql-reference/functions/array-functions.md/#notempty函数) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 - [isNull](../../sql-reference/operators/index.md#operator-is-null) è¯»å– [null](../../sql-reference/data-types/nullable. md/#finding-null) å­åˆ—。 - [isNotNull](../../sql-reference/operators/index.md#is-not-null) è¯»å– [null](../../sql-reference/data-types/nullable. 
md/#finding-null) å­åˆ—。 -- [count](../../sql-reference/aggregate-functions/reference/count.md) è¯»å– [null](../../sql-reference/data-types/nullable.md/ #finding-null) å­åˆ—。 +- [count](../../sql-reference/aggregate-functions/reference/count.md) è¯»å– [null](../../sql-reference/data-types/nullable.md/#finding-null) å­åˆ—。 - [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) è¯»å– [keys](../../sql-reference/data-types/map.md/#map-subcolumns) å­åˆ—。 - [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) è¯»å– [values](../../sql-reference/data-types/map.md/#map-subcolumns) å­åˆ—。 From 0b4622ebf640ffb4d31a97d39ec4034007528c31 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 29 Feb 2024 09:45:08 +0000 Subject: [PATCH 103/356] Add more suppressions for utils --- programs/copier/ZooKeeperStaff.h | 2 +- src/Common/LoggingFormatStringHelpers.h | 8 ++++---- utils/memcpy-bench/FastMemcpy.h | 6 ++++-- utils/memcpy-bench/FastMemcpy_Avx.h | 4 ++-- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h index bbdec230d2d..c15db73f060 100644 --- a/programs/copier/ZooKeeperStaff.h +++ b/programs/copier/ZooKeeperStaff.h @@ -180,7 +180,7 @@ public: auto logger = getLogger("ClusterCopier"); if (rsp.error == Coordination::Error::ZOK) { - switch (rsp.type) + switch (rsp.type) /// NOLINT(bugprone-switch-missing-default-case) { case Coordination::CREATED: LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path); diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h index b0f0a5cd716..c60c74ee663 100644 --- a/src/Common/LoggingFormatStringHelpers.h +++ b/src/Common/LoggingFormatStringHelpers.h @@ -26,7 +26,7 @@ struct FormatStringHelperImpl formatStringCheckArgsNumImpl(message_format_string, sizeof...(Args)); } template - FormatStringHelperImpl(fmt::basic_runtime && str) : message_format_string(), fmt_str(std::forward>(str)) {} + FormatStringHelperImpl(fmt::basic_runtime && str) : message_format_string(), fmt_str(std::forward>(str)) {} /// NOLINT PreformattedMessage format(Args && ...args) const; }; @@ -43,9 +43,9 @@ struct PreformattedMessage template static PreformattedMessage create(FormatStringHelper fmt, Args &&... 
args); - operator const std::string & () const { return text; } - operator std::string () && { return std::move(text); } - operator fmt::format_string<> () const { UNREACHABLE(); } + operator const std::string & () const { return text; } /// NOLINT + operator std::string () && { return std::move(text); } /// NOLINT + operator fmt::format_string<> () const { UNREACHABLE(); } /// NOLINT void apply(std::string & out_text, std::string_view & out_format_string) const & { diff --git a/utils/memcpy-bench/FastMemcpy.h b/utils/memcpy-bench/FastMemcpy.h index 85d09c5f53e..e2ac73a1b63 100644 --- a/utils/memcpy-bench/FastMemcpy.h +++ b/utils/memcpy-bench/FastMemcpy.h @@ -33,9 +33,11 @@ #endif #endif +/// NOLINTBEGIN(modernize-use-using) typedef __attribute__((__aligned__(1))) uint16_t uint16_unaligned_t; typedef __attribute__((__aligned__(1))) uint32_t uint32_unaligned_t; typedef __attribute__((__aligned__(1))) uint64_t uint64_unaligned_t; +/// NOLINTEND(modernize-use-using) //--------------------------------------------------------------------- // fast copy for different sizes @@ -98,7 +100,7 @@ __attribute__((__no_sanitize__("undefined"))) inline void *memcpy_tiny(void * __ unsigned char *dd = ((unsigned char*)dst) + size; const unsigned char *ss = ((const unsigned char*)src) + size; - switch (size) + switch (size) /// NOLINT(bugprone-switch-missing-default-case) { case 64: memcpy_sse2_64(dd - 64, ss - 64); @@ -652,7 +654,7 @@ __attribute__((__no_sanitize__("undefined"))) inline void *memcpy_tiny(void * __ //--------------------------------------------------------------------- // main routine //--------------------------------------------------------------------- -void* memcpy_fast_sse(void * __restrict destination, const void * __restrict source, size_t size) +inline void* memcpy_fast_sse(void * __restrict destination, const void * __restrict source, size_t size) { unsigned char *dst = (unsigned char*)destination; const unsigned char *src = (const unsigned char*)source; diff --git a/utils/memcpy-bench/FastMemcpy_Avx.h b/utils/memcpy-bench/FastMemcpy_Avx.h index ee7d4e19536..3271e10d237 100644 --- a/utils/memcpy-bench/FastMemcpy_Avx.h +++ b/utils/memcpy-bench/FastMemcpy_Avx.h @@ -103,7 +103,7 @@ static INLINE void *memcpy_tiny_avx(void * __restrict dst, const void * __restri unsigned char *dd = reinterpret_cast(dst) + size; const unsigned char *ss = reinterpret_cast(src) + size; - switch (size) + switch (size) /// NOLINT(bugprone-switch-missing-default-case) { case 128: memcpy_avx_128(dd - 128, ss - 128); [[fallthrough]]; case 0: break; @@ -371,7 +371,7 @@ static INLINE void *memcpy_tiny_avx(void * __restrict dst, const void * __restri //--------------------------------------------------------------------- // main routine //--------------------------------------------------------------------- -void* memcpy_fast_avx(void * __restrict destination, const void * __restrict source, size_t size) +inline void* memcpy_fast_avx(void * __restrict destination, const void * __restrict source, size_t size) { unsigned char *dst = reinterpret_cast(destination); const unsigned char *src = reinterpret_cast(source); From e27472c09dc94e1ddaa8e5972d74ffe3d3e07eb1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 29 Feb 2024 09:47:44 +0000 Subject: [PATCH 104/356] Correct changes in LoggingFormatStringHelpers.h --- src/Common/LoggingFormatStringHelpers.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h index 
c60c74ee663..73bf53a955b 100644 --- a/src/Common/LoggingFormatStringHelpers.h +++ b/src/Common/LoggingFormatStringHelpers.h @@ -26,7 +26,7 @@ struct FormatStringHelperImpl formatStringCheckArgsNumImpl(message_format_string, sizeof...(Args)); } template - FormatStringHelperImpl(fmt::basic_runtime && str) : message_format_string(), fmt_str(std::forward>(str)) {} /// NOLINT + explicit FormatStringHelperImpl(fmt::basic_runtime && str) : fmt_str(std::forward>(str)) {} PreformattedMessage format(Args && ...args) const; }; @@ -43,9 +43,9 @@ struct PreformattedMessage template static PreformattedMessage create(FormatStringHelper fmt, Args &&... args); - operator const std::string & () const { return text; } /// NOLINT - operator std::string () && { return std::move(text); } /// NOLINT - operator fmt::format_string<> () const { UNREACHABLE(); } /// NOLINT + explicit operator const std::string & () const { return text; } + explicit operator std::string () && { return std::move(text); } + explicit operator fmt::format_string<> () const { UNREACHABLE(); } void apply(std::string & out_text, std::string_view & out_format_string) const & { From 39041bde260f694d755a3cab980a66a47afbfad3 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 29 Feb 2024 11:06:41 +0100 Subject: [PATCH 105/356] Add timeouts when waiting for commit logs --- src/Coordination/KeeperContext.cpp | 42 +++++++++++++++++++++++++-- src/Coordination/KeeperContext.h | 24 ++++++--------- src/Coordination/KeeperDispatcher.cpp | 39 ++++++++++++------------- src/Coordination/KeeperDispatcher.h | 3 +- 4 files changed, 70 insertions(+), 38 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index b06e321aeec..7c1ff55245e 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -374,11 +375,16 @@ void KeeperContext::updateKeeperMemorySoftLimit(const Poco::Util::AbstractConfig bool KeeperContext::setShutdownCalled() { - std::unique_lock lock(local_logs_preprocessed_cv_mutex); + std::unique_lock local_logs_preprocessed_lock(local_logs_preprocessed_cv_mutex); + std::unique_lock last_committed_log_idx_lock(last_committed_log_idx_cv_mutex); + if (!shutdown_called.exchange(true)) { - lock.unlock(); + local_logs_preprocessed_lock.unlock(); + last_committed_log_idx_lock.unlock(); + local_logs_preprocessed_cv.notify_all(); + last_committed_log_idx_cv.notify_all(); return true; } @@ -410,4 +416,36 @@ const CoordinationSettingsPtr & KeeperContext::getCoordinationSettings() const return coordination_settings; } +uint64_t KeeperContext::lastCommittedIndex() const +{ + return last_committed_log_idx.load(std::memory_order_relaxed); +} + +void KeeperContext::setLastCommitIndex(uint64_t commit_index) +{ + bool should_notify; + { + std::lock_guard lock(last_committed_log_idx_cv_mutex); + last_committed_log_idx.store(commit_index, std::memory_order_relaxed); + + should_notify = wait_commit_upto_idx.has_value() && commit_index >= wait_commit_upto_idx; + } + + if (should_notify) + last_committed_log_idx_cv.notify_all(); +} + +bool KeeperContext::waitCommittedUpto(uint64_t log_idx, uint64_t wait_timeout_ms) +{ + std::unique_lock lock(last_committed_log_idx_cv_mutex); + wait_commit_upto_idx = log_idx; + bool success = last_committed_log_idx_cv.wait_for( + lock, + std::chrono::milliseconds(wait_timeout_ms), + [&] { return shutdown_called || last_committed_log_idx >= wait_commit_upto_idx; }); + + wait_commit_upto_idx.reset(); + return 
success; +} + } diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index a7169e64387..e283e65dffa 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -76,21 +76,10 @@ public: void waitLocalLogsPreprocessedOrShutdown(); - uint64_t lastCommittedIndex() const - { - return last_committed_log_idx.load(std::memory_order_relaxed); - } - - void setLastCommitIndex(uint64_t commit_index) - { - last_committed_log_idx.store(commit_index, std::memory_order_relaxed); - last_committed_log_idx.notify_all(); - } - - void waitLastCommittedIndexUpdated(uint64_t current_last_committed_idx) - { - last_committed_log_idx.wait(current_last_committed_idx, std::memory_order_relaxed); - } + uint64_t lastCommittedIndex() const; + void setLastCommitIndex(uint64_t commit_index); + /// returns true if the log is committed, false if timeout happened + bool waitCommittedUpto(uint64_t log_idx, uint64_t wait_timeout_ms); const CoordinationSettingsPtr & getCoordinationSettings() const; @@ -142,6 +131,11 @@ private: std::atomic last_committed_log_idx = 0; + /// will be set by dispatcher when waiting for certain commits + std::optional wait_commit_upto_idx = 0; + std::mutex last_committed_log_idx_cv_mutex; + std::condition_variable last_committed_log_idx_cv; + CoordinationSettingsPtr coordination_settings; }; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 959ded47f27..7af9c65e9d3 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -5,6 +5,7 @@ #include #include +#include "Common/ZooKeeper/IKeeper.h" #include #include #include @@ -211,10 +212,13 @@ void KeeperDispatcher::requestThread() if (shutdown_called) break; + bool execute_requests_after_write = has_read_request || has_reconfig_request; + nuraft::ptr result_buf = nullptr; /// Forcefully process all previous pending requests if (prev_result) - result_buf = forceWaitAndProcessResult(prev_result, prev_batch); + result_buf + = forceWaitAndProcessResult(prev_result, prev_batch, /*clear_requests_on_success=*/true); /// Process collected write requests batch if (!current_batch.empty()) @@ -235,10 +239,11 @@ void KeeperDispatcher::requestThread() } /// If we will execute read or reconfig next, we have to process result now - if (has_read_request || has_reconfig_request) + if (execute_requests_after_write) { if (prev_result) - result_buf = forceWaitAndProcessResult(prev_result, current_batch); + result_buf = forceWaitAndProcessResult( + prev_result, current_batch, /*clear_requests_on_success=*/!execute_requests_after_write); /// In case of older version or disabled async replication, result buf will be set to value of `commit` function /// which always returns nullptr @@ -250,18 +255,12 @@ void KeeperDispatcher::requestThread() nuraft::buffer_serializer bs(result_buf); auto log_idx = bs.get_u64(); - /// we will wake up this thread on each commit so we need to run it in loop until the last request of batch is committed - while (true) - { - if (shutdown_called) - return; + /// if timeout happened set error responses for the requests + if (!keeper_context->waitCommittedUpto(log_idx, coordination_settings->operation_timeout_ms.totalMilliseconds())) + addErrorResponses(current_batch, Coordination::Error::ZOPERATIONTIMEOUT); - auto current_last_committed_idx = keeper_context->lastCommittedIndex(); - if (current_last_committed_idx >= log_idx) - break; - - keeper_context->waitLastCommittedIndexUpdated(current_last_committed_idx); 
- } + if (shutdown_called) + return; } } @@ -501,10 +500,6 @@ void KeeperDispatcher::shutdown() LOG_DEBUG(log, "Shutting down storage dispatcher"); - /// some threads can be waiting for certain commits, so we set value - /// of the last commit index to something that will always unblock - keeper_context->setLastCommitIndex(std::numeric_limits::max()); - if (session_cleaner_thread.joinable()) session_cleaner_thread.join(); @@ -718,7 +713,8 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession } } -nuraft::ptr KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions) +nuraft::ptr KeeperDispatcher::forceWaitAndProcessResult( + RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success) { if (!result->has_result()) result->get(); @@ -732,7 +728,10 @@ nuraft::ptr KeeperDispatcher::forceWaitAndProcessResult(RaftAppe auto result_buf = result->get(); result = nullptr; - requests_for_sessions.clear(); + + if (!result_buf || clear_requests_on_success) + requests_for_sessions.clear(); + return result_buf; } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index e8ee486be88..231ef7e94e9 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -100,7 +100,8 @@ private: /// Forcefully wait for result and sets errors if something when wrong. /// Clears both arguments - nuraft::ptr forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); + nuraft::ptr forceWaitAndProcessResult( + RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success); public: std::mutex read_request_queue_mutex; From 117764e53cc740522ff5cfbd2a6aec74ad4a53df Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 29 Feb 2024 11:33:26 +0000 Subject: [PATCH 106/356] Revert "Correct changes in LoggingFormatStringHelpers.h" This reverts commit e27472c09dc94e1ddaa8e5972d74ffe3d3e07eb1. --- src/Common/LoggingFormatStringHelpers.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h index 73bf53a955b..c60c74ee663 100644 --- a/src/Common/LoggingFormatStringHelpers.h +++ b/src/Common/LoggingFormatStringHelpers.h @@ -26,7 +26,7 @@ struct FormatStringHelperImpl formatStringCheckArgsNumImpl(message_format_string, sizeof...(Args)); } template - explicit FormatStringHelperImpl(fmt::basic_runtime && str) : fmt_str(std::forward>(str)) {} + FormatStringHelperImpl(fmt::basic_runtime && str) : message_format_string(), fmt_str(std::forward>(str)) {} /// NOLINT PreformattedMessage format(Args && ...args) const; }; @@ -43,9 +43,9 @@ struct PreformattedMessage template static PreformattedMessage create(FormatStringHelper fmt, Args &&... 
args); - explicit operator const std::string & () const { return text; } - explicit operator std::string () && { return std::move(text); } - explicit operator fmt::format_string<> () const { UNREACHABLE(); } + operator const std::string & () const { return text; } /// NOLINT + operator std::string () && { return std::move(text); } /// NOLINT + operator fmt::format_string<> () const { UNREACHABLE(); } /// NOLINT void apply(std::string & out_text, std::string_view & out_format_string) const & { From 42437a2ae14c142ee629021c71fac6ea6107defc Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 29 Feb 2024 03:27:32 -0800 Subject: [PATCH 107/356] Userspace page cache (#53770) * Userspace page cache * Maybe it'll build this time, who knows. * 'auto' went out of fashion, I guess * Documentation, tsan workaround, metric 'UnreclaimableRss', disable page cache in the test that uses DatabaseOrdinary * Moved CachedInMemoryReadBufferFromFile to object store level, changed settings, addressed other comments. * Fix * Another fix * Fix restricted seek, fix ppc64le build * Don't allow page cache with file cache * Adjust tests a little * Fix clang-tidy * Conflicts * Comments * Maybe unbroke AsynchronousBoundedReadBuffer * SettingsChangesHistory.h * Fix warning in test --- docs/en/operations/storing-data.md | 10 + .../example-datasets/opensky.mdx | 12 +- programs/server/Server.cpp | 7 + src/Access/Common/AccessType.h | 1 + src/Common/PageCache.cpp | 688 ++++++++++++++++++ src/Common/PageCache.h | 299 ++++++++ src/Common/ProfileEvents.cpp | 9 + src/Core/Defines.h | 9 + src/Core/ServerSettings.h | 7 +- src/Core/Settings.h | 4 + src/Core/SettingsChangesHistory.h | 3 + .../IO/AsynchronousBoundedReadBuffer.cpp | 12 +- .../IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 67 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 9 +- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 2 + src/Disks/IO/ThreadPoolRemoteFSReader.h | 3 + .../AzureBlobStorage/AzureObjectStorage.cpp | 8 +- .../ObjectStorages/DiskObjectStorage.cpp | 3 +- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 4 +- .../Local/LocalObjectStorage.cpp | 6 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 9 +- .../ObjectStorages/Web/WebObjectStorage.cpp | 7 +- src/IO/AsynchronousReader.h | 3 + src/IO/BufferBase.h | 3 + src/IO/CachedInMemoryReadBufferFromFile.cpp | 188 +++++ src/IO/CachedInMemoryReadBufferFromFile.h | 41 ++ src/IO/ReadBuffer.h | 19 +- src/IO/ReadSettings.h | 7 + src/Interpreters/Context.cpp | 41 +- src/Interpreters/Context.h | 5 + src/Interpreters/InterpreterSystemQuery.cpp | 9 + .../ServerAsynchronousMetrics.cpp | 12 + src/Interpreters/tests/gtest_page_cache.cpp | 267 +++++++ src/Parsers/ASTSystemQuery.h | 1 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 - src/Storages/StorageS3.cpp | 18 +- tests/clickhouse-test | 2 + .../01271_show_privileges.reference | 1 + .../0_stateless/02867_page_cache.reference | 23 + .../queries/0_stateless/02867_page_cache.sql | 105 +++ 41 files changed, 1854 insertions(+), 76 deletions(-) create mode 100644 src/Common/PageCache.cpp create mode 100644 src/Common/PageCache.h create mode 100644 src/IO/CachedInMemoryReadBufferFromFile.cpp create mode 100644 src/IO/CachedInMemoryReadBufferFromFile.h create mode 100644 src/Interpreters/tests/gtest_page_cache.cpp create mode 100644 tests/queries/0_stateless/02867_page_cache.reference create mode 100644 tests/queries/0_stateless/02867_page_cache.sql diff --git a/docs/en/operations/storing-data.md 
b/docs/en/operations/storing-data.md index 003277c8d4f..84251812c01 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -275,6 +275,16 @@ Cache profile events: - `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds` +## Using in-memory cache (userspace page cache) {#userspace-page-cache} + +The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files. + +To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`. + +By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`. + +Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled. + ## Storing Data on Web Server {#storing-data-on-webserver} There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. 
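A minimal usage sketch for the page cache settings documented above (not part of the patch itself; the table name `s3_backed_table` and its column `value` are hypothetical, while the setting, profile event, and metric names are taken from this patch):

```sql
-- page_cache_size, page_cache_use_madv_free etc. are server-level settings (config.xml),
-- so only the query-level switch is shown here.
SELECT sum(length(value)) FROM s3_backed_table
SETTINGS use_page_cache_for_disks_without_file_cache = 1;

-- Cache effectiveness: full hits, partial hits (some pages were evicted) and misses.
SELECT event, value FROM system.events WHERE event LIKE 'PageCacheChunk%';

-- With page_cache_use_madv_free enabled, monitor memory via this metric instead of RSS.
SELECT value FROM system.asynchronous_metrics WHERE metric = 'UnreclaimableRSS';
```

The `PageCacheChunkDataHits` / `PageCacheChunkDataPartialHits` / `PageCacheChunkDataMisses` events correspond to chunks found fully, partially, or not at all in memory, which helps when judging whether a fixed `page_cache_size` is large enough.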
diff --git a/docs/zh/getting-started/example-datasets/opensky.mdx b/docs/zh/getting-started/example-datasets/opensky.mdx index 92cd104e06e..b79c02ab780 100644 --- a/docs/zh/getting-started/example-datasets/opensky.mdx +++ b/docs/zh/getting-started/example-datasets/opensky.mdx @@ -1,4 +1,4 @@ ---- +--- slug: /zh/getting-started/example-datasets/opensky sidebar_label: ç©ºä¸­äº¤é€šæ•°æ® description: 该数æ®é›†ä¸­çš„æ•°æ®æ˜¯ä»Žå®Œæ•´çš„ OpenSky æ•°æ®é›†ä¸­è¡ç”Ÿè€Œæ¥çš„,对其中的数æ®è¿›è¡Œäº†å¿…è¦çš„清ç†ï¼Œç”¨ä»¥å±•ç¤ºåœ¨ COVID-19 期间空中交通的å‘展。 @@ -53,12 +53,12 @@ CREATE TABLE opensky ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"' ``` -- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处ç†ã€‚ `xargs -P100` 指定最多使用 100 个并行工作程åºï¼Œä½†ç”±äºŽæˆ‘们åªæœ‰ 30 个文件,工作程åºçš„æ•°é‡å°†åªæœ‰ 30 个。 -- 对于æ¯ä¸ªæ–‡ä»¶ï¼Œ`xargs` 将通过 `bash -c` 为æ¯ä¸ªæ–‡ä»¶è¿è¡Œä¸€ä¸ªè„šæœ¬æ–‡ä»¶ã€‚该脚本通过使用 `{}` 表示文件åå ä½ç¬¦ï¼Œç„¶åŽ `xargs` 由命令进行填充(使用 `-I{}`)。 -- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` å‚数),并将输出é‡å®šå‘到 `clickhouse-client`。 -- 我们还è¦æ±‚使用扩展解æžå™¨è§£æž [DateTime](../../sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](../../operations/settings/ settings.md#settings-date_time_input_format)) 以识别具有时区å移的 ISO-8601 æ ¼å¼ã€‚ +- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处ç†ã€‚ `xargs -P100` 指定最多使用 100 个并行工作程åºï¼Œä½†ç”±äºŽæˆ‘们åªæœ‰ 30 个文件,工作程åºçš„æ•°é‡å°†åªæœ‰ 30 个。 +- 对于æ¯ä¸ªæ–‡ä»¶ï¼Œ`xargs` 将通过 `bash -c` 为æ¯ä¸ªæ–‡ä»¶è¿è¡Œä¸€ä¸ªè„šæœ¬æ–‡ä»¶ã€‚该脚本通过使用 `{}` 表示文件åå ä½ç¬¦ï¼Œç„¶åŽ `xargs` 由命令进行填充(使用 `-I{}`)。 +- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` å‚数),并将输出é‡å®šå‘到 `clickhouse-client`。 +- 我们还è¦æ±‚使用扩展解æžå™¨è§£æž [DateTime](/docs/zh/sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](/docs/zh/operations/settings/settings.md#settings-date_time_input_format)) 以识别具有时区å移的 ISO-8601 æ ¼å¼ã€‚ -最åŽï¼Œ`clickhouse-client` 会以 [CSVWithNames](../../interfaces/formats.md#csvwithnames) æ ¼å¼è¯»å–输入数æ®ç„¶åŽæ‰§è¡Œæ’入。 +最åŽï¼Œ`clickhouse-client` 会以 [CSVWithNames](/docs/zh/interfaces/formats.md#csvwithnames) æ ¼å¼è¯»å–输入数æ®ç„¶åŽæ‰§è¡Œæ’入。 å¹¶è¡Œå¯¼å…¥éœ€è¦ 24 秒。 diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 6dc33042a05..786cb27d8c4 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1228,6 +1228,13 @@ try } global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio); + size_t page_cache_size = server_settings.page_cache_size; + if (page_cache_size != 0) + global_context->setPageCache( + server_settings.page_cache_chunk_size, server_settings.page_cache_mmap_size, + page_cache_size, server_settings.page_cache_use_madv_free, + server_settings.page_cache_use_transparent_huge_pages); + String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy; size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size; double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio; diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 8172a468f89..de3eda96bac 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -162,6 +162,7 @@ enum class AccessType M(SYSTEM_DROP_COMPILED_EXPRESSION_CACHE, "SYSTEM DROP COMPILED EXPRESSION, DROP COMPILED EXPRESSION CACHE, DROP COMPILED EXPRESSIONS", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_FILESYSTEM_CACHE, "SYSTEM 
DROP FILESYSTEM CACHE, DROP FILESYSTEM CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_SYNC_FILESYSTEM_CACHE, "SYSTEM REPAIR FILESYSTEM CACHE, REPAIR FILESYSTEM CACHE, SYNC FILESYSTEM CACHE", GLOBAL, SYSTEM) \ + M(SYSTEM_DROP_PAGE_CACHE, "SYSTEM DROP PAGE CACHE, DROP PAGE CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_FORMAT_SCHEMA_CACHE, "SYSTEM DROP FORMAT SCHEMA CACHE, DROP FORMAT SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ diff --git a/src/Common/PageCache.cpp b/src/Common/PageCache.cpp new file mode 100644 index 00000000000..511ec23d431 --- /dev/null +++ b/src/Common/PageCache.cpp @@ -0,0 +1,688 @@ +#include "PageCache.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event PageCacheChunkMisses; + extern const Event PageCacheChunkShared; + extern const Event PageCacheChunkDataHits; + extern const Event PageCacheChunkDataPartialHits; + extern const Event PageCacheChunkDataMisses; + extern const Event PageCacheBytesUnpinnedRoundedToPages; + extern const Event PageCacheBytesUnpinnedRoundedToHugePages; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYSTEM_ERROR; + extern const int MEMORY_LIMIT_EXCEEDED; + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int INVALID_SETTING_VALUE; + extern const int FILE_DOESNT_EXIST; +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunknown-warning-option" +#pragma clang diagnostic ignored "-Wreadability-make-member-function-const" + +PinnedPageChunk::PinnedPageChunk(PinnedPageChunk && c) noexcept + : cache(std::exchange(c.cache, nullptr)), chunk(std::exchange(c.chunk, nullptr)) {} + +PinnedPageChunk & PinnedPageChunk::operator=(PinnedPageChunk && c) noexcept +{ + if (cache) + cache->removeRef(chunk); + cache = std::exchange(c.cache, nullptr); + chunk = std::exchange(c.chunk, nullptr); + return *this; +} + +PinnedPageChunk::~PinnedPageChunk() noexcept +{ + if (cache) + cache->removeRef(chunk); +} + +PinnedPageChunk::PinnedPageChunk(PageCache * cache_, PageChunk * chunk_) noexcept : cache(cache_), chunk(chunk_) {} + +const PageChunk * PinnedPageChunk::getChunk() const { return chunk; } + +bool PinnedPageChunk::markPagePopulated(size_t page_idx) +{ + bool r = chunk->pages_populated.set(page_idx); + return r; +} + +void PinnedPageChunk::markPrefixPopulated(size_t bytes) +{ + for (size_t i = 0; i < (bytes + chunk->page_size - 1) / chunk->page_size; ++i) + markPagePopulated(i); +} + +bool PinnedPageChunk::isPrefixPopulated(size_t bytes) const +{ + for (size_t i = 0; i < (bytes + chunk->page_size - 1) / chunk->page_size; ++i) + if (!chunk->pages_populated.get(i)) + return false; + return true; +} + +AtomicBitSet::AtomicBitSet() = default; + +void AtomicBitSet::init(size_t nn) +{ + n = nn; + v = std::make_unique[]>((n + 7) / 8); +} + +bool AtomicBitSet::get(size_t i) const +{ + return (v[i / 8] & (1 << (i % 8))) != 0; +} + +bool AtomicBitSet::any() const +{ + for (size_t i = 0; i < (n + 7) / 8; ++i) + if (v[i]) + return true; + return false; +} + +bool AtomicBitSet::set(size_t i) const +{ + UInt8 prev = v[i / 8].fetch_or(1 << (i % 8)); + return (prev & (1 << (i % 8))) == 0; +} + +bool AtomicBitSet::set(size_t i, bool val) const +{ + if (val) + return set(i); + else + return unset(i); +} + +bool 
AtomicBitSet::unset(size_t i) const +{ + UInt8 prev = v[i / 8].fetch_and(~(1 << (i % 8))); + return (prev & (1 << (i % 8))) != 0; +} + +void AtomicBitSet::unsetAll() const +{ + for (size_t i = 0; i < (n + 7) / 8; ++i) + v[i].store(0, std::memory_order_relaxed); +} + +PageCache::PageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free_, bool use_huge_pages_) + : bytes_per_page(getPageSize()) + , use_madv_free(use_madv_free_) + , use_huge_pages(use_huge_pages_) + , rng(randomSeed()) +{ + if (bytes_per_chunk == 0 || bytes_per_mmap == 0) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Userspace page cache chunk size and mmap size can't be zero."); + + if (use_huge_pages) + { + use_huge_pages = false; + bool print_warning = false; +#ifdef OS_LINUX + try + { + ReadBufferFromFile in("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); + size_t huge_page_size; + readIntText(huge_page_size, in); + + if (huge_page_size == 0 || huge_page_size % bytes_per_page != 0) + throw Exception(ErrorCodes::SYSTEM_ERROR, "Invalid huge page size reported by the OS: {}", huge_page_size); + + /// THP can be configured to be 2 MiB or 1 GiB in size. 1 GiB is way too big for us. + if (huge_page_size <= (16 << 20)) + { + pages_per_big_page = huge_page_size / bytes_per_page; + use_huge_pages = true; + } + else + { + LOG_WARNING(&Poco::Logger::get("PageCache"), "The OS huge page size is too large for our purposes: {} KiB. Using regular pages. Userspace page cache will be relatively slow.", huge_page_size); + } + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::FILE_DOESNT_EXIST) + throw; + print_warning = true; + } +#else + print_warning = true; +#endif + if (print_warning) + LOG_WARNING(&Poco::Logger::get("PageCache"), "The OS doesn't support transparent huge pages. Userspace page cache will be relatively slow."); + } + + pages_per_chunk = ((bytes_per_chunk - 1) / (bytes_per_page * pages_per_big_page) + 1) * pages_per_big_page; + chunks_per_mmap_target = (bytes_per_mmap - 1) / (bytes_per_page * pages_per_chunk) + 1; + max_mmaps = (bytes_total - 1) / (bytes_per_page * pages_per_chunk * chunks_per_mmap_target) + 1; +} + +PageCache::~PageCache() +{ + chassert(getPinnedSize() == 0); +} + +size_t PageCache::pageSize() const { return bytes_per_page; } +size_t PageCache::chunkSize() const { return bytes_per_page * pages_per_chunk; } +size_t PageCache::maxChunks() const { return chunks_per_mmap_target * max_mmaps; } + +size_t PageCache::getPinnedSize() const +{ + std::unique_lock lock(global_mutex); + return (total_chunks - lru.size()) * bytes_per_page * pages_per_chunk; +} + +PageCache::MemoryStats PageCache::getResidentSetSize() const +{ + MemoryStats stats; +#ifdef OS_LINUX + if (use_madv_free) + { + std::unordered_set cache_mmap_addrs; + for (const auto & m : mmaps) + cache_mmap_addrs.insert(reinterpret_cast(m.ptr)); + + ReadBufferFromFile in("/proc/self/smaps"); + + /// Parse the smaps contents, which is text consisting of entries like this: + /// + /// 117ba4a00000-117be4a00000 rw-p 00000000 00:00 0 + /// Size: 1048576 kB + /// KernelPageSize: 4 kB + /// MMUPageSize: 4 kB + /// Rss: 539516 kB + /// Pss: 539516 kB + /// ... 
+ + auto read_token = [&] + { + String res; + while (!in.eof()) + { + char c = *in.position(); + if (c == '\n' || c == '\t' || c == ' ' || c == '-') + break; + res += c; + ++in.position(); + } + return res; + }; + + auto skip_whitespace = [&] + { + while (!in.eof()) + { + char c = *in.position(); + if (c != ' ' && c != '\t') + break; + ++in.position(); + } + }; + + bool current_range_is_cache = false; + size_t total_rss = 0; + size_t total_lazy_free = 0; + while (!in.eof()) + { + String s = read_token(); + if (!in.eof() && *in.position() == '-') + { + if (s.size() < 16) + s.insert(0, 16 - s.size(), '0'); + UInt64 addr = unhexUInt(s.c_str()); + current_range_is_cache = cache_mmap_addrs.contains(addr); + } + else if (s == "Rss:" || s == "LazyFree") + { + skip_whitespace(); + size_t val; + readIntText(val, in); + skip_whitespace(); + String unit = read_token(); + if (unit != "kB") + throw Exception(ErrorCodes::SYSTEM_ERROR, "Unexpected units in /proc/self/smaps: {}", unit); + size_t bytes = val * 1024; + + if (s == "Rss:") + { + total_rss += bytes; + if (current_range_is_cache) + stats.page_cache_rss += bytes; + } + else + total_lazy_free += bytes; + } + skipToNextLineOrEOF(in); + } + stats.unreclaimable_rss = total_rss - std::min(total_lazy_free, total_rss); + + return stats; + } +#endif + + stats.page_cache_rss = bytes_per_page * pages_per_chunk * total_chunks; + return stats; +} + +PinnedPageChunk PageCache::getOrSet(PageCacheKey key, bool detached_if_missing, bool inject_eviction) +{ + PageChunk * chunk; + /// Make sure we increment exactly one of the counters about the fate of a chunk lookup. + bool incremented_profile_events = false; + + { + std::unique_lock lock(global_mutex); + + auto * it = chunk_by_key.find(key); + if (it == chunk_by_key.end()) + { + chunk = getFreeChunk(lock); + chassert(!chunk->key.has_value()); + + if (!detached_if_missing) + { + chunk->key = key; + chunk_by_key.insert({key, chunk}); + } + + ProfileEvents::increment(ProfileEvents::PageCacheChunkMisses); + incremented_profile_events = true; + } + else + { + chunk = it->getMapped(); + size_t prev_pin_count = chunk->pin_count.fetch_add(1); + + if (prev_pin_count == 0) + { + /// Not eligible for LRU eviction while pinned. + chassert(chunk->is_linked()); + lru.erase(lru.iterator_to(*chunk)); + + if (detached_if_missing) + { + /// Peek the first page to see if it's evicted. + /// (Why not use the full probing procedure instead, restoreChunkFromLimbo()? + /// Right here we can't do it because of how the two mutexes are organized. + /// And we want to do the check+detach before unlocking global_mutex, because + /// otherwise we may detach a chunk pinned by someone else, which may be unexpected + /// for that someone else. Or maybe the latter is fine, dropCache() already does it.) + if (chunk->pages_populated.get(0) && reinterpret_cast*>(chunk->data)->load(std::memory_order_relaxed) == 0) + evictChunk(chunk, lock); + } + + if (inject_eviction && chunk->key.has_value() && rng() % 10 == 0) + { + /// Simulate eviction of the chunk or some of its pages. 
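+                /// (Driven by the `page_cache_inject_eviction` setting added in Core/Settings.h below:
+                /// about half of the time the whole chunk is evicted, otherwise up to 20 random pages
+                /// are marked unpopulated, so readers get exercised against partially evicted chunks.)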
+ if (rng() % 2 == 0) + evictChunk(chunk, lock); + else + for (size_t i = 0; i < 20; ++i) + chunk->pages_populated.unset(rng() % (chunk->size / chunk->page_size)); + } + } + else + { + ProfileEvents::increment(ProfileEvents::PageCacheChunkShared); + incremented_profile_events = true; + } + } + } + + { + std::unique_lock chunk_lock(chunk->chunk_mutex); + + if (chunk->pages_state == PageChunkState::Limbo) + { + auto [pages_restored, pages_evicted] = restoreChunkFromLimbo(chunk, chunk_lock); + chunk->pages_state = PageChunkState::Stable; + + if (!incremented_profile_events) + { + if (pages_evicted == 0) + ProfileEvents::increment(ProfileEvents::PageCacheChunkDataHits); + else if (pages_evicted < pages_restored) + ProfileEvents::increment(ProfileEvents::PageCacheChunkDataPartialHits); + else + ProfileEvents::increment(ProfileEvents::PageCacheChunkDataMisses); + } + } + } + + return PinnedPageChunk(this, chunk); +} + +void PageCache::removeRef(PageChunk * chunk) noexcept +{ + /// Fast path if this is not the last reference. + size_t prev_pin_count = chunk->pin_count.load(); + if (prev_pin_count > 1 && chunk->pin_count.compare_exchange_strong(prev_pin_count, prev_pin_count - 1)) + return; + + { + std::unique_lock lock(global_mutex); + + prev_pin_count = chunk->pin_count.fetch_sub(1); + if (prev_pin_count > 1) + return; + + chassert(!chunk->is_linked()); + if (chunk->key.has_value()) + lru.push_back(*chunk); + else + /// Unpinning detached chunk. We'd rather reuse it soon, so put it at the front. + lru.push_front(*chunk); + } + + { + std::unique_lock chunk_lock(chunk->chunk_mutex); + + /// Need to be extra careful here because we unlocked global_mutex above, so other + /// getOrSet()/removeRef() calls could have happened during this brief period. + if (use_madv_free && chunk->pages_state == PageChunkState::Stable && chunk->pin_count.load() == 0) + { + sendChunkToLimbo(chunk, chunk_lock); + chunk->pages_state = PageChunkState::Limbo; + } + } +} + +static void logUnexpectedSyscallError(std::string name) +{ + std::string message = fmt::format("{} failed: {}", name, errnoToString()); + LOG_WARNING(&Poco::Logger::get("PageCache"), "{}", message); +#if defined(ABORT_ON_LOGICAL_ERROR) + volatile bool true_ = true; + if (true_) // suppress warning about missing [[noreturn]] + abortOnFailedAssertion(message); +#endif +} + +void PageCache::sendChunkToLimbo(PageChunk * chunk [[maybe_unused]], std::unique_lock & /* chunk_mutex */) const noexcept +{ +#ifdef MADV_FREE // if we're not on a very old version of Linux + chassert(chunk->size == bytes_per_page * pages_per_chunk); + size_t populated_pages = 0; + size_t populated_big_pages = 0; + for (size_t big_page_idx = 0; big_page_idx < pages_per_chunk / pages_per_big_page; ++big_page_idx) + { + bool big_page_populated = false; + for (size_t sub_idx = 0; sub_idx < pages_per_big_page; ++sub_idx) + { + size_t idx = big_page_idx * pages_per_big_page + sub_idx; + if (!chunk->pages_populated.get(idx)) + continue; + big_page_populated = true; + populated_pages += 1; + + auto & byte = reinterpret_cast &>(chunk->data[idx * bytes_per_page]); + chunk->first_bit_of_each_page.set(idx, (byte.load(std::memory_order_relaxed) & 1) != 0); + byte.fetch_or(1, std::memory_order_relaxed); + } + if (big_page_populated) + populated_big_pages += 1; + } + int r = madvise(chunk->data, chunk->size, MADV_FREE); + if (r != 0) + logUnexpectedSyscallError("madvise(MADV_FREE)"); + + ProfileEvents::increment(ProfileEvents::PageCacheBytesUnpinnedRoundedToPages, bytes_per_page * populated_pages); + 
ProfileEvents::increment(ProfileEvents::PageCacheBytesUnpinnedRoundedToHugePages, bytes_per_page * pages_per_big_page * populated_big_pages); +#endif +} + +std::pair PageCache::restoreChunkFromLimbo(PageChunk * chunk, std::unique_lock & /* chunk_mutex */) const noexcept +{ + static_assert(sizeof(std::atomic) == 1, "char is not atomic?"); + // Make sure our strategic memory reads/writes are not reordered or optimized out. + auto * data = reinterpret_cast *>(chunk->data); + size_t pages_restored = 0; + size_t pages_evicted = 0; + for (size_t idx = 0; idx < chunk->size / bytes_per_page; ++idx) + { + if (!chunk->pages_populated.get(idx)) + continue; + + /// After MADV_FREE, it's guaranteed that: + /// * writing to the page makes it non-freeable again (reading doesn't), + /// * after the write, the page contents are either fully intact or fully zero-filled, + /// * even before the write, reads return either intact data (if the page wasn't freed) or zeroes (if it was, and the read page-faulted). + /// (And when doing the write there's no way to tell whether it page-faulted or not, AFAICT; that would make our life much easier!) + /// + /// With that in mind, we do the following dance to bring the page back from the MADV_FREE limbo: + /// 0. [in advance] Before doing MADV_FREE, make sure the page's first byte is not zero. + /// We do it by setting the lowest bit of the first byte to 1, after saving the original value of that bit into a bitset. + /// 1. Read the second byte. + /// 2. Write the second byte back. This makes the page non-freeable. + /// 3. Read the first byte. + /// 3a. If it's zero, the page was freed. + /// Set the second byte to 0, to keep the buffer zero-filled if the page was freed + /// between steps 1 and 2. + /// 3b. If it's nonzero, the page is intact. + /// Restore the lowest bit of the first byte to the saved original value from the bitset. + + char second_byte = data[idx * bytes_per_page + 1].load(std::memory_order_relaxed); + data[idx * bytes_per_page + 1].store(second_byte, std::memory_order_relaxed); + + char first_byte = data[idx * bytes_per_page].load(std::memory_order_relaxed); + if (first_byte == 0) + { + pages_evicted += 1; + data[idx * bytes_per_page + 1].store(0, std::memory_order_relaxed); + chunk->pages_populated.unset(idx); + } + else + { + pages_restored += 1; + chassert(first_byte & 1); + if (!chunk->first_bit_of_each_page.get(idx)) + data[idx * bytes_per_page].fetch_and(~1, std::memory_order_relaxed); + } + } + return {pages_restored, pages_evicted}; +} + +PageChunk * PageCache::getFreeChunk(std::unique_lock & lock /* global_mutex */) +{ + if (lru.empty() || (mmaps.size() < max_mmaps && lru.front().key.has_value())) + addMmap(lock); + if (lru.empty()) + throw Exception(ErrorCodes::MEMORY_LIMIT_EXCEEDED, "All chunks in the entire page cache ({:.3} GiB) are pinned.", + bytes_per_page * pages_per_chunk * total_chunks * 1. / (1l << 30)); + + PageChunk * chunk = &lru.front(); + lru.erase(lru.iterator_to(*chunk)); + + size_t prev_pin_count = chunk->pin_count.fetch_add(1); + chassert(prev_pin_count == 0); + + evictChunk(chunk, lock); + + return chunk; +} + +void PageCache::evictChunk(PageChunk * chunk, std::unique_lock & /* global_mutex */) +{ + if (chunk->key.has_value()) + { + size_t erased = chunk_by_key.erase(chunk->key.value()); + chassert(erased); + chunk->key.reset(); + } + + chunk->state.reset(); + + /// This is tricky. 
We're not holding the chunk_mutex, so another thread might be running
+    /// sendChunkToLimbo() or even restoreChunkFromLimbo() on this chunk right now.
+    ///
+    /// Nevertheless, it's correct and sufficient to clear pages_populated here because sendChunkToLimbo()
+    /// and restoreChunkFromLimbo() only touch pages_populated (only unsetting the bits),
+    /// first_bit_of_each_page, and the data; and we don't care about first_bit_of_each_page and the data.
+    ///
+    /// This is precarious, but I don't have better ideas. Note that this clearing (or something else)
+    /// must be done before unlocking the global_mutex because otherwise another call to getOrSet() might
+    /// return this chunk before we clear it.
+    chunk->pages_populated.unsetAll();
+}
+
+void PageCache::addMmap(std::unique_lock<std::mutex> & /* global_mutex */)
+{
+    /// ASLR by hand.
+    void * address_hint = reinterpret_cast<void *>(std::uniform_int_distribution<size_t>(0x100000000000UL, 0x700000000000UL)(rng));
+
+    mmaps.emplace_back(bytes_per_page, pages_per_chunk, pages_per_big_page, chunks_per_mmap_target, address_hint, use_huge_pages);
+
+    size_t num_chunks = mmaps.back().num_chunks;
+    total_chunks += num_chunks;
+    for (size_t i = 0; i < num_chunks; ++i)
+        /// Link in reverse order, so they get assigned in increasing order. Not important, just seems nice.
+        lru.push_front(mmaps.back().chunks[num_chunks - 1 - i]);
+}
+
+void PageCache::dropCache()
+{
+    std::unique_lock lock(global_mutex);
+
+    /// Detach and free unpinned chunks.
+    bool logged_error = false;
+    for (PageChunk & chunk : lru)
+    {
+        evictChunk(&chunk, lock);
+
+        if (use_madv_free)
+        {
+            /// This might happen in parallel with sendChunkToLimbo() or restoreChunkFromLimbo(), but it's ok.
+            int r = madvise(chunk.data, chunk.size, MADV_DONTNEED);
+            if (r != 0 && !logged_error)
+            {
+                logUnexpectedSyscallError("madvise(MADV_DONTNEED)");
+                logged_error = true;
+            }
+        }
+    }
+
+    /// Detach pinned chunks.
+ for (auto [key, chunk] : chunk_by_key) + { + chassert(chunk->key == key); + chassert(chunk->pin_count > 0); // otherwise it would have been evicted above + chunk->key.reset(); + } + chunk_by_key.clear(); +} + +PageCache::Mmap::Mmap(size_t bytes_per_page_, size_t pages_per_chunk_, size_t pages_per_big_page_, size_t num_chunks_, void * address_hint, bool use_huge_pages_) +{ + num_chunks = num_chunks_; + size = bytes_per_page_ * pages_per_chunk_ * num_chunks; + + size_t alignment = bytes_per_page_ * pages_per_big_page_; + address_hint = reinterpret_cast(reinterpret_cast(address_hint) / alignment * alignment); + + auto temp_chunks = std::make_unique(num_chunks); + + int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#ifdef OS_LINUX + flags |= MAP_NORESERVE; +#endif + ptr = mmap(address_hint, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (MAP_FAILED == ptr) + throw ErrnoException(ErrorCodes::CANNOT_ALLOCATE_MEMORY, fmt::format("Cannot mmap {}.", ReadableSize(size))); + if (reinterpret_cast(ptr) % bytes_per_page_ != 0) + { + munmap(ptr, size); + throw Exception(ErrorCodes::SYSTEM_ERROR, "mmap returned unaligned address: {}", ptr); + } + + void * chunks_start = ptr; + +#ifdef OS_LINUX + if (madvise(ptr, size, MADV_DONTDUMP) != 0) + logUnexpectedSyscallError("madvise(MADV_DONTDUMP)"); + if (madvise(ptr, size, MADV_DONTFORK) != 0) + logUnexpectedSyscallError("madvise(MADV_DONTFORK)"); + + if (use_huge_pages_) + { + if (reinterpret_cast(ptr) % alignment != 0) + { + LOG_DEBUG(&Poco::Logger::get("PageCache"), "mmap() returned address not aligned on huge page boundary."); + chunks_start = reinterpret_cast((reinterpret_cast(ptr) / alignment + 1) * alignment); + chassert(reinterpret_cast(chunks_start) % alignment == 0); + num_chunks -= 1; + } + + if (madvise(ptr, size, MADV_HUGEPAGE) != 0) + LOG_WARNING(&Poco::Logger::get("PageCache"), + "madvise(MADV_HUGEPAGE) failed: {}. Userspace page cache will be relatively slow.", errnoToString()); + } +#else + (void)use_huge_pages_; +#endif + + chunks = std::move(temp_chunks); + for (size_t i = 0; i < num_chunks; ++i) + { + PageChunk * chunk = &chunks[i]; + chunk->data = reinterpret_cast(chunks_start) + bytes_per_page_ * pages_per_chunk_ * i; + chunk->size = bytes_per_page_ * pages_per_chunk_; + chunk->page_size = bytes_per_page_; + chunk->big_page_size = bytes_per_page_ * pages_per_big_page_; + chunk->pages_populated.init(pages_per_chunk_); + chunk->first_bit_of_each_page.init(pages_per_chunk_); + } +} + +PageCache::Mmap::Mmap(Mmap && m) noexcept : ptr(std::exchange(m.ptr, nullptr)), size(std::exchange(m.size, 0)), chunks(std::move(m.chunks)), num_chunks(std::exchange(m.num_chunks, 0)) {} + +PageCache::Mmap::~Mmap() noexcept +{ + if (ptr && 0 != munmap(ptr, size)) + logUnexpectedSyscallError("munmap"); +} + +void FileChunkState::reset() {} + +PageCacheKey FileChunkAddress::hash() const +{ + SipHash hash(offset); + hash.update(path.data(), path.size()); + if (!file_version.empty()) + { + hash.update("\0", 1); + hash.update(file_version.data(), file_version.size()); + } + return hash.get128(); +} + +std::string FileChunkAddress::toString() const +{ + return fmt::format("{}:{}{}{}", path, offset, file_version.empty() ? 
"" : ":", file_version); +} + +#pragma clang diagnostic pop + +} diff --git a/src/Common/PageCache.h b/src/Common/PageCache.h new file mode 100644 index 00000000000..7ff376baa6b --- /dev/null +++ b/src/Common/PageCache.h @@ -0,0 +1,299 @@ +#pragma once + +#include +#include +#include +#include +#include + +/// "Userspace page cache" +/// A cache for contents of remote files. +/// Uses MADV_FREE to allow Linux to evict pages from our cache under memory pressure. +/// Typically takes up almost all of the available memory, similar to the actual page cache. +/// +/// Intended for caching data retrieved from distributed cache, but can be used for other things too, +/// just replace FileChunkState with a discriminated union, or something, if needed. +/// +/// There are two fixed-size units of caching here: +/// * OS pages, typically 4 KiB each. +/// * Page chunks, 2 MiB each (configurable with page_cache_block_size setting). +/// +/// Each file is logically split into aligned 2 MiB blocks, which are mapped to page chunks inside the cache. +/// They are cached independently from each other. +/// +/// Each page chunk has a contiguous 2 MiB buffer that can be pinned and directly used e.g. by ReadBuffers. +/// While pinned (by at least one PinnedPageChunk), the pages are not reclaimable by the OS. +/// +/// Inside each page chunk, any subset of pages may be populated. Unpopulated pages may or not be +/// mapped to any physical RAM. We maintain a bitmask that keeps track of which pages are populated. +/// Pages become unpopulated if they're reclaimed by the OS (when the page chunk is not pinned), +/// or if we just never populate them in the first place (e.g. if a file is shorter than 2 MiB we +/// still create a 2 MiB page chunk, but use only a prefix of it). +/// +/// There are two separate eviction mechanisms at play: +/// * LRU eviction of page chunks in PageCache. +/// * OS reclaiming pages on memory pressure. We have no control over the eviction policy. +/// It probably picks the pages in the same order in which they were marked with MADV_FREE, so +/// effectively in the same LRU order as our policy in PageCache. +/// When using PageCache in oversubscribed fashion, using all available memory and relying on OS eviction, +/// the PageCache's eviction policy mostly doesn't matter. It just needs to be similar enough to the OS's +/// policy that we rarely evict chunks with unevicted pages. +/// +/// We mmap memory directly instead of using allocator because this enables: +/// * knowing how much RAM the cache is using, via /proc/self/smaps, +/// * MADV_HUGEPAGE (use transparent huge pages - this makes MADV_FREE 10x less slow), +/// * MAP_NORESERVE (don't reserve swap space - otherwise large mmaps usually fail), +/// * MADV_DONTDUMP (don't include in core dumps), +/// * page-aligned addresses without padding. +/// +/// madvise(MADV_FREE) call is slow: ~6 GiB/s (doesn't scale with more threads). Enabling transparent +/// huge pages (MADV_HUGEPAGE) makes it 10x less slow, so we do that. That makes the physical RAM allocation +/// work at 2 MiB granularity instead of 4 KiB, so the cache becomes less suitable for small files. +/// If this turns out to be a problem, we may consider allowing different mmaps to have different flags, +/// some having no huge pages. +/// Note that we do our bookkeeping at small-page granularity even if huge pages are enabled. 
+/// +/// It's unfortunate that Linux's MADV_FREE eviction doesn't use the two-list strategy like the real +/// page cache (IIUC, MADV_FREE puts the pages at the head of the inactive list, and they can never +/// get to the active list). +/// If this turns out to be a problem, we could make PageCache do chunk eviction based on observed +/// system memory usage, so that most eviction is done by us, and the MADV_FREE eviction kicks in +/// only as a last resort. Then we can make PageCache's eviction policy arbitrarily more sophisticated. + +namespace DB +{ + +/// Hash of FileChunkAddress. +using PageCacheKey = UInt128; + +/// Identifies a chunk of a file or object. +/// We assume that contents of such file/object don't change (without file_version changing), so +/// cache invalidation is needed. +struct FileChunkAddress +{ + /// Path, usually prefixed with storage system name and anything else needed to make it unique. + /// E.g. "s3:/" + std::string path; + /// Optional string with ETag, or file modification time, or anything else. + std::string file_version; + size_t offset = 0; + + PageCacheKey hash() const; + + std::string toString() const; +}; + +struct AtomicBitSet +{ + size_t n = 0; + std::unique_ptr[]> v; + + AtomicBitSet(); + + void init(size_t n); + + bool get(size_t i) const; + bool any() const; + /// These return true if the bit was changed, false if it already had the target value. + /// (These methods are logically not const, but clang insists that I make them const, and + /// '#pragma clang diagnostic ignored' doesn't seem to work.) + bool set(size_t i) const; + bool set(size_t i, bool val) const; + bool unset(size_t i) const; + void unsetAll() const; +}; + +enum class PageChunkState +{ + /// Pages are not reclaimable by the OS, the buffer has correct contents. + Stable, + /// Pages are reclaimable by the OS, the buffer contents are altered (first bit of each page set to 1). + Limbo, +}; + +/// (This is a separate struct just in case we want to use this cache for other things in future. +/// Then this struct would be the customization point, while the rest of PageChunk can stay unchanged.) +struct FileChunkState +{ + std::mutex download_mutex; + + void reset(); +}; + +using PageChunkLRUListHook = boost::intrusive::list_base_hook<>; + +/// Cache entry. +struct PageChunk : public PageChunkLRUListHook +{ + char * data; + size_t size; // in bytes + /// Page size for use in pages_populated and first_bit_of_each_page. Same as PageCache::pageSize(). + size_t page_size; + + /// Actual eviction granularity. Just for information. If huge pages are used, huge page size, otherwise page_size. + size_t big_page_size; + + mutable FileChunkState state; + + AtomicBitSet pages_populated; + +private: + friend class PinnedPageChunk; + friend class PageCache; + + /// If nullopt, the chunk is "detached", i.e. not associated with any key. + /// Detached chunks may still be pinned. Chunk may get detached even while pinned, in particular when dropping cache. + /// Protected by global_mutex. + std::optional key; + + /// Refcount for usage of this chunk. When zero, the pages are reclaimable by the OS, and + /// the PageChunk itself is evictable (linked into PageCache::lru). + std::atomic pin_count {0}; + + /// Bit mask containing the first bit of data from each page. Needed for the weird probing procedure when un-MADV_FREE-ing the pages. + AtomicBitSet first_bit_of_each_page; + + /// Locked when changing pages_state, along with the corresponding expensive MADV_FREE/un-MADV_FREE operation. 
+ mutable std::mutex chunk_mutex; + + /// Normally pin_count == 0 <=> state == PageChunkState::Limbo, + /// pin_count > 0 <=> state == PageChunkState::Stable. + /// This separate field is needed because of synchronization: pin_count is changed with global_mutex locked, + /// this field is changed with chunk_mutex locked, and we never have to lock both mutexes at once. + PageChunkState pages_state = PageChunkState::Stable; +}; + +class PageCache; + +/// Handle for a cache entry. Neither the entry nor its pages can get evicted while there's at least one PinnedPageChunk pointing to it. +class PinnedPageChunk +{ +public: + const PageChunk * getChunk() const; + + /// Sets the bit in pages_populated. Returns true if it actually changed (i.e. was previously 0). + bool markPagePopulated(size_t page_idx); + + /// Calls markPagePopulated() for pages 0..ceil(bytes/page_size). + void markPrefixPopulated(size_t bytes); + + bool isPrefixPopulated(size_t bytes) const; + + PinnedPageChunk() = default; + ~PinnedPageChunk() noexcept; + + PinnedPageChunk(PinnedPageChunk &&) noexcept; + PinnedPageChunk & operator=(PinnedPageChunk &&) noexcept; + +private: + friend class PageCache; + + PageCache * cache = nullptr; + PageChunk * chunk = nullptr; + + PinnedPageChunk(PageCache * cache_, PageChunk * chunk_) noexcept; +}; + +class PageCache +{ +public: + PageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free, bool use_huge_pages); + ~PageCache(); + + /// Get or insert a chunk for the given key. + /// + /// If detached_if_missing = true, and the key is not present in the cache, the returned chunk + /// won't be associated with the key and will be evicted as soon as it's unpinned. + /// It's like "get if exists, otherwise return null", but instead of null we return a usable + /// temporary buffer, for convenience. Pinning and page eviction make the story more complicated: + /// * If the chunk for this key is pinned, we return it even if it's not fully populated + /// (because PageCache doesn't know what "fully populated" means). + /// * If the chunk exists, but some of its pages were evicted, we detach it. (Currently we only + /// check the first page here.) + PinnedPageChunk getOrSet(PageCacheKey key, bool detached_if_missing, bool inject_eviction); + + /// OS page size, e.g. 4 KiB on x86, 4 KiB or 64 KiB on aarch64. + /// + /// If transparent huge pages are enabled, this is still the regular page size, and all our bookkeeping + /// is still based on regular page size (e.g. pages_populated), because (a) it's cheap anyway, + /// and (b) I'm not sure if Linux guarantees that MADV_FREE reclamation always happens at huge page + /// granularity, and wouldn't want to rely on this even if it does. + size_t pageSize() const; + size_t chunkSize() const; + size_t maxChunks() const; + + struct MemoryStats + { + /// How many bytes of actual RAM are used for the cache pages. Doesn't include metadata + /// and overhead (e.g. PageChunk structs). + size_t page_cache_rss = 0; + /// Resident set size for the whole process, excluding any MADV_FREE pages (PageCache's or not). + /// This can be used as a more useful memory usage number for clickhouse server, instead of RSS. + /// Populated only if MADV_FREE is used, otherwise zero. + std::optional unreclaimable_rss; + }; + + /// Reads /proc/self/smaps, so not very fast. + MemoryStats getResidentSetSize() const; + + /// Total length of memory ranges currently pinned by PinnedPageChunk-s, including unpopulated pages. 
+ size_t getPinnedSize() const; + + /// Clears the key -> chunk mapping. Frees memory (MADV_DONTNEED) of all chunks that are not pinned. + /// Doesn't unmap any virtual memory. Detaches but doesn't free the pinned chunks. + /// Locks the global mutex for the duration of the operation, which may block queries for hundreds of milliseconds. + void dropCache(); + +private: + friend class PinnedPageChunk; + + struct Mmap + { + void * ptr = nullptr; + size_t size = 0; + + std::unique_ptr chunks; + size_t num_chunks = 0; // might be smaller than chunks_per_mmap_target because of alignment + + Mmap(Mmap &&) noexcept; + Mmap(size_t bytes_per_page, size_t pages_per_chunk, size_t pages_per_big_page, size_t num_chunks, void * address_hint, bool use_huge_pages_); + ~Mmap() noexcept; + }; + + size_t bytes_per_page; + size_t pages_per_chunk; + size_t chunks_per_mmap_target; + size_t max_mmaps; + size_t pages_per_big_page = 1; // if huge pages are used, huge_page_size/page_size, otherwise 1 + bool use_madv_free = true; + bool use_huge_pages = true; + + mutable std::mutex global_mutex; + + pcg64 rng; + + std::vector mmaps; + size_t total_chunks = 0; + + /// All non-pinned chunks, including ones not assigned to any file. Least recently used is begin(). + boost::intrusive::list, boost::intrusive::constant_time_size> lru; + + HashMap chunk_by_key; + + /// Get a usable chunk, doing eviction or allocation if needed. + /// Caller is responsible for clearing pages_populated. + PageChunk * getFreeChunk(std::unique_lock & /* global_mutex */); + void addMmap(std::unique_lock & /* global_mutex */); + void evictChunk(PageChunk * chunk, std::unique_lock & /* global_mutex */); + + void removeRef(PageChunk * chunk) noexcept; + + /// These may run in parallel with getFreeChunk(), so be very careful about which fields of the PageChunk we touch here. + void sendChunkToLimbo(PageChunk * chunk, std::unique_lock & /* chunk_mutex */) const noexcept; + /// Returns {pages_restored, pages_evicted}. + std::pair restoreChunkFromLimbo(PageChunk * chunk, std::unique_lock & /* chunk_mutex */) const noexcept; +}; + +using PageCachePtr = std::shared_ptr; + +} diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d8ca1ab9e93..3a8659b8b27 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -63,6 +63,15 @@ M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.") \ M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.") \ M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). Only updated for SELECT queries with SETTING use_query_cache = 1.") \ + /* Each page cache chunk access increments exactly one of the following 5 PageCacheChunk* counters. */ \ + /* Something like hit rate: (PageCacheChunkShared + PageCacheChunkDataHits) / [sum of all 5]. 
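+       For example (made-up numbers): PageCacheChunkMisses = 10, PageCacheChunkShared = 5, \
+       PageCacheChunkDataHits = 80, PageCacheChunkDataPartialHits = 3, PageCacheChunkDataMisses = 2 \
+       give a chunk hit rate of (5 + 80) / 100 = 85%. \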
*/ \ + M(PageCacheChunkMisses, "Number of times a chunk has not been found in the userspace page cache.") \ + M(PageCacheChunkShared, "Number of times a chunk has been found in the userspace page cache, already in use by another thread.") \ + M(PageCacheChunkDataHits, "Number of times a chunk has been found in the userspace page cache, not in use, with all pages intact.") \ + M(PageCacheChunkDataPartialHits, "Number of times a chunk has been found in the userspace page cache, not in use, but some of its pages were evicted by the OS.") \ + M(PageCacheChunkDataMisses, "Number of times a chunk has been found in the userspace page cache, not in use, but all its pages were evicted by the OS.") \ + M(PageCacheBytesUnpinnedRoundedToPages, "Total size of populated pages in chunks that became evictable in PageCache. Rounded up to whole pages.") \ + M(PageCacheBytesUnpinnedRoundedToHugePages, "See PageCacheBytesUnpinnedRoundedToPages, but rounded to huge pages. Use the ratio between the two as a measure of memory waste from using huge pages.") \ M(CreatedReadBufferOrdinary, "Number of times ordinary read buffer was created for reading data (while choosing among other read methods).") \ M(CreatedReadBufferDirectIO, "Number of times a read buffer with O_DIRECT was created for reading data (while choosing among other read methods).") \ M(CreatedReadBufferDirectIOFailed, "Number of times a read buffer with O_DIRECT was attempted to be created for reading data (while choosing among other read methods), but the OS did not allow it (due to lack of filesystem support or other reasons) and we fallen back to the ordinary reading method.") \ diff --git a/src/Core/Defines.h b/src/Core/Defines.h index bf9fb1db6bc..cc6f49aa361 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -70,6 +70,15 @@ static constexpr auto DBMS_DEFAULT_MAX_QUERY_SIZE = 262144; /// Max depth of hierarchical dictionary static constexpr auto DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH = 1000; +#ifdef OS_LINUX +#define DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE true +#else +/// On Mac OS, MADV_FREE is not lazy, so page_cache_use_madv_free should be disabled. +/// On FreeBSD, it may work but we haven't tested it. +#define DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE false +#endif + + /// Default maximum (total and entry) sizes and policies of various caches static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 3713d0c3206..a54fb42b464 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -65,7 +65,7 @@ namespace DB M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \ M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \ \ - M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \ + M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \ M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \ M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. 
Zero means disabled.", 0) \ M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \ @@ -78,6 +78,11 @@ namespace DB M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \ M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \ M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \ + M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \ + M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \ + M(UInt64, page_cache_size, 10ul << 30, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \ + M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \ + M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \ M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \ \ M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ae6ea165cc9..7d1112af3a7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -777,6 +777,10 @@ class IColumn; M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \ M(UInt64, filesystem_cache_segments_batch_size, 20, "Limit on size of a single batch of file segments that a read buffer can request from cache. Too low value will lead to excessive requests to cache, too large may slow down eviction from cache", 0) \ \ + M(Bool, use_page_cache_for_disks_without_file_cache, false, "Use userspace page cache for remote disks that don't have filesystem cache enabled.", 0) \ + M(Bool, read_from_page_cache_if_exists_otherwise_bypass_cache, false, "Use userspace page cache in passive mode, similar to read_from_filesystem_cache_if_exists_otherwise_bypass_cache.", 0) \ + M(Bool, page_cache_inject_eviction, false, "Userspace page cache will sometimes invalidate some pages at random. Intended for testing.", 0) \ + \ M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \ M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. 
Should be used only for testing or debugging, not recommended to be turned on by default", 0) \ M(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, "Prefer prefetched threadpool if all parts are on remote filesystem", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index e8d013d13ec..02ee641903c 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -114,6 +114,9 @@ static std::map sett {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, + {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, + {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, }}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp index 2373640704b..1a9cd2c994c 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -129,6 +129,7 @@ void AsynchronousBoundedReadBuffer::setReadUntilPosition(size_t position) /// new read until position is after the current position in the working buffer file_offset_of_buffer_end = position; working_buffer.resize(working_buffer.size() - (file_offset_of_buffer_end - position)); + pos = std::min(pos, working_buffer.end()); } else { @@ -235,9 +236,6 @@ bool AsynchronousBoundedReadBuffer::nextImpl() file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); - /// In case of multiple files for the same file in clickhouse (i.e. log family) - /// file_offset_of_buffer_end will not match getImplementationBufferOffset() - /// so we use [impl->getImplementationBufferOffset(), impl->getFileSize()] chassert(file_offset_of_buffer_end <= impl->getFileSize()); if (read_until_position && (file_offset_of_buffer_end > *read_until_position)) @@ -264,7 +262,7 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) size_t new_pos; if (whence == SEEK_SET) { - assert(offset >= 0); + chassert(offset >= 0); new_pos = offset; } else if (whence == SEEK_CUR) @@ -290,8 +288,8 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) /// Position is still inside the buffer. /// Probably it is at the end of the buffer - then we will load data on the following 'next' call. pos = working_buffer.end() - file_offset_of_buffer_end + new_pos; - assert(pos >= working_buffer.begin()); - assert(pos <= working_buffer.end()); + chassert(pos >= working_buffer.begin()); + chassert(pos <= working_buffer.end()); return new_pos; } @@ -317,7 +315,7 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) break; } - assert(!prefetch_future.valid()); + chassert(!prefetch_future.valid()); /// First reset the buffer so the next read will fetch new data to the buffer. 
resetWorkingBuffer(); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 7ce3d58dcd8..47ee5858562 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -1215,7 +1215,7 @@ size_t CachedOnDiskReadBufferFromFile::getRemainingSizeToRead() void CachedOnDiskReadBufferFromFile::setReadUntilPosition(size_t position) { - if (!allow_seeks_after_first_read) + if (initialized && !allow_seeks_after_first_read) throw Exception(ErrorCodes::LOGICAL_ERROR, "Method `setReadUntilPosition()` not allowed"); if (read_until_position == position) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 0b3ecca3587..417f7615dd7 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -16,12 +17,16 @@ using namespace DB; namespace { -bool withCache(const ReadSettings & settings) +bool withFileCache(const ReadSettings & settings) { return settings.remote_fs_cache && settings.enable_filesystem_cache && (!CurrentThread::getQueryId().empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache || !settings.avoid_readthrough_cache_outside_query_context); } +bool withPageCache(const ReadSettings & settings, bool with_file_cache) +{ + return settings.page_cache && !with_file_cache && settings.use_page_cache_for_disks_without_file_cache; +} } namespace DB @@ -34,7 +39,7 @@ namespace ErrorCodes size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) { /// Only when cache is used we could download bigger portions of FileSegments than what we actually gonna read within particular task. - if (!withCache(settings)) + if (!withFileCache(settings)) return settings.remote_fs_buffer_size; /// Buffers used for prefetch and pre-download better to have enough size, but not bigger than the whole file. @@ -44,27 +49,30 @@ size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( ReadBufferCreator && read_buffer_creator_, const StoredObjects & blobs_to_read_, + const std::string & cache_path_prefix_, const ReadSettings & settings_, std::shared_ptr cache_log_, bool use_external_buffer_) - : ReadBufferFromFileBase( - use_external_buffer_ ? 0 : chooseBufferSizeForRemoteReading(settings_, getTotalSize(blobs_to_read_)), nullptr, 0) + : ReadBufferFromFileBase(use_external_buffer_ ? 0 : chooseBufferSizeForRemoteReading( + settings_, getTotalSize(blobs_to_read_)), nullptr, 0) , settings(settings_) , blobs_to_read(blobs_to_read_) , read_buffer_creator(std::move(read_buffer_creator_)) + , cache_path_prefix(cache_path_prefix_) , cache_log(settings.enable_filesystem_cache_log ? 
cache_log_ : nullptr) , query_id(CurrentThread::getQueryId()) , use_external_buffer(use_external_buffer_) - , with_cache(withCache(settings)) + , with_file_cache(withFileCache(settings)) + , with_page_cache(withPageCache(settings, with_file_cache)) , log(getLogger("ReadBufferFromRemoteFSGather")) { if (!blobs_to_read.empty()) current_object = blobs_to_read.front(); } -SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object) +SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object, size_t start_offset) { - if (current_buf && !with_cache) + if (current_buf && !with_file_cache) { appendUncachedReadInfo(); } @@ -72,30 +80,45 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c current_object = object; const auto & object_path = object.remote_path; - size_t current_read_until_position = read_until_position ? read_until_position : object.bytes_size; - auto current_read_buffer_creator = [=, this]() { return read_buffer_creator(object_path, current_read_until_position); }; + std::unique_ptr buf; #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - if (with_cache) + if (with_file_cache) { auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); - return std::make_shared( + buf = std::make_unique( object_path, cache_key, settings.remote_fs_cache, FileCache::getCommonUser(), - std::move(current_read_buffer_creator), + [=, this]() { return read_buffer_creator(/* restricted_seek */true, object_path); }, settings, query_id, object.bytes_size, /* allow_seeks */false, /* use_external_buffer */true, - read_until_position ? std::optional(read_until_position) : std::nullopt, + /* read_until_position */std::nullopt, cache_log); } #endif - return current_read_buffer_creator(); + /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the + /// former doesn't support seeks. + if (with_page_cache && !buf) + { + auto inner = read_buffer_creator(/* restricted_seek */false, object_path); + auto cache_key = FileChunkAddress { .path = cache_path_prefix + object_path }; + buf = std::make_unique( + cache_key, settings.page_cache, std::move(inner), settings); + } + + if (!buf) + buf = read_buffer_creator(/* restricted_seek */true, object_path); + + if (read_until_position > start_offset && read_until_position < start_offset + object.bytes_size) + buf->setReadUntilPosition(read_until_position - start_offset); + + return buf; } void ReadBufferFromRemoteFSGather::appendUncachedReadInfo() @@ -124,12 +147,12 @@ void ReadBufferFromRemoteFSGather::initialize() return; /// One clickhouse file can be split into multiple files in remote fs. 
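+    /// For illustration (made-up sizes): with blobs of 4 MiB and 6 MiB and
+    /// file_offset_of_buffer_end = 5 MiB, start_offset is 0 for the first blob and 4 MiB for the
+    /// second; the second blob is picked (4 MiB + 6 MiB > 5 MiB) and seeked to
+    /// file_offset_of_buffer_end - start_offset = 1 MiB within that blob.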
- auto current_buf_offset = file_offset_of_buffer_end; + size_t start_offset = 0; for (size_t i = 0; i < blobs_to_read.size(); ++i) { const auto & object = blobs_to_read[i]; - if (object.bytes_size > current_buf_offset) + if (start_offset + object.bytes_size > file_offset_of_buffer_end) { LOG_TEST(log, "Reading from file: {} ({})", object.remote_path, object.local_path); @@ -137,14 +160,14 @@ void ReadBufferFromRemoteFSGather::initialize() if (!current_buf || current_buf_idx != i) { current_buf_idx = i; - current_buf = createImplementationBuffer(object); + current_buf = createImplementationBuffer(object, start_offset); } - current_buf->seek(current_buf_offset, SEEK_SET); + current_buf->seek(file_offset_of_buffer_end - start_offset, SEEK_SET); return; } - current_buf_offset -= object.bytes_size; + start_offset += object.bytes_size; } current_buf_idx = blobs_to_read.size(); current_buf = nullptr; @@ -171,14 +194,14 @@ bool ReadBufferFromRemoteFSGather::nextImpl() bool ReadBufferFromRemoteFSGather::moveToNextBuffer() { /// If there is no available buffers - nothing to read. - if (current_buf_idx + 1 >= blobs_to_read.size()) + if (current_buf_idx + 1 >= blobs_to_read.size() || (read_until_position && file_offset_of_buffer_end >= read_until_position)) return false; ++current_buf_idx; const auto & object = blobs_to_read[current_buf_idx]; LOG_TEST(log, "Reading from next file: {} ({})", object.remote_path, object.local_path); - current_buf = createImplementationBuffer(object); + current_buf = createImplementationBuffer(object, file_offset_of_buffer_end); return true; } @@ -263,7 +286,7 @@ off_t ReadBufferFromRemoteFSGather::seek(off_t offset, int whence) ReadBufferFromRemoteFSGather::~ReadBufferFromRemoteFSGather() { - if (!with_cache) + if (!with_file_cache) appendUncachedReadInfo(); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index f6b7506a54f..8362b354e23 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -21,11 +21,12 @@ class ReadBufferFromRemoteFSGather final : public ReadBufferFromFileBase friend class ReadIndirectBufferFromRemoteFS; public: - using ReadBufferCreator = std::function(const std::string & path, size_t read_until_position)>; + using ReadBufferCreator = std::function(bool restricted_seek, const std::string & path)>; ReadBufferFromRemoteFSGather( ReadBufferCreator && read_buffer_creator_, const StoredObjects & blobs_to_read_, + const std::string & cache_path_prefix_, const ReadSettings & settings_, std::shared_ptr cache_log_, bool use_external_buffer_); @@ -53,7 +54,7 @@ public: bool isContentCached(size_t offset, size_t size) override; private: - SeekableReadBufferPtr createImplementationBuffer(const StoredObject & object); + SeekableReadBufferPtr createImplementationBuffer(const StoredObject & object, size_t start_offset); bool nextImpl() override; @@ -70,10 +71,12 @@ private: const ReadSettings settings; const StoredObjects blobs_to_read; const ReadBufferCreator read_buffer_creator; + const std::string cache_path_prefix; const std::shared_ptr cache_log; const String query_id; const bool use_external_buffer; - const bool with_cache; + const bool with_file_cache; + const bool with_page_cache; size_t read_until_position = 0; size_t file_offset_of_buffer_end = 0; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index f3caf62ffd5..590fc4c4656 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ 
b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -152,6 +152,8 @@ IAsynchronousReader::Result ThreadPoolRemoteFSReader::execute(Request request, b IAsynchronousReader::Result read_result; if (result) { + chassert(reader.buffer().begin() == request.buf); + chassert(reader.buffer().end() <= request.buf + request.size); read_result.size = reader.buffer().size(); read_result.offset = reader.offset(); ProfileEvents::increment(ProfileEvents::ThreadpoolReaderReadBytes, read_result.size); diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index abc251b2b10..eacce5a54ac 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -29,6 +29,9 @@ private: class RemoteFSFileDescriptor : public IAsynchronousReader::IFileDescriptor { public: + /// `reader_` implementation must ensure that next() places data at the start of internal_buffer, + /// even if there was previously a seek. I.e. seek() shouldn't leave pending data (no short seek + /// optimization), and nextImpl() shouldn't assign nextimpl_working_buffer_offset. explicit RemoteFSFileDescriptor( SeekableReadBuffer & reader_, std::shared_ptr async_read_counters_) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 74389aedb64..136f69ab729 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -206,7 +206,7 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL auto read_buffer_creator = [this, settings_ptr, disk_read_settings] - (const std::string & path, size_t read_until_position) -> std::unique_ptr + (bool restricted_seek, const std::string & path) -> std::unique_ptr { return std::make_unique( client.get(), @@ -215,8 +215,7 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL settings_ptr->max_single_read_retries, settings_ptr->max_single_download_retries, /* use_external_buffer */true, - /* restricted_seek */true, - read_until_position); + restricted_seek); }; switch (read_settings.remote_fs_method) @@ -226,16 +225,17 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL return std::make_unique( std::move(read_buffer_creator), objects, + "azure:", disk_read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */false); - } case RemoteFSReadMethod::threadpool: { auto impl = std::make_unique( std::move(read_buffer_creator), objects, + "azure:", disk_read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */true); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 2a648f28f14..16183ec20c1 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -527,10 +527,9 @@ std::unique_ptr DiskObjectStorage::readFile( std::optional read_hint, std::optional file_size) const { - auto storage_objects = metadata_storage->getStorageObjects(path); + const auto storage_objects = metadata_storage->getStorageObjects(path); const bool file_can_be_empty = !file_size.has_value() || *file_size == 0; - if (storage_objects.empty() && file_can_be_empty) return std::make_unique(); diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index fa5e227d853..f8545ecfe39 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ 
b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -60,7 +60,7 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI auto disk_read_settings = patchSettings(read_settings); auto read_buffer_creator = [this, disk_read_settings] - (const std::string & path, size_t /* read_until_position */) -> std::unique_ptr + (bool /* restricted_seek */, const std::string & path) -> std::unique_ptr { size_t begin_of_path = path.find('/', path.find("//") + 2); auto hdfs_path = path.substr(begin_of_path); @@ -71,7 +71,7 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI }; return std::make_unique( - std::move(read_buffer_creator), objects, disk_read_settings, nullptr, /* use_external_buffer */false); + std::move(read_buffer_creator), objects, "hdfs:", disk_read_settings, nullptr, /* use_external_buffer */false); } std::unique_ptr HDFSObjectStorage::writeObject( /// NOLINT diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 02700b358e0..7fd4536f266 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -47,7 +47,7 @@ std::unique_ptr LocalObjectStorage::readObjects( /// NOL auto modified_settings = patchSettings(read_settings); auto global_context = Context::getGlobalContextInstance(); auto read_buffer_creator = - [=] (const std::string & file_path, size_t /* read_until_position */) + [=] (bool /* restricted_seek */, const std::string & file_path) -> std::unique_ptr { return createReadBufferFromFileBase(file_path, modified_settings, read_hint, file_size); @@ -58,13 +58,13 @@ std::unique_ptr LocalObjectStorage::readObjects( /// NOL case RemoteFSReadMethod::read: { return std::make_unique( - std::move(read_buffer_creator), objects, modified_settings, + std::move(read_buffer_creator), objects, "file:", modified_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */false); } case RemoteFSReadMethod::threadpool: { auto impl = std::make_unique( - std::move(read_buffer_creator), objects, modified_settings, + std::move(read_buffer_creator), objects, "file:", modified_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */true); auto & reader = global_context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 5771eb1ebe0..d89c7c93e51 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -171,7 +171,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT auto read_buffer_creator = [this, settings_ptr, disk_read_settings] - (const std::string & path, size_t read_until_position) -> std::unique_ptr + (bool restricted_seek, const std::string & path) -> std::unique_ptr { return std::make_unique( client.get(), @@ -182,8 +182,8 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT disk_read_settings, /* use_external_buffer */true, /* offset */0, - read_until_position, - /* restricted_seek */true); + /* read_until_position */0, + restricted_seek); }; switch (read_settings.remote_fs_method) @@ -193,16 +193,17 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT return std::make_unique( std::move(read_buffer_creator), objects, + "s3:" + uri.bucket + "/", disk_read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */false); - } case RemoteFSReadMethod::threadpool: { auto impl 
= std::make_unique( std::move(read_buffer_creator), objects, + "s3:" + uri.bucket + "/", disk_read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */true); diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 786b23caf48..48de0bf4168 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -252,14 +252,13 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT { auto read_buffer_creator = [this, read_settings] - (const std::string & path_, size_t read_until_position) -> std::unique_ptr + (bool /* restricted_seek */, const std::string & path_) -> std::unique_ptr { return std::make_unique( fs::path(url) / path_, getContext(), read_settings, - /* use_external_buffer */true, - read_until_position); + /* use_external_buffer */true); }; auto global_context = Context::getGlobalContextInstance(); @@ -271,6 +270,7 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT return std::make_unique( std::move(read_buffer_creator), StoredObjects{object}, + "url:" + url + "/", read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */false); @@ -280,6 +280,7 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT auto impl = std::make_unique( std::move(read_buffer_creator), StoredObjects{object}, + "url:" + url + "/", read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */true); diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h index 279a399caad..f9590b4419f 100644 --- a/src/IO/AsynchronousReader.h +++ b/src/IO/AsynchronousReader.h @@ -54,6 +54,9 @@ public: struct Result { + /// The read data is at [buf + offset, buf + size), where `buf` is from Request struct. + /// (Notice that `offset` is included in `size`.) + /// size /// Less than requested amount of data can be returned. /// If size is zero - the file has ended. diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h index 4c0a467b155..1a087dd87fa 100644 --- a/src/IO/BufferBase.h +++ b/src/IO/BufferBase.h @@ -60,6 +60,9 @@ public: BufferBase(Position ptr, size_t size, size_t offset) : pos(ptr + offset), working_buffer(ptr, ptr + size), internal_buffer(ptr, ptr + size) {} + /// Assign the buffers and pos. + /// Be careful when calling this from ReadBuffer::nextImpl() implementations: `offset` is + /// effectively ignored because ReadBuffer::next() reassigns `pos`. 
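+    /// For example, a nextImpl() that wants the read position to start `skip` bytes into the
+    /// newly filled buffer would typically do
+    ///     set(data, size, 0);
+    ///     nextimpl_working_buffer_offset = skip;
+    /// (a ReadBuffer member honored by next() when it reassigns `pos`) rather than
+    /// set(data, size, skip).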
void set(Position ptr, size_t size, size_t offset) { internal_buffer = Buffer(ptr, ptr + size); diff --git a/src/IO/CachedInMemoryReadBufferFromFile.cpp b/src/IO/CachedInMemoryReadBufferFromFile.cpp new file mode 100644 index 00000000000..384d2229f14 --- /dev/null +++ b/src/IO/CachedInMemoryReadBufferFromFile.cpp @@ -0,0 +1,188 @@ +#include "CachedInMemoryReadBufferFromFile.h" +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNEXPECTED_END_OF_FILE; + extern const int CANNOT_SEEK_THROUGH_FILE; + extern const int SEEK_POSITION_OUT_OF_BOUND; +} + +CachedInMemoryReadBufferFromFile::CachedInMemoryReadBufferFromFile( + FileChunkAddress cache_key_, PageCachePtr cache_, std::unique_ptr in_, const ReadSettings & settings_) + : ReadBufferFromFileBase(0, nullptr, 0, in_->getFileSize()), cache_key(cache_key_), cache(cache_), settings(settings_), in(std::move(in_)) + , read_until_position(file_size.value()) +{ + cache_key.offset = 0; +} + +String CachedInMemoryReadBufferFromFile::getFileName() const +{ + return in->getFileName(); +} + +off_t CachedInMemoryReadBufferFromFile::seek(off_t off, int whence) +{ + if (whence != SEEK_SET) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); + + size_t offset = static_cast(off); + if (offset > file_size.value()) + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", off); + + if (offset >= file_offset_of_buffer_end - working_buffer.size() && offset <= file_offset_of_buffer_end) + { + pos = working_buffer.end() - (file_offset_of_buffer_end - offset); + chassert(getPosition() == off); + return off; + } + + resetWorkingBuffer(); + + file_offset_of_buffer_end = offset; + chunk.reset(); + + chassert(getPosition() == off); + return off; +} + +off_t CachedInMemoryReadBufferFromFile::getPosition() +{ + return file_offset_of_buffer_end - available(); +} + +size_t CachedInMemoryReadBufferFromFile::getFileOffsetOfBufferEnd() const +{ + return file_offset_of_buffer_end; +} + +void CachedInMemoryReadBufferFromFile::setReadUntilPosition(size_t position) +{ + read_until_position = position; + if (position < static_cast(getPosition())) + { + resetWorkingBuffer(); + chunk.reset(); + } + else if (position < file_offset_of_buffer_end) + { + size_t diff = file_offset_of_buffer_end - position; + working_buffer.resize(working_buffer.size() - diff); + file_offset_of_buffer_end -= diff; + } +} + +void CachedInMemoryReadBufferFromFile::setReadUntilEnd() +{ + setReadUntilPosition(file_size.value()); +} + +bool CachedInMemoryReadBufferFromFile::nextImpl() +{ + chassert(read_until_position <= file_size.value()); + if (file_offset_of_buffer_end >= read_until_position) + return false; + + if (chunk.has_value() && file_offset_of_buffer_end >= cache_key.offset + cache->chunkSize()) + { + chassert(file_offset_of_buffer_end == cache_key.offset + cache->chunkSize()); + chunk.reset(); + } + + if (!chunk.has_value()) + { + cache_key.offset = file_offset_of_buffer_end / cache->chunkSize() * cache->chunkSize(); + chunk = cache->getOrSet(cache_key.hash(), settings.read_from_page_cache_if_exists_otherwise_bypass_cache, settings.page_cache_inject_eviction); + + size_t chunk_size = std::min(cache->chunkSize(), file_size.value() - cache_key.offset); + + std::unique_lock download_lock(chunk->getChunk()->state.download_mutex); + + if (!chunk->isPrefixPopulated(chunk_size)) + { + /// A few things could be improved here, which may or may not be worth the added complexity: + /// 
* If the next file chunk is in cache, use in->setReadUntilPosition() to limit the read to + /// just one chunk. More generally, look ahead in the cache to count how many next chunks + /// need to be downloaded. (Up to some limit? And avoid changing `in`'s until-position if + /// it's already reasonable; otherwise we'd increase it by one chunk every chunk, discarding + /// a half-completed HTTP request every time.) + /// * If only a subset of pages are missing from this chunk, download only them, + /// with some threshold for avoiding short seeks. + /// In particular, if a previous download failed in the middle of the chunk, we could + /// resume from that position instead of from the beginning of the chunk. + /// (It's also possible in principle that a proper subset of chunk's pages was reclaimed + /// by the OS. But, for performance purposes, we should completely ignore that, because + /// (a) PageCache normally uses 2 MiB transparent huge pages and has just one such page + /// per chunk, and (b) even with 4 KiB pages partial chunk eviction is extremely rare.) + /// * If our [position, read_until_position) covers only part of the chunk, we could download + /// just that part. (Which would be bad if someone else needs the rest of the chunk and has + /// to do a whole new HTTP request to get it. Unclear what the policy should be.) + /// * Instead of doing in->next() in a loop until we get the whole chunk, we could return the + /// results as soon as in->next() produces them. + /// (But this would make the download_mutex situation much more complex, similar to the + /// FileSegment::State::PARTIALLY_DOWNLOADED and FileSegment::setRemoteFileReader() stuff.) + + Buffer prev_in_buffer = in->internalBuffer(); + SCOPE_EXIT({ in->set(prev_in_buffer.begin(), prev_in_buffer.size()); }); + + size_t pos = 0; + while (pos < chunk_size) + { + char * piece_start = chunk->getChunk()->data + pos; + size_t piece_size = chunk_size - pos; + in->set(piece_start, piece_size); + LOG_INFO(&Poco::Logger::get("asdqwe"), "this {:x}, in {:x}, path {}, size {}, offset {:x}, pos {:x}", reinterpret_cast(this), reinterpret_cast(in.get()), cache_key.path, file_size.value(), cache_key.offset, pos); + if (pos == 0) + in->seek(cache_key.offset, SEEK_SET); + else + chassert(!in->available()); + + if (in->eof()) + throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "File {} ended after {} bytes, but we expected {}", + getFileName(), cache_key.offset + pos, file_size.value()); + + chassert(in->position() >= piece_start && in->buffer().end() <= piece_start + piece_size); + chassert(in->getPosition() == static_cast(cache_key.offset + pos)); + + size_t n = in->available(); + chassert(n); + if (in->position() != piece_start) + memmove(piece_start, in->position(), n); + in->position() += n; + pos += n; + LOG_INFO(&Poco::Logger::get("asdqwe"), "this {:x}, got {:x} bytes", reinterpret_cast(this), n); + } + + chunk->markPrefixPopulated(chunk_size); + } + } + + nextimpl_working_buffer_offset = file_offset_of_buffer_end - cache_key.offset; + working_buffer = Buffer( + chunk->getChunk()->data, + chunk->getChunk()->data + std::min(chunk->getChunk()->size, read_until_position - cache_key.offset)); + pos = working_buffer.begin() + nextimpl_working_buffer_offset; + + if (!internal_buffer.empty()) + { + /// We were given an external buffer to read into. Copy the data into it. + /// Would be nice to avoid this copy, somehow, maybe by making ReadBufferFromRemoteFSGather + /// and AsynchronousBoundedReadBuffer explicitly aware of the page cache. 
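To make the chunk mapping computed above concrete, here is the arithmetic for a buffer positioned 5 MiB into a file, assuming the 2 MiB chunk size used elsewhere in this patch (the numbers are illustrative only):

    // file_offset_of_buffer_end = 5 MiB, cache->chunkSize() = 2 MiB
    cache_key.offset               = 5 MiB / 2 MiB * 2 MiB = 4 MiB   // chunk-aligned start
    nextimpl_working_buffer_offset = 5 MiB - 4 MiB         = 1 MiB   // position inside the chunk
    // working_buffer spans the chunk (clipped to read_until_position), so the next
    // nextImpl() call continues from the 6 MiB chunk boundary.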
+ size_t n = std::min(available(), internal_buffer.size()); + memcpy(internal_buffer.begin(), pos, n); + working_buffer = Buffer(internal_buffer.begin(), internal_buffer.begin() + n); + pos = working_buffer.begin(); + nextimpl_working_buffer_offset = 0; + } + + file_offset_of_buffer_end += available(); + + return true; +} + +} diff --git a/src/IO/CachedInMemoryReadBufferFromFile.h b/src/IO/CachedInMemoryReadBufferFromFile.h new file mode 100644 index 00000000000..300c2e82386 --- /dev/null +++ b/src/IO/CachedInMemoryReadBufferFromFile.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class CachedInMemoryReadBufferFromFile : public ReadBufferFromFileBase +{ +public: + /// `in_` must support using external buffer. I.e. we assign its internal_buffer before each next() + /// call and expect the read data to be put into that buffer. + /// `in_` should be seekable and should be able to read the whole file from 0 to in_->getFileSize(); + /// if you set `in_`'s read-until-position bypassing CachedInMemoryReadBufferFromFile then + /// CachedInMemoryReadBufferFromFile will break. + CachedInMemoryReadBufferFromFile(FileChunkAddress cache_key_, PageCachePtr cache_, std::unique_ptr in_, const ReadSettings & settings_); + + String getFileName() const override; + off_t seek(off_t off, int whence) override; + off_t getPosition() override; + size_t getFileOffsetOfBufferEnd() const override; + bool supportsRightBoundedReads() const override { return true; } + void setReadUntilPosition(size_t position) override; + void setReadUntilEnd() override; + +private: + FileChunkAddress cache_key; // .offset is offset of `chunk` start + PageCachePtr cache; + ReadSettings settings; + std::unique_ptr in; + + size_t file_offset_of_buffer_end = 0; + size_t read_until_position; + + std::optional chunk; + + bool nextImpl() override; +}; + +} diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index b45bc8f3dbc..00325734354 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -225,11 +225,22 @@ public: * - seek() to a position above the until position (even if you setReadUntilPosition() to a * higher value right after the seek!), * - * Typical implementations discard any current buffers and connections, even if the position is - * adjusted only a little. + * Implementations are recommended to: + * - Allow the read-until-position to go below current position, e.g.: + * // Read block [300, 400) + * setReadUntilPosition(400); + * seek(300); + * next(); + * // Read block [100, 200) + * setReadUntilPosition(200); // oh oh, this is below the current position, but should be allowed + * seek(100); // but now everything's fine again + * next(); + * // (Swapping the order of seek and setReadUntilPosition doesn't help: then it breaks if the order of blocks is reversed.) + * - Check if new read-until-position value is equal to the current value and do nothing in this case, + * so that the caller doesn't have to. * - * Typical usage is to call it right after creating the ReadBuffer, before it started doing any - * work. + * Typical implementations discard any current buffers and connections when the + * read-until-position changes even by a small (nonzero) amount. 
*/ virtual void setReadUntilPosition(size_t /* position */) {} diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index c397689d6ad..f4dc7880be4 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -61,6 +61,7 @@ enum class RemoteFSReadMethod }; class MMappedFileCache; +class PageCache; struct ReadSettings { @@ -102,6 +103,12 @@ struct ReadSettings bool avoid_readthrough_cache_outside_query_context = true; size_t filesystem_cache_segments_batch_size = 20; + //asdqwe assign these two + bool use_page_cache_for_disks_without_file_cache = false; + bool read_from_page_cache_if_exists_otherwise_bypass_cache = false; + bool page_cache_inject_eviction = false; + std::shared_ptr page_cache; + size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024); bool skip_download_if_exceeds_query_cache = true; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 8304a876fb1..53fd7d9b45f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -294,6 +295,7 @@ struct ContextSharedPart : boost::noncopyable mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices. mutable MMappedFileCachePtr mmap_cache TSA_GUARDED_BY(mutex); /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. AsynchronousMetrics * asynchronous_metrics TSA_GUARDED_BY(mutex) = nullptr; /// Points to asynchronous metrics + mutable PageCachePtr page_cache TSA_GUARDED_BY(mutex); /// Userspace page cache. ProcessList process_list; /// Executing queries at the moment. SessionTracker session_tracker; GlobalOvercommitTracker global_overcommit_tracker; @@ -1228,7 +1230,7 @@ void Context::setUser(const UUID & user_id_, const std::optional() and other AccessControl's functions may require some IO work, - /// so Context::getLock() must be unlocked while we're doing this. + /// so Context::getLocalLock() and Context::getGlobalLock() must be unlocked while we're doing this. auto & access_control = getAccessControl(); auto user = access_control.read(user_id_); @@ -1358,7 +1360,7 @@ void Context::checkAccess(const AccessRightsElements & elements) const { return std::shared_ptr Context::getAccess() const { - /// A helper function to collect parameters for calculating access rights, called with Context::getLock() acquired. + /// A helper function to collect parameters for calculating access rights, called with Context::getLocalSharedLock() acquired. auto get_params = [this]() { /// If setUserID() was never called then this must be the global context with the full access. @@ -1385,7 +1387,8 @@ std::shared_ptr Context::getAccess() const } /// Calculate new access rights according to the collected parameters. - /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. + /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLocalLock() + /// and Context::getGlobalLock() must be unlocked while we're doing this. 
auto res = getAccessControl().getContextAccess(*params); { @@ -2714,6 +2717,33 @@ void Context::clearUncompressedCache() const shared->uncompressed_cache->clear(); } +void Context::setPageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free, bool use_huge_pages) +{ + std::lock_guard lock(shared->mutex); + + if (shared->page_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Page cache has been already created."); + + shared->page_cache = std::make_shared(bytes_per_chunk, bytes_per_mmap, bytes_total, use_madv_free, use_huge_pages); +} + +PageCachePtr Context::getPageCache() const +{ + SharedLockGuard lock(shared->mutex); + return shared->page_cache; +} + +void Context::dropPageCache() const +{ + PageCachePtr cache; + { + SharedLockGuard lock(shared->mutex); + cache = shared->page_cache; + } + if (cache) + cache->dropCache(); +} + void Context::setMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio) { std::lock_guard lock(shared->mutex); @@ -5130,6 +5160,11 @@ ReadSettings Context::getReadSettings() const res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; + res.page_cache = getPageCache(); + res.use_page_cache_for_disks_without_file_cache = settings.use_page_cache_for_disks_without_file_cache; + res.read_from_page_cache_if_exists_otherwise_bypass_cache = settings.read_from_page_cache_if_exists_otherwise_bypass_cache; + res.page_cache_inject_eviction = settings.page_cache_inject_eviction; + res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; /// Zero read buffer will not make progress. diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 7bbff9c63bb..ec5a044b28f 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -79,6 +79,7 @@ class RefreshSet; class Cluster; class Compiler; class MarkCache; +class PageCache; class MMappedFileCache; class UncompressedCache; class ProcessList; @@ -968,6 +969,10 @@ public: std::shared_ptr getUncompressedCache() const; void clearUncompressedCache() const; + void setPageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free, bool use_huge_pages); + std::shared_ptr getPageCache() const; + void dropPageCache() const; + void setMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio); void updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getMarkCache() const; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index a078d99facf..4bb47a8c9e3 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -460,6 +461,13 @@ BlockIO InterpreterSystemQuery::execute() { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); } + case Type::DROP_PAGE_CACHE: + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_PAGE_CACHE); + + getContext()->dropPageCache(); + break; + } case Type::DROP_SCHEMA_CACHE: { getContext()->checkAccess(AccessType::SYSTEM_DROP_SCHEMA_CACHE); @@ -1201,6 +1209,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::DROP_INDEX_UNCOMPRESSED_CACHE: case Type::DROP_FILESYSTEM_CACHE: case Type::SYNC_FILESYSTEM_CACHE: + case 
Type::DROP_PAGE_CACHE: case Type::DROP_SCHEMA_CACHE: case Type::DROP_FORMAT_SCHEMA_CACHE: case Type::DROP_S3_CLIENT_CACHE: diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index bdf314f35b9..fe7ccd64ffe 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include @@ -77,6 +79,16 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr new_values["MarkCacheFiles"] = { mark_cache->count(), "Total number of mark files cached in the mark cache" }; } + if (auto page_cache = getContext()->getPageCache()) + { + auto rss = page_cache->getResidentSetSize(); + new_values["PageCacheBytes"] = { rss.page_cache_rss, "Userspace page cache memory usage in bytes" }; + new_values["PageCachePinnedBytes"] = { page_cache->getPinnedSize(), "Userspace page cache memory that's currently in use and can't be evicted" }; + + if (rss.unreclaimable_rss.has_value()) + new_values["UnreclaimableRSS"] = { *rss.unreclaimable_rss, "The amount of physical memory used by the server process, in bytes, excluding memory reclaimable by the OS (MADV_FREE)" }; + } + if (auto uncompressed_cache = getContext()->getUncompressedCache()) { new_values["UncompressedCacheBytes"] = { uncompressed_cache->sizeInBytes(), diff --git a/src/Interpreters/tests/gtest_page_cache.cpp b/src/Interpreters/tests/gtest_page_cache.cpp new file mode 100644 index 00000000000..1e2688c0ca2 --- /dev/null +++ b/src/Interpreters/tests/gtest_page_cache.cpp @@ -0,0 +1,267 @@ +#include +#include +#include + +#ifdef OS_LINUX +#include +#endif + +using namespace DB; + +namespace ProfileEvents +{ + extern const Event PageCacheChunkMisses; + extern const Event PageCacheChunkShared; + extern const Event PageCacheChunkDataHits; + extern const Event PageCacheChunkDataPartialHits; + extern const Event PageCacheChunkDataMisses; +} + +#define CHECK(x) \ + do { \ + if (!(x)) \ + { \ + std::cerr << "check on line " << __LINE__ << " failed: " << #x << std::endl; \ + std::abort(); \ + } \ + } while (false) + +size_t estimateRAMSize() +{ +#ifdef OS_LINUX + struct sysinfo info; + int r = sysinfo(&info); + CHECK(r == 0); + return static_cast(info.totalram * info.mem_unit); +#else + return 128ul << 30; +#endif +} + +/// Do random reads and writes in PageCache from multiple threads, check that the data read matches the data written. +TEST(PageCache, DISABLED_Stress) +{ + /// There doesn't seem to be a reasonable way to simulate memory pressure or force the eviction of MADV_FREE-d pages. + /// So we actually map more virtual memory than we have RAM and fill it all up a few times. + /// This takes an eternity (a few minutes), but idk how else to hit MADV_FREE eviction. + /// Expect ~1 GB/s, bottlenecked by page faults. 
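For orientation while reading the constructor call below: PageCache appears to take its parameters in the same order as Context::setPageCache() above, so the stress test's cache can be read roughly as (a paraphrase with the parameter names spelled out, not code from the patch):

PageCache cache(
    /* bytes_per_chunk */ 2 << 20,                   // 2 MiB chunks
    /* bytes_per_mmap  */ 1 << 30,                   // the arena is allocated in 1 GiB mmaps
    /* bytes_total     */ ram_size + ram_size / 10,  // ~110% of physical RAM, to force MADV_FREE eviction
    /* use_madv_free   */ true,
    /* use_huge_pages  */ true);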
+ size_t ram_size = estimateRAMSize(); + PageCache cache(2 << 20, 1 << 30, ram_size + ram_size / 10, /* use_madv_free */ true, /* use_huge_pages */ true); + + CHECK(cache.getResidentSetSize().page_cache_rss); + + const size_t num_keys = static_cast(cache.maxChunks() * 1.5); + const size_t pages_per_chunk = cache.chunkSize() / cache.pageSize(); + const size_t items_per_page = cache.pageSize() / 8; + + const size_t passes = 2; + const size_t step = 20; + const size_t num_threads = 20; + const size_t chunks_touched = num_keys * passes * num_threads / step; + std::atomic progress {0}; + std::atomic threads_finished {0}; + + std::atomic total_racing_writes {0}; + + auto thread_func = [&] + { + pcg64 rng(randomSeed()); + std::vector pinned; + + /// Stats. + size_t racing_writes = 0; + + for (size_t i = 0; i < num_keys * passes; i += step) + { + progress += 1; + + /// Touch the chunks sequentially + noise (to increase interference across threads), or at random 10% of the time. + size_t key_idx; + if (rng() % 10 == 0) + key_idx = std::uniform_int_distribution(0, num_keys - 1)(rng); + else + key_idx = (i + std::uniform_int_distribution(0, num_keys / 1000)(rng)) % num_keys; + + /// For some keys, always use detached_if_missing = true and check that cache always misses. + bool key_detached_if_missing = key_idx % 100 == 42; + bool detached_if_missing = key_detached_if_missing || i % 101 == 42; + + PageCacheKey key = key_idx * 0xcafebabeb0bad00dul; // a simple reversible hash (the constant can be any odd number) + + PinnedPageChunk chunk = cache.getOrSet(key, detached_if_missing, /* inject_eviction */ false); + + if (key_detached_if_missing) + CHECK(!chunk.getChunk()->pages_populated.any()); + + for (size_t page_idx = 0; page_idx < pages_per_chunk; ++page_idx) + { + bool populated = chunk.getChunk()->pages_populated.get(page_idx); + /// Generate page contents deterministically from key and page index. + size_t start = key_idx * page_idx; + if (start % 37 == 13) + { + /// Leave ~1/37 of the pages unpopulated. + CHECK(!populated); + } + else + { + /// We may write/read the same memory from multiple threads in parallel here. + std::atomic * items = reinterpret_cast *>(chunk.getChunk()->data + cache.pageSize() * page_idx); + if (populated) + { + for (size_t j = 0; j < items_per_page; ++j) + CHECK(items[j].load(std::memory_order_relaxed) == start + j); + } + else + { + for (size_t j = 0; j < items_per_page; ++j) + items[j].store(start + j, std::memory_order_relaxed); + if (!chunk.markPagePopulated(page_idx)) + racing_writes += 1; + } + } + } + + pinned.push_back(std::move(chunk)); + CHECK(cache.getPinnedSize() >= cache.chunkSize()); + /// Unpin 2 chunks on average. + while (rng() % 3 != 0 && !pinned.empty()) + { + size_t idx = rng() % pinned.size(); + if (idx != pinned.size() - 1) + pinned[idx] = std::move(pinned.back()); + pinned.pop_back(); + } + } + + total_racing_writes += racing_writes; + threads_finished += 1; + }; + + std::cout << fmt::format("doing {:.1f} passes over {:.1f} GiB of virtual memory\nthis will take a few minutes, progress printed every 10 seconds", + chunks_touched * 1. / cache.maxChunks(), cache.maxChunks() * cache.chunkSize() * 1. / (1ul << 30)) << std::endl; + + auto start_time = std::chrono::steady_clock::now(); + + std::vector threads; + for (size_t i = 0; i < num_threads; ++i) + threads.emplace_back(thread_func); + + for (size_t poll = 0;; ++poll) + { + if (threads_finished == num_threads) + break; + if (poll % 100 == 0) + std::cout << fmt::format("{:.3f}%", progress.load() * 100. 
/ num_keys / passes / num_threads * step) << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + for (std::thread & t : threads) + t.join(); + + auto end_time = std::chrono::steady_clock::now(); + double elapsed_seconds = std::chrono::duration_cast>(end_time - start_time).count(); + double touched_gib = chunks_touched * cache.chunkSize() * 1. / (1ul << 30); + std::cout << fmt::format("touched {:.1f} GiB in {:.1f} seconds, that's {:.3f} GiB/s", + touched_gib, elapsed_seconds, touched_gib / elapsed_seconds) << std::endl; + + auto & counters = CurrentThread::getProfileEvents(); + + std::cout << "stats:" + << "\nchunk misses: " << counters[ProfileEvents::PageCacheChunkMisses].load() + << "\nchunk shared: " << counters[ProfileEvents::PageCacheChunkShared].load() + << "\nchunk data misses: " << counters[ProfileEvents::PageCacheChunkDataMisses].load() + << "\nchunk data partial hits: " << counters[ProfileEvents::PageCacheChunkDataPartialHits].load() + << "\nchunk data hits: " << counters[ProfileEvents::PageCacheChunkDataHits].load() + << "\nracing page writes: " << total_racing_writes << std::endl; + + /// Check that we at least hit all the cases. + CHECK(counters[ProfileEvents::PageCacheChunkMisses].load() > 0); + CHECK(counters[ProfileEvents::PageCacheChunkShared].load() > 0); + CHECK(counters[ProfileEvents::PageCacheChunkDataMisses].load() > 0); + /// Partial hits are rare enough that sometimes this is zero, so don't check it. + /// That's good news because we don't need to implement downloading parts of a chunk. + /// CHECK(counters[ProfileEvents::PageCacheChunkDataPartialHits].load() > 0); + CHECK(counters[ProfileEvents::PageCacheChunkDataHits].load() > 0); + CHECK(total_racing_writes > 0); + CHECK(cache.getPinnedSize() == 0); + + size_t rss = cache.getResidentSetSize().page_cache_rss; + std::cout << "RSS: " << rss * 1. / (1ul << 30) << " GiB" << std::endl; + /// This can be flaky if the system has < 10% free memory. If this turns out to be a problem, feel free to remove or reduce. + CHECK(rss > ram_size / 10); + + cache.dropCache(); + +#ifdef OS_LINUX + /// MADV_DONTNEED is not synchronous, and we're freeing lots of pages. Let's give Linux a lot of time. + std::this_thread::sleep_for(std::chrono::seconds(10)); + size_t new_rss = cache.getResidentSetSize().page_cache_rss; + std::cout << "RSS after dropping cache: " << new_rss * 1. / (1ul << 30) << " GiB" << std::endl; + CHECK(new_rss < rss / 2); +#endif +} + +/// Benchmark that measures the PageCache overhead for cache hits. Doesn't touch the actual data, so +/// memory bandwidth mostly doesn't factor into this. +/// This measures the overhead of things like madvise(MADV_FREE) and probing the pages (restoreChunkFromLimbo()). +/// Disabled in CI, run manually with --gtest_also_run_disabled_tests --gtest_filter=PageCache.DISABLED_HitsBench +TEST(PageCache, DISABLED_HitsBench) +{ + /// Do a few runs, with and without MADV_FREE. + for (size_t num_threads = 1; num_threads <= 16; num_threads *= 2) + { + for (size_t run = 0; run < 8; ++ run) + { + bool use_madv_free = run % 2 == 1; + bool use_huge_pages = run % 4 / 2 == 1; + + PageCache cache(2 << 20, 1ul << 30, 20ul << 30, use_madv_free, use_huge_pages); + size_t passes = 3; + std::atomic total_misses {0}; + + /// Prepopulate all chunks. 
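A side note on the key scheme used in the loop below (and in the stress test above): key = index * 0xcafebabeb0bad00d is a bijection on 64-bit values because the constant is odd and therefore invertible modulo 2^64, so distinct chunk indices always map to distinct cache keys — which is all the "simple reversible hash" comment relies on.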
+ for (size_t i = 0; i < cache.maxChunks(); ++i) + { + PageCacheKey key = i * 0xcafebabeb0bad00dul; + PinnedPageChunk chunk = cache.getOrSet(key, /* detache_if_missing */ false, /* inject_eviction */ false); + memset(chunk.getChunk()->data, 42, chunk.getChunk()->size); + chunk.markPrefixPopulated(cache.chunkSize()); + } + + auto thread_func = [&] + { + pcg64 rng(randomSeed()); + size_t misses = 0; + for (size_t i = 0; i < cache.maxChunks() * passes; ++i) + { + PageCacheKey key = rng() % cache.maxChunks() * 0xcafebabeb0bad00dul; + PinnedPageChunk chunk = cache.getOrSet(key, /* detache_if_missing */ false, /* inject_eviction */ false); + if (!chunk.isPrefixPopulated(cache.chunkSize())) + misses += 1; + } + total_misses += misses; + }; + + auto start_time = std::chrono::steady_clock::now(); + + std::vector threads; + for (size_t i = 0; i < num_threads; ++i) + threads.emplace_back(thread_func); + + for (std::thread & t : threads) + t.join(); + + auto end_time = std::chrono::steady_clock::now(); + double elapsed_seconds = std::chrono::duration_cast>(end_time - start_time).count(); + double fetched_gib = cache.chunkSize() * cache.maxChunks() * passes * 1. / (1ul << 30); + std::cout << fmt::format( + "threads {}, run {}, use_madv_free = {}, use_huge_pages = {}\nrequested {:.1f} GiB in {:.1f} seconds\n" + "that's {:.1f} GiB/s, or overhead of {:.3}us/{:.1}MiB\n", + num_threads, run, use_madv_free, use_huge_pages, fetched_gib, elapsed_seconds, fetched_gib / elapsed_seconds, + elapsed_seconds * 1e6 / cache.maxChunks() / passes, cache.chunkSize() * 1. / (1 << 20)) << std::endl; + + if (total_misses != 0) + std::cout << "!got " << total_misses.load() << " misses! perhaps your system doesn't have enough free memory, consider decreasing cache size in the benchmark code" << std::endl; + } + } +} diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 9aa90f499d0..48be7f6b84f 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -31,6 +31,7 @@ public: DROP_COMPILED_EXPRESSION_CACHE, DROP_FILESYSTEM_CACHE, DROP_DISK_METADATA_CACHE, + DROP_PAGE_CACHE, DROP_SCHEMA_CACHE, DROP_FORMAT_SCHEMA_CACHE, DROP_S3_CLIENT_CACHE, diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 39ad28d3dae..a9bdceacef0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1637,10 +1637,6 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const size_t file_size = getDataPartStorage().getFileSize(TXN_VERSION_METADATA_FILE_NAME); auto buf = getDataPartStorage().readFile(TXN_VERSION_METADATA_FILE_NAME, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt); - /// FIXME https://github.com/ClickHouse/ClickHouse/issues/48465 - if (dynamic_cast(buf.get())) - return true; - readStringUntilEOF(content, *buf); ReadBufferFromString str_buf{content}; VersionMetadata file; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index da90dbb4076..53a18d3cc5b 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -727,7 +727,7 @@ std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( auto context = getContext(); auto read_buffer_creator = [this, read_settings, object_size] - (const std::string & path, size_t read_until_position) -> std::unique_ptr + (bool restricted_seek, const std::string & path) -> std::unique_ptr { return std::make_unique( client, @@ -738,21 +738,25 @@ std::unique_ptr 
StorageS3Source::createAsyncS3ReadBuffer( read_settings, /* use_external_buffer */true, /* offset */0, - read_until_position, - /* restricted_seek */true, + /* read_until_position */0, + restricted_seek, object_size); }; + auto modified_settings{read_settings}; + /// User's S3 object may change, don't cache it. + modified_settings.use_page_cache_for_disks_without_file_cache = false; + + /// FIXME: Changing this setting to default value breaks something around parquet reading + modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; + auto s3_impl = std::make_unique( std::move(read_buffer_creator), StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, + "", read_settings, /* cache_log */nullptr, /* use_external_buffer */true); - auto modified_settings{read_settings}; - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); auto async_reader = std::make_unique( std::move(s3_impl), pool_reader, modified_settings, diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f438c6f4f31..d44c80bc410 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -699,6 +699,8 @@ class SettingsRandomizer: get_localzone(), ] ), + "use_page_cache_for_disks_without_file_cache": lambda: random.random() < 0.7, + "page_cache_inject_eviction": lambda: random.random() < 0.5, } @staticmethod diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index e1f5213790d..88f18c52536 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -112,6 +112,7 @@ SYSTEM DROP QUERY CACHE ['SYSTEM DROP QUERY','DROP QUERY CACHE','DROP QUERY'] GL SYSTEM DROP COMPILED EXPRESSION CACHE ['SYSTEM DROP COMPILED EXPRESSION','DROP COMPILED EXPRESSION CACHE','DROP COMPILED EXPRESSIONS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP FILESYSTEM CACHE ['SYSTEM DROP FILESYSTEM CACHE','DROP FILESYSTEM CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM SYNC FILESYSTEM CACHE ['SYSTEM REPAIR FILESYSTEM CACHE','REPAIR FILESYSTEM CACHE','SYNC FILESYSTEM CACHE'] GLOBAL SYSTEM +SYSTEM DROP PAGE CACHE ['SYSTEM DROP PAGE CACHE','DROP PAGE CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP SCHEMA CACHE ['SYSTEM DROP SCHEMA CACHE','DROP SCHEMA CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP FORMAT SCHEMA CACHE ['SYSTEM DROP FORMAT SCHEMA CACHE','DROP FORMAT SCHEMA CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP S3 CLIENT CACHE ['SYSTEM DROP S3 CLIENT','DROP S3 CLIENT CACHE'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02867_page_cache.reference b/tests/queries/0_stateless/02867_page_cache.reference new file mode 100644 index 00000000000..5502059508a --- /dev/null +++ b/tests/queries/0_stateless/02867_page_cache.reference @@ -0,0 +1,23 @@ +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkDataHits 1 +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 
+PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkDataHits 1 diff --git a/tests/queries/0_stateless/02867_page_cache.sql b/tests/queries/0_stateless/02867_page_cache.sql new file mode 100644 index 00000000000..8765b30ebc3 --- /dev/null +++ b/tests/queries/0_stateless/02867_page_cache.sql @@ -0,0 +1,105 @@ +-- Tags: no-fasttest, no-parallel +-- no-fasttest because we need an S3 storage policy +-- no-parallel because we look at server-wide counters about page cache usage + +set use_page_cache_for_disks_without_file_cache = 1; +set page_cache_inject_eviction = 0; +set enable_filesystem_cache = 0; +set use_uncompressed_cache = 0; + +create table events_snapshot engine Memory as select * from system.events; +create view events_diff as + -- round all stats to 70 MiB to leave a lot of leeway for overhead + with if(event like '%Bytes%', 70*1024*1024, 35) as granularity, + -- cache hits counter can vary a lot depending on other settings: + -- e.g. if merge_tree_min_bytes_for_concurrent_read is small, multiple threads will read each chunk + -- so we just check that the value is not too low + if(event in ( + 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages', + 'PageCacheChunkDataHits'), 1, 1000) as clamp + select event, min2(intDiv(new.value - old.value, granularity), clamp) as diff + from system.events new + left outer join events_snapshot old + on old.event = new.event + where diff != 0 and + event in ( + 'ReadBufferFromS3Bytes', 'PageCacheChunkMisses', 'PageCacheChunkDataMisses', + 'PageCacheChunkDataHits', 'PageCacheChunkDataPartialHits', + 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages') + order by event; + +drop table if exists page_cache_03055; +create table page_cache_03055 (k Int64 CODEC(NONE)) engine MergeTree order by k settings storage_policy = 's3_cache'; + +-- Write an 80 MiB file (40 x 2 MiB chunks), and a few small files. +system stop merges page_cache_03055; +insert into page_cache_03055 select * from numbers(10485760) settings max_block_size=100000000, preferred_block_size_bytes=1000000000; + +select * from events_diff; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +system start merges page_cache_03055; +optimize table page_cache_03055 final; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Cold read, should miss cache. (Populating cache on write is not implemented yet.) + +select sum(k) from page_cache_03055; + +select * from events_diff where event not in ('PageCacheChunkDataHits'); +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Repeat read, should hit cache. + +select sum(k) from page_cache_03055; + +select * from events_diff; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Drop cache and read again, should miss. Also don't write to cache. + +system drop page cache; + +select sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; + +-- Data could be read multiple times because we're not writing to cache. 
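To see why the expected diffs in the reference file are all 1, it helps to work the numbers for the cache-bypassing read just above, using the 80 MiB / 40-chunk figures from the earlier comment (approximate values):

    ReadBufferFromS3Bytes             ~80 MiB  ->  intDiv(80 MiB, 70 MiB)        = 1
    PageCacheChunkMisses              ~40      ->  intDiv(40, 35)                = 1
    PageCacheBytesUnpinnedRounded...  ~80 MiB  ->  min2(intDiv(...), clamp = 1)  = 1

Because nothing is written back to the cache in this step, concurrent threads may fetch the same data more than once, which is why the check below only asserts diff >= 1 for PageCacheChunkMisses and ReadBufferFromS3Bytes instead of pinning them to an exact value.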
+select event, if(event in ('PageCacheChunkMisses', 'ReadBufferFromS3Bytes'), diff >= 1, diff) from events_diff where event not in ('PageCacheChunkDataHits'); +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Repeat read, should still miss, but populate cache. + +select sum(k) from page_cache_03055; + +select * from events_diff where event not in ('PageCacheChunkDataHits'); +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Read again, hit the cache. + +select sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; + +select * from events_diff; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + + +-- Known limitation: cache is not invalidated if a table is dropped and created again at the same path. +-- set allow_deprecated_database_ordinary=1; +-- create database test_03055 engine = Ordinary; +-- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; +-- insert into test_03055.t values (1); +-- select * from test_03055.t; +-- drop table test_03055.t; +-- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; +-- insert into test_03055.t values (2); +-- select * from test_03055.t; + + +drop table events_snapshot; +drop table page_cache_03055; +drop view events_diff; From 227e3b58067ebaabc281673601408fedd135a5a2 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 29 Feb 2024 12:26:33 +0000 Subject: [PATCH 108/356] Conflict --- src/Parsers/ASTSystemQuery.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 0713737af95..63311a70e42 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -415,6 +415,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s case Type::STOP_THREAD_FUZZER: case Type::START_VIEWS: case Type::STOP_VIEWS: + case Type::DROP_PAGE_CACHE: break; case Type::UNKNOWN: case Type::END: From c944e2e8170b6cdd6519dc3f87da0a2dd60f3ac7 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 29 Feb 2024 15:47:57 +0100 Subject: [PATCH 109/356] Fix --- src/Coordination/KeeperContext.cpp | 4 +++- src/Coordination/KeeperDispatcher.cpp | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 7c1ff55245e..a36a074ce89 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -1,4 +1,6 @@ +#include #include + #include #include @@ -442,7 +444,7 @@ bool KeeperContext::waitCommittedUpto(uint64_t log_idx, uint64_t wait_timeout_ms bool success = last_committed_log_idx_cv.wait_for( lock, std::chrono::milliseconds(wait_timeout_ms), - [&] { return shutdown_called || last_committed_log_idx >= wait_commit_upto_idx; }); + [&] { return shutdown_called || lastCommittedIndex() >= wait_commit_upto_idx; }); wait_commit_upto_idx.reset(); return success; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 7af9c65e9d3..f6598cfaa17 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -218,7 +218,7 @@ void KeeperDispatcher::requestThread() /// Forcefully process all previous pending requests if (prev_result) result_buf - = forceWaitAndProcessResult(prev_result, prev_batch, 
/*clear_requests_on_success=*/true); + = forceWaitAndProcessResult(prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write); /// Process collected write requests batch if (!current_batch.empty()) @@ -243,7 +243,7 @@ void KeeperDispatcher::requestThread() { if (prev_result) result_buf = forceWaitAndProcessResult( - prev_result, current_batch, /*clear_requests_on_success=*/!execute_requests_after_write); + prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write); /// In case of older version or disabled async replication, result buf will be set to value of `commit` function /// which always returns nullptr @@ -257,11 +257,13 @@ void KeeperDispatcher::requestThread() /// if timeout happened set error responses for the requests if (!keeper_context->waitCommittedUpto(log_idx, coordination_settings->operation_timeout_ms.totalMilliseconds())) - addErrorResponses(current_batch, Coordination::Error::ZOPERATIONTIMEOUT); + addErrorResponses(prev_batch, Coordination::Error::ZOPERATIONTIMEOUT); if (shutdown_called) return; } + + prev_batch.clear(); } if (has_reconfig_request) From f40e25f5c38b3e554c4a795546c08dd8fb3e96f2 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 29 Feb 2024 00:21:50 +0100 Subject: [PATCH 110/356] Check all shell scripts in docker directories --- docker/test/server-jepsen/run.sh | 4 +++- docker/test/stateless/attach_gdb.lib | 1 + docker/test/stateless/stress_tests.lib | 32 +++++++++++++------------- docker/test/stress/run.sh | 12 +++++++++- docker/test/upgrade/run.sh | 4 +++- utils/check-style/shellcheck-run.sh | 14 +++++------ 6 files changed, 41 insertions(+), 26 deletions(-) diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh index 81e442e65b6..09198ca1968 100644 --- a/docker/test/server-jepsen/run.sh +++ b/docker/test/server-jepsen/run.sh @@ -20,6 +20,8 @@ if [ -n "$WITH_LOCAL_BINARY" ]; then clickhouse_source="--clickhouse-source /clickhouse" fi +# $TESTS_TO_RUN comes from docker +# shellcheck disable=SC2153 tests_count="--test-count $TESTS_TO_RUN" tests_to_run="test-all" workload="" @@ -47,6 +49,6 @@ fi cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse" -(lein run server $tests_to_run $workload --keeper "$KEEPER_NODE" $concurrency $nemesis $rate --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 $clickhouse_source $tests_count --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" +(lein run server $tests_to_run "$workload" --keeper "$KEEPER_NODE" "$concurrency" "$nemesis" "$rate" --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 "$clickhouse_source" "$tests_count" --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" mv store "$TEST_OUTPUT/" diff --git a/docker/test/stateless/attach_gdb.lib b/docker/test/stateless/attach_gdb.lib index f4738cdc333..d288288bb17 100644 --- a/docker/test/stateless/attach_gdb.lib +++ b/docker/test/stateless/attach_gdb.lib @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck source=./utils.lib source /utils.lib function attach_gdb_to_clickhouse() diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index 72c2d8d2f2d..ae98f9a0a3a 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -19,7 +19,7 @@ function escaped() function head_escaped() { - head 
-n $FAILURE_CONTEXT_LINES $1 | escaped + head -n "$FAILURE_CONTEXT_LINES" "$1" | escaped } function unts() @@ -29,15 +29,15 @@ function unts() function trim_server_logs() { - head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped + head -n :$FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped } function install_packages() { - dpkg -i $1/clickhouse-common-static_*.deb - dpkg -i $1/clickhouse-common-static-dbg_*.deb - dpkg -i $1/clickhouse-server_*.deb - dpkg -i $1/clickhouse-client_*.deb + dpkg -i "$1"/clickhouse-common-static_*.deb + dpkg -i "$1"/clickhouse-common-static-dbg_*.deb + dpkg -i "$1"/clickhouse-server_*.deb + dpkg -i "$1"/clickhouse-client_*.deb } function configure() @@ -54,11 +54,11 @@ function configure() sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml function randomize_config_boolean_value { - value=$(($RANDOM % 2)) - sudo cat /etc/clickhouse-server/config.d/$2.xml \ + value=$((RANDOM % 2)) + sudo cat "/etc/clickhouse-server/config.d/$2.xml" \ | sed "s|<$1>[01]|<$1>$value|" \ - > /etc/clickhouse-server/config.d/$2.xml.tmp - sudo mv /etc/clickhouse-server/config.d/$2.xml.tmp /etc/clickhouse-server/config.d/$2.xml + > "/etc/clickhouse-server/config.d/$2.xml.tmp" + sudo mv "/etc/clickhouse-server/config.d/$2.xml.tmp" "/etc/clickhouse-server/config.d/$2.xml" } if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then @@ -156,7 +156,7 @@ function stop() clickhouse stop --max-tries "$max_tries" --do-not-kill && return - if [ $check_hang == true ] + if [ "$check_hang" == true ] then # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. # Add a special status just in case, so it will be possible to find in the CI DB @@ -165,7 +165,7 @@ function stop() sleep 5 # The server could finally stop while we were terminating gdb, let's recheck if it's still running - kill -s 0 $pid || return + kill -s 0 "$pid" || return echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log @@ -181,7 +181,7 @@ function start() counter=0 until clickhouse-client --query "SELECT 1" do - if [ "$counter" -gt ${1:-120} ] + if [ "$counter" -gt "${1:-120}" ] then echo "Cannot start clickhouse-server" rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||: @@ -286,9 +286,9 @@ function collect_query_and_trace_logs() function collect_core_dumps() { - find . -type f -maxdepth 1 -name 'core.*' | while read core; do - zstd --threads=0 $core - mv $core.zst /test_output/ + find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do + zstd --threads=0 "$core" + mv "$core.zst" /test_output/ done } diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 3981c4dd416..1f0d55605af 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -16,7 +16,9 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. 
See tests/ci/stress_tests.lib +# shellcheck source=../stateless/attach_gdb.lib source /attach_gdb.lib +# shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib install_packages package_folder @@ -55,6 +57,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml +# shellcheck disable=SC2119 start setup_logs_replication @@ -65,6 +68,7 @@ clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" +# shellcheck disable=SC2119 stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log @@ -85,6 +89,7 @@ if [ "$cache_policy" = "SLRU" ]; then mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml fi +# shellcheck disable=SC2119 start clickhouse-client --query "SHOW TABLES FROM datasets" @@ -188,6 +193,7 @@ clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "SYSTEM STOP THREAD FUZZER" +# shellcheck disable=SC2119 stop # Let's enable S3 storage by default @@ -222,6 +228,7 @@ if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then > /etc/clickhouse-server/config.d/enable_async_load_databases.xml fi +# shellcheck disable=SC2119 start stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ @@ -232,6 +239,7 @@ stress --hung-check --drop-databases --output-folder test_output --skip-func-tes rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \ || echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log)" >> /test_output/test_results.tsv +# shellcheck disable=SC2119 stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log @@ -239,10 +247,12 @@ mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/c # In debug build it can take a lot of time. unset "${!THREAD_@}" +# shellcheck disable=SC2119 start check_server_start +# shellcheck disable=SC2119 stop [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" @@ -272,7 +282,7 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d (test like '%Signal 9%') DESC, (test like '%Fatal message%') DESC, rowNumberInAllBlocks() -LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv +LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # But OOMs in stress test are allowed diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index a139327e12e..f78d0812e97 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -16,7 +16,9 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. 
See tests/ci/stress_tests.lib +# shellcheck source=../stateless/attach_gdb.lib source /attach_gdb.lib +# shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & @@ -335,7 +337,7 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d (test like '%Changed settings%') DESC, (test like '%New settings%') DESC, rowNumberInAllBlocks() -LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv +LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # But OOMs in stress test are allowed diff --git a/utils/check-style/shellcheck-run.sh b/utils/check-style/shellcheck-run.sh index bdb0f681c31..b848bb46bcf 100755 --- a/utils/check-style/shellcheck-run.sh +++ b/utils/check-style/shellcheck-run.sh @@ -2,13 +2,13 @@ ROOT_PATH=$(git rev-parse --show-toplevel) NPROC=$(($(nproc) + 3)) # Check sh tests with Shellcheck -( cd "$ROOT_PATH/tests/queries/0_stateless/" && \ - find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | \ - xargs -0 -P "$NPROC" -n 20 shellcheck --check-sourced --external-sources --severity info --exclude SC1071,SC2086,SC2016 -) +find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | \ + xargs -0 -P "$NPROC" -n 1 shellcheck --check-sourced --external-sources --source-path=SCRIPTDIR \ + --severity info --exclude SC1071,SC2086,SC2016 # Check docker scripts with shellcheck -find "$ROOT_PATH/docker" -executable -type f -exec file -F' ' --mime-type {} \; | \ - awk -F' ' '$2==" text/x-shellscript" {print $1}' | \ +# Do not check sourced files, since it causes broken --source-path=SCRIPTDIR +find "$ROOT_PATH/docker" -type f -exec file -F' ' --mime-type {} + | \ + awk '$2=="text/x-shellscript" {print $1}' | \ grep -v "compare.sh" | \ - xargs -P "$NPROC" -n 20 shellcheck + xargs -P "$NPROC" -n 1 shellcheck --external-sources --source-path=SCRIPTDIR From 82a830819815eb805ffe39cf2c2e3e0870c55ffa Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 29 Feb 2024 11:29:14 +0100 Subject: [PATCH 111/356] Update shellcheck to the latest release --- docker/test/style/Dockerfile | 14 +++++++++++++- .../0_stateless/01232_json_as_string_format.sh | 4 ++-- .../0_stateless/01460_line_as_string_format.sh | 6 ++++-- .../01541_max_memory_usage_for_user_long.sh | 4 +++- .../01548_query_log_query_execution_ms.sh | 2 +- ...1737_clickhouse_server_wait_server_pool_long.sh | 6 ++++-- utils/check-style/shellcheck-run.sh | 4 ++-- 7 files changed, 29 insertions(+), 11 deletions(-) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index f2bac2f5da4..abc2dba0e9d 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -16,7 +16,6 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ moreutils \ python3-fuzzywuzzy \ python3-pip \ - shellcheck \ yamllint \ locales \ && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \ @@ -30,6 +29,19 @@ ENV LC_ALL en_US.UTF-8 # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH +ARG SHELLCHECK_VERSION=0.9.0 +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) sarch=x86_64 ;; \ + arm64) sarch=aarch64 ;; \ + esac \ + && curl -L \ + "https://github.com/koalaman/shellcheck/releases/download/v${SHELLCHECK_VERSION}/shellcheck-v${SHELLCHECK_VERSION}.linux.${sarch}.tar.xz" \ + | tar xJ --strip=1 -C /tmp \ + && mv /tmp/shellcheck /usr/bin \ + && rm -rf /tmp/* + + # Get act and actionlint from releases RUN arch=${TARGETARCH:-amd64} \ && case $arch in \ diff --git a/tests/queries/0_stateless/01232_json_as_string_format.sh b/tests/queries/0_stateless/01232_json_as_string_format.sh index 667aea7ba78..bb6f86e8686 100755 --- a/tests/queries/0_stateless/01232_json_as_string_format.sh +++ b/tests/queries/0_stateless/01232_json_as_string_format.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS json_as_string"; $CLICKHOUSE_CLIENT --query="CREATE TABLE json_as_string (field String) ENGINE = Memory"; -echo ' +echo -e ' { "id" : 1, "date" : "01.01.2020", @@ -44,7 +44,7 @@ echo ' } }' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; -echo ' +echo -e ' [ { "id" : 1, diff --git a/tests/queries/0_stateless/01460_line_as_string_format.sh b/tests/queries/0_stateless/01460_line_as_string_format.sh index 4ab9cb59858..6fa922eb730 100755 --- a/tests/queries/0_stateless/01460_line_as_string_format.sh +++ b/tests/queries/0_stateless/01460_line_as_string_format.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS line_as_string1"; $CLICKHOUSE_CLIENT --query="CREATE TABLE line_as_string1(field String) ENGINE = Memory"; -echo '"id" : 1, +echo -e '"id" : 1, "date" : "01.01.2020", "string" : "123{{{\"\\", "array" : [1, 2, 3], @@ -26,7 +26,9 @@ $CLICKHOUSE_CLIENT --query="create table line_as_string2( $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string2(c) values ('ClickHouse')"; -echo 'ClickHouse is a `fast` #open-source# (OLAP) database "management" :system:' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string2(c) FORMAT LineAsString"; +# Shellcheck thinks `fast` is a shell expansion +# shellcheck disable=SC2016 +echo -e 'ClickHouse is a `fast` #open-source# (OLAP) database "management" :system:' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string2(c) FORMAT LineAsString"; $CLICKHOUSE_CLIENT --query="SELECT * FROM line_as_string2 order by c"; $CLICKHOUSE_CLIENT 
--query="DROP TABLE line_as_string2" diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh index e2d0306fee0..9f0699929f8 100755 --- a/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh @@ -45,11 +45,13 @@ query_id=$$-$RANDOM-$SECONDS ${CLICKHOUSE_CLIENT} --user=test_01541 --max_block_size=1 --format Null --query_id $query_id -q 'SELECT sleepEachRow(1) FROM numbers(600)' & # trap sleep_query_pid=$! +# Shellcheck wrongly process "trap" https://www.shellcheck.net/wiki/SC2317 +# shellcheck disable=SC2317 function cleanup() { echo 'KILL sleep' # if the timeout will not be enough, it will trigger "No such process" error/message - kill $sleep_query_pid + kill "$sleep_query_pid" # waiting for a query to finish while ${CLICKHOUSE_CLIENT} -q "SELECT query_id FROM system.processes WHERE query_id = '$query_id'" | grep -xq "$query_id"; do sleep 0.1 diff --git a/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh b/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh index 0d13a1d4eff..48cbd57c1c0 100755 --- a/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh +++ b/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh @@ -47,7 +47,7 @@ function main() { # retries, since there is no guarantee that every time query will take ~0.4 second. local retries=20 i=0 - while [ "$(test_query_duration_ms | xargs)" != '1 1' ] && [[ $i < $retries ]]; do + while [ "$(test_query_duration_ms | xargs)" != '1 1' ] && (( i < retries )); do ((++i)) done } diff --git a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh index adab3906e5b..cd8abb57a80 100755 --- a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh +++ b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh @@ -18,10 +18,12 @@ CLICKHOUSE_WATCHDOG_ENABLE=0 $CLICKHOUSE_SERVER_BINARY "${server_opts[@]}" >& cl server_pid=$! trap cleanup EXIT +# Shellcheck wrongly process "trap" https://www.shellcheck.net/wiki/SC2317 +# shellcheck disable=SC2317 function cleanup() { - kill -9 $server_pid - kill -9 $client_pid + kill -9 "$server_pid" + kill -9 "$client_pid" echo "Test failed. 
Server log:" cat clickhouse-server.log diff --git a/utils/check-style/shellcheck-run.sh b/utils/check-style/shellcheck-run.sh index b848bb46bcf..5930e537703 100755 --- a/utils/check-style/shellcheck-run.sh +++ b/utils/check-style/shellcheck-run.sh @@ -3,7 +3,7 @@ ROOT_PATH=$(git rev-parse --show-toplevel) NPROC=$(($(nproc) + 3)) # Check sh tests with Shellcheck find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | \ - xargs -0 -P "$NPROC" -n 1 shellcheck --check-sourced --external-sources --source-path=SCRIPTDIR \ + xargs -0 -P "$NPROC" -n 20 shellcheck --check-sourced --external-sources --source-path=SCRIPTDIR \ --severity info --exclude SC1071,SC2086,SC2016 # Check docker scripts with shellcheck @@ -11,4 +11,4 @@ find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | find "$ROOT_PATH/docker" -type f -exec file -F' ' --mime-type {} + | \ awk '$2=="text/x-shellscript" {print $1}' | \ grep -v "compare.sh" | \ - xargs -P "$NPROC" -n 1 shellcheck --external-sources --source-path=SCRIPTDIR + xargs -P "$NPROC" -n 20 shellcheck --external-sources --source-path=SCRIPTDIR From ffbdd11ba452dce418dccc2c8c25581f4d3db38e Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 29 Feb 2024 14:16:09 +0100 Subject: [PATCH 112/356] Use a proper way to pass the text to client --- .../queries/0_stateless/01232_json_as_string_format.sh | 10 ++++++---- .../queries/0_stateless/01460_line_as_string_format.sh | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01232_json_as_string_format.sh b/tests/queries/0_stateless/01232_json_as_string_format.sh index bb6f86e8686..8d2fe193b55 100755 --- a/tests/queries/0_stateless/01232_json_as_string_format.sh +++ b/tests/queries/0_stateless/01232_json_as_string_format.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS json_as_string"; $CLICKHOUSE_CLIENT --query="CREATE TABLE json_as_string (field String) ENGINE = Memory"; -echo -e ' +cat << 'EOF' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; { "id" : 1, "date" : "01.01.2020", @@ -42,9 +42,10 @@ echo -e ' "{" : 1, "}}" : 2 } -}' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; +} +EOF -echo -e ' +cat << 'EOF' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; [ { "id" : 1, @@ -79,7 +80,8 @@ echo -e ' "}}" : 2 } } -]' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; +] +EOF $CLICKHOUSE_CLIENT --query="SELECT * FROM json_as_string ORDER BY field"; diff --git a/tests/queries/0_stateless/01460_line_as_string_format.sh b/tests/queries/0_stateless/01460_line_as_string_format.sh index 6fa922eb730..a8782dd2d32 100755 --- a/tests/queries/0_stateless/01460_line_as_string_format.sh +++ b/tests/queries/0_stateless/01460_line_as_string_format.sh @@ -7,12 +7,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS line_as_string1"; $CLICKHOUSE_CLIENT --query="CREATE TABLE line_as_string1(field String) ENGINE = Memory"; -echo -e '"id" : 1, +cat <<'EOF' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string1 FORMAT LineAsString"; +"id" : 1, "date" : "01.01.2020", "string" : "123{{{\"\\", "array" : [1, 2, 3], -Finally implement this new feature.' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string1 FORMAT LineAsString"; +Finally implement this new feature. 
+EOF $CLICKHOUSE_CLIENT --query="SELECT * FROM line_as_string1"; $CLICKHOUSE_CLIENT --query="DROP TABLE line_as_string1" From 1b8ae25153a06a630e8f3553d30494a497b9c449 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 29 Feb 2024 18:16:05 +0100 Subject: [PATCH 113/356] Use cancel instead of finish in case of exception --- src/Processors/Executors/PipelineExecutor.cpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 8477e011763..5b5880759e6 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -391,7 +391,9 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) SCOPE_EXIT_SAFE( if (!finished_flag) { - finish(); + /// If finished_flag is not set, there was an exception. + /// Cancel execution in this case. + cancel(); if (pool) pool->wait(); } @@ -399,18 +401,7 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) if (num_threads > 1) { - try - { - spawnThreads(); // start at least one thread - } - catch (...) - { - /// spawnThreads can throw an exception, for example CANNOT_SCHEDULE_TASK. - /// We should cancel execution properly before rethrow. - cancel(); - throw; - } - + spawnThreads(); // start at least one thread tasks.processAsyncTasks(); pool->wait(); } From 8a4e15536b86727003146c16e66bb8720463713e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 29 Feb 2024 18:01:54 +0000 Subject: [PATCH 114/356] refactoring of virtual columns --- src/Interpreters/InterpreterDescribeQuery.cpp | 20 +- src/Interpreters/InterpreterDescribeQuery.h | 1 + src/Interpreters/MutationsInterpreter.cpp | 4 +- src/Storages/ColumnsDescription.cpp | 1 - src/Storages/ColumnsDescription.h | 7 - src/Storages/FileLog/FileLogSource.cpp | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 4 +- src/Storages/HDFS/StorageHDFS.cpp | 4 +- src/Storages/HDFS/StorageHDFSCluster.cpp | 4 +- src/Storages/Hive/StorageHive.cpp | 4 +- src/Storages/IStorage.h | 4 +- src/Storages/Kafka/StorageKafka.cpp | 4 +- src/Storages/MergeTree/MergeTreeData.cpp | 19 +- src/Storages/NATS/StorageNATS.cpp | 4 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 4 +- src/Storages/S3Queue/StorageS3Queue.cpp | 4 +- src/Storages/StorageAzureBlob.cpp | 4 +- src/Storages/StorageAzureBlobCluster.cpp | 4 +- src/Storages/StorageDistributed.cpp | 4 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageFileCluster.cpp | 4 +- src/Storages/StorageMaterializedView.cpp | 10 +- src/Storages/StorageMaterializedView.h | 2 + src/Storages/StorageS3.cpp | 4 +- src/Storages/StorageS3Cluster.cpp | 4 +- src/Storages/StorageURL.cpp | 4 +- src/Storages/StorageURLCluster.cpp | 4 +- src/Storages/VirtualColumnsDescription.cpp | 17 +- src/Storages/VirtualColumnsDescription.h | 19 +- .../02890_describe_table_options.reference | 388 ++++++++---------- .../02890_describe_table_options.sql | 2 + 31 files changed, 250 insertions(+), 314 deletions(-) diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 1aab72afcc1..11542931775 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -123,28 +123,29 @@ BlockIO InterpreterDescribeQuery::execute() void InterpreterDescribeQuery::fillColumnsFromSubquery(const ASTTableExpression & table_expression) { - NamesAndTypesList 
names_and_types; + Block sample_block; auto select_query = table_expression.subquery->children.at(0); auto current_context = getContext(); if (settings.allow_experimental_analyzer) { SelectQueryOptions select_query_options; - names_and_types = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock().getNamesAndTypesList(); + sample_block = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock(); } else { - names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context).getNamesAndTypesList(); + sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context); } - for (auto && [name, type] : names_and_types) - columns.emplace_back(std::move(name), std::move(type)); + for (auto && column : sample_block) + columns.emplace_back(std::move(column.name), std::move(column.type)); } void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpression & table_expression) { auto current_context = getContext(); TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, current_context); + auto column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true); for (const auto & column : column_descriptions) columns.emplace_back(column); @@ -154,14 +155,15 @@ void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpres auto table = table_function_ptr->execute(table_expression.table_function, getContext(), table_function_ptr->getName()); if (table) { - for (const auto & column : table->getVirtuals()) + for (const auto & column : *table->getVirtualsDescription()) { if (!column_descriptions.has(column.name)) - virtual_columns.emplace_back(column.name, column.type); + virtual_columns.push_back(column); } } } } + void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & table_expression) { auto table_id = getContext()->resolveStorageID(table_expression.database_and_table_name); @@ -176,10 +178,10 @@ void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & t if (settings.describe_include_virtual_columns) { - for (const auto & column : table->getVirtuals()) + for (const auto & column : *table->getVirtualsDescription()) { if (!column_descriptions.has(column.name)) - virtual_columns.emplace_back(column.name, column.type); + virtual_columns.push_back(column); } } diff --git a/src/Interpreters/InterpreterDescribeQuery.h b/src/Interpreters/InterpreterDescribeQuery.h index 5d01745db6b..64573bcb93f 100644 --- a/src/Interpreters/InterpreterDescribeQuery.h +++ b/src/Interpreters/InterpreterDescribeQuery.h @@ -4,6 +4,7 @@ #include #include #include +#include "Storages/VirtualColumnsDescription.h" namespace DB diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 8ac25d13bf0..7a0d68f7c87 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -346,7 +346,7 @@ bool MutationsInterpreter::Source::isCompactPart() const return part && part->getType() == MergeTreeDataPartType::Compact; } -static Names getAvailableColumnswithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) +static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) { auto all_columns = metadata_snapshot->getColumns().getNamesOfPhysical(); for (const auto & column : 
storage.getVirtuals()) @@ -363,7 +363,7 @@ MutationsInterpreter::MutationsInterpreter( : MutationsInterpreter( Source(storage_), metadata_snapshot_, std::move(commands_), - getAvailableColumnswithVirtuals(metadata_snapshot_, *storage_), + getAvailableColumnsWithVirtuals(metadata_snapshot_, *storage_), std::move(context_), std::move(settings_)) { if (settings.can_execute && dynamic_cast(source.getStorage().get())) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 6ee87297cab..e08dac3a332 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -31,7 +31,6 @@ #include #include #include -#include namespace DB diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index cc6c7d0e1b9..2a5bbe3c78e 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -66,12 +66,6 @@ struct GetColumnsOptions return *this; } - GetColumnsOptions & withPersistentVirtuals(bool value = true) - { - with_persistent_virtuals = value; - return *this; - } - GetColumnsOptions & withExtendedObjects(bool value = true) { with_extended_objects = value; @@ -82,7 +76,6 @@ struct GetColumnsOptions VirtualsKind virtuals_kind = VirtualsKind::None; bool with_subcolumns = false; - bool with_persistent_virtuals = false; bool with_extended_objects = false; }; diff --git a/src/Storages/FileLog/FileLogSource.cpp b/src/Storages/FileLog/FileLogSource.cpp index b1192af4ced..eb3ff0436a5 100644 --- a/src/Storages/FileLog/FileLogSource.cpp +++ b/src/Storages/FileLog/FileLogSource.cpp @@ -31,7 +31,7 @@ FileLogSource::FileLogSource( , max_streams_number(max_streams_number_) , handle_error_mode(handle_error_mode_) , non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized()) - , virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames())) + , virtual_header(storage_snapshot->virtual_columns->getSampleBlock()) { consumer = std::make_unique(storage, max_block_size, poll_time_out, context, stream_number_, max_streams_number_); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 23e869bfa89..2911f9b268b 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -148,9 +148,7 @@ StorageFileLog::StorageFileLog( storage_metadata.setColumns(columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - auto virtuals = createVirtuals(filelog_settings->handle_error_mode); - setVirtuals(virtuals); + setVirtuals(createVirtuals(filelog_settings->handle_error_mode)); if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) { diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index ca5ff263876..4a481a6a7d2 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -241,9 +241,7 @@ StorageHDFS::StorageHDFS( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } namespace diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 80aec36999c..8c6d7ce5670 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ 
b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -72,9 +72,7 @@ StorageHDFSCluster::StorageHDFSCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index f8c10ea4249..88ab8e15e76 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -445,9 +445,7 @@ StorageHive::StorageHive( storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext()); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageHive::lazyInitialize() diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index bd64447a00e..e7ebb45cb46 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -11,13 +11,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include #include @@ -230,7 +230,7 @@ public: /// /// By default return empty list of columns. VirtualsDescriptionPtr getVirtualsDescription() const { return virtuals.get(); } - virtual NamesAndTypesList getVirtuals() const { return virtuals.get()->getNamesAndTypesList(); } + NamesAndTypesList getVirtuals() const { return virtuals.get()->getNamesAndTypesList(); } Block getVirtualsHeader() const { return virtuals.get()->getSampleBlock(); } Names getAllRegisteredNames() const override; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 3656b599788..4b87b1f7b74 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -345,9 +345,7 @@ StorageKafka::StorageKafka( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); - - auto virtuals = createVirtuals(kafka_settings->kafka_handle_error_mode); - setVirtuals(virtuals); + setVirtuals(createVirtuals(kafka_settings->kafka_handle_error_mode)); auto task_count = thread_per_consumer ? 
num_consumers : 1; for (size_t i = 0; i < task_count; ++i) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index cb319348b60..e52362ff76a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -435,23 +435,22 @@ MergeTreeData::MergeTreeData( VirtualColumnsDescription MergeTreeData::createVirtuals(const StorageInMemoryMetadata & metadata) { VirtualColumnsDescription desc; - auto low_cardinality_type = std::make_shared(std::make_shared()); - desc.addEphemeral("_part", low_cardinality_type, ""); - desc.addEphemeral("_part_index", std::make_shared(), ""); - desc.addEphemeral("_part_uuid", std::make_shared(), ""); - desc.addEphemeral("_partition_id", low_cardinality_type, ""); - desc.addEphemeral("_sample_factor", std::make_shared(), ""); - desc.addEphemeral("_part_offset", std::make_shared(), ""); + desc.addEphemeral("_part", std::make_shared(std::make_shared()), "Name of part"); + desc.addEphemeral("_part_index", std::make_shared(), "Sequential index of the part in the query result"); + desc.addEphemeral("_part_uuid", std::make_shared(), "Unique part identifier (if enabled MergeTree setting assign_part_uuids)"); + desc.addEphemeral("_partition_id", std::make_shared(std::make_shared()), "Name of partition"); + desc.addEphemeral("_sample_factor", std::make_shared(), "Sample factor (from the query)"); + desc.addEphemeral("_part_offset", std::make_shared(), "Number of row in the part"); if (metadata.hasPartitionKey()) { auto partition_types = metadata.partition_key.sample_block.getDataTypes(); - desc.addEphemeral("_partition_value", std::make_shared(std::move(partition_types)), ""); + desc.addEphemeral("_partition_value", std::make_shared(std::move(partition_types)), "Value (a tuple) of a PARTITION BY expression"); } - desc.addPersistent(RowExistsColumn::name, RowExistsColumn::type, nullptr, ""); - desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, ""); + desc.addPersistent(RowExistsColumn::name, RowExistsColumn::type, nullptr, "Persisted mask created by lightweight delete that show whether row exists or is deleted"); + desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, "Persisted original number of block that was assigned at insert"); return desc; } diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index a06221e1b26..0b88a9e8929 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -88,9 +88,7 @@ StorageNATS::StorageNATS( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); - - auto virtuals = createVirtuals(nats_settings->nats_handle_error_mode); - setVirtuals(virtuals); + setVirtuals(createVirtuals(nats_settings->nats_handle_error_mode)); nats_context = addSettings(getContext()); nats_context->makeQueryContext(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index fb7b9c10d7f..c5df752fb7f 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -137,9 +137,7 @@ StorageRabbitMQ::StorageRabbitMQ( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); - - auto virtuals = createVirtuals(rabbitmq_settings->rabbitmq_handle_error_mode); - setVirtuals(virtuals); + 
setVirtuals(createVirtuals(rabbitmq_settings->rabbitmq_handle_error_mode)); rabbitmq_context = addSettings(getContext()); rabbitmq_context->makeQueryContext(); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 724aa88e7e4..765fcbd9684 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -155,9 +155,7 @@ StorageS3Queue::StorageS3Queue( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 41ec5961ec3..a74e274e5d9 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -492,9 +492,7 @@ StorageAzureBlob::StorageAzureBlob( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); StoredObjects objects; for (const auto & key : configuration.blobs_paths) diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index ce318deada6..155a7220885 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -63,9 +63,7 @@ StorageAzureBlobCluster::StorageAzureBlobCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageAzureBlobCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 83eb3e55853..541b2934892 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -299,8 +299,8 @@ VirtualColumnsDescription StorageDistributed::createVirtuals() StorageInMemoryMetadata metadata; auto desc = MergeTreeData::createVirtuals(metadata); - desc.addEphemeral("_table", std::make_shared(std::make_shared()), ""); - desc.addEphemeral("_shard_num", std::make_shared(), "deprecated"); + desc.addEphemeral("_table", std::make_shared(std::make_shared()), "Name of a table"); + desc.addEphemeral("_shard_num", std::make_shared(), "Deprecated. 
Use function shardNum instead"); return desc; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index da52b960e8b..8eaf794f445 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1097,9 +1097,7 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index 7aeecf7095a..84691472809 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -61,9 +61,7 @@ StorageFileCluster::StorageFileCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 9e623001520..7e953889b34 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -21,6 +22,7 @@ #include #include +#include "Storages/StorageSnapshot.h" #include #include #include @@ -153,8 +155,6 @@ StorageMaterializedView::StorageMaterializedView( *query.refresh_strategy); refresh_on_start = mode < LoadingStrictnessLevel::ATTACH && !query.is_create_empty; } - - setVirtuals(*getTargetTable()->getVirtualsDescription()); } QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( @@ -167,6 +167,12 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( return getTargetTable()->getQueryProcessingStage(local_context, to_stage, getTargetTable()->getStorageSnapshot(target_metadata, local_context), query_info); } +StorageSnapshotPtr StorageMaterializedView::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const +{ + /// We cannot set virtuals at table creation because target table may not exist at that time. 
+ return std::make_shared(*this, metadata_snapshot, getTargetTable()->getVirtualsDescription()); +} + void StorageMaterializedView::read( QueryPlan & query_plan, const Names & column_names, diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index d716527b5ad..198b7a642ee 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -78,6 +78,8 @@ public: ActionLock getActionLock(StorageActionBlockType type) override; void onActionLockRemove(StorageActionBlockType action_type) override; + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const override; + void read( QueryPlan & query_plan, const Names & column_names, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 75ba0635125..691d197aee6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1075,9 +1075,7 @@ StorageS3::StorageS3( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } static std::shared_ptr createFileIterator( diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index b9e671cdbb8..7641c66eefd 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -69,9 +69,7 @@ StorageS3Cluster::StorageS3Cluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageS3Cluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 6aa719ec321..e0c5160a43b 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -152,9 +152,7 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 2f492bd7950..334c2ca249b 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -75,9 +75,7 @@ StorageURLCluster::StorageURLCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()); - setVirtuals(virtuals); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) diff --git a/src/Storages/VirtualColumnsDescription.cpp b/src/Storages/VirtualColumnsDescription.cpp index 65223071dfe..d9f9cbe047e 100644 --- 
a/src/Storages/VirtualColumnsDescription.cpp +++ b/src/Storages/VirtualColumnsDescription.cpp @@ -19,11 +19,12 @@ VirtualColumnDescription::VirtualColumnDescription( { } -void VirtualColumnsDescription::add(VirtualColumnDescription desc_) +void VirtualColumnsDescription::add(VirtualColumnDescription desc) { - auto [it, inserted] = container.emplace(std::move(desc_)); - if (!inserted) - throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Virtual column {} already exists", it->name); + if (container.get<1>().contains(desc.name)) + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Virtual column {} already exists", desc.name); + + container.get<0>().push_back(std::move(desc)); } void VirtualColumnsDescription::addEphemeral(String name, DataTypePtr type, String comment) @@ -47,8 +48,8 @@ NamesAndTypesList VirtualColumnsDescription::get(VirtualsKind kind) const std::optional VirtualColumnsDescription::tryGet(const String & name, VirtualsKind kind) const { - auto it = container.find(name); - if (it != container.end() && (static_cast(it->kind) & static_cast(kind))) + auto it = container.get<1>().find(name); + if (it != container.get<1>().end() && (static_cast(it->kind) & static_cast(kind))) return NameAndTypePair{it->name, it->type}; return {}; } @@ -63,8 +64,8 @@ NameAndTypePair VirtualColumnsDescription::get(const String & name, VirtualsKind std::optional VirtualColumnsDescription::tryGetDescription(const String & name, VirtualsKind kind) const { - auto it = container.find(name); - if (it != container.end() && (static_cast(it->kind) & static_cast(kind))) + auto it = container.get<1>().find(name); + if (it != container.get<1>().end() && (static_cast(it->kind) & static_cast(kind))) return *it; return {}; } diff --git a/src/Storages/VirtualColumnsDescription.h b/src/Storages/VirtualColumnsDescription.h index 2564e49a0e9..3d79167c623 100644 --- a/src/Storages/VirtualColumnsDescription.h +++ b/src/Storages/VirtualColumnsDescription.h @@ -16,20 +16,18 @@ public: bool isEphemeral() const { return kind == VirtualsKind::Ephemeral; } bool isPersistent() const { return kind == VirtualsKind::Persistent; } - - struct Comparator - { - using is_transparent = void; - bool operator()(const Self & lhs, const Self & rhs) const { return lhs.name < rhs.name; } - bool operator()(const Self & lhs, const String & rhs) const { return lhs.name < rhs; } - bool operator()(const String & lhs, const Self & rhs) const { return lhs < rhs.name; } - }; + const String & getName() const { return name; } }; class VirtualColumnsDescription { public: - using Container = std::set; + using Container = boost::multi_index_container< + VirtualColumnDescription, + boost::multi_index::indexed_by< + boost::multi_index::sequenced<>, + boost::multi_index::ordered_unique>>>; + using const_iterator = Container::const_iterator; const_iterator begin() const { return container.begin(); } @@ -41,8 +39,9 @@ public: void addEphemeral(String name, DataTypePtr type, String comment); void addPersistent(String name, DataTypePtr type, ASTPtr codec, String comment); + size_t size() const { return container.size(); } bool empty() const { return container.empty(); } - bool has(const String & name) const { return container.contains(name); } + bool has(const String & name) const { return container.get<1>().contains(name); } NameAndTypePair get(const String & name, VirtualsKind kind) const; std::optional tryGet(const String & name, VirtualsKind kind) const; diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference 
b/tests/queries/0_stateless/02890_describe_table_options.reference index 5d99df36bb4..ecbfe5a9f4d 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -2,237 +2,201 @@ SET describe_compact_output = 0, describe_include_virtual_columns = 0, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┠-│ id │ UInt64 │ │ │ index column │ │ │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ -└──────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ +┌─name─┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┠+│ id │ UInt64 │ │ │ index column │ │ │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ +└──────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┠-│ id │ UInt64 │ │ │ index column │ │ │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ -└──────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ +┌─name─┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┠+│ id │ UInt64 │ │ │ index column │ │ │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ +└──────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 0, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 
+└───────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ +└───────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _partition_value │ UInt8 │ │ │ │ │ │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 1 │ -└──────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ +│ _block_number │ UInt64 │ │ │ 
Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 1 │ -│ _shard_num │ UInt32 │ │ │ │ │ │ 1 │ -└────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +│ _table │ LowCardinality(String) │ │ │ Name of a table │ │ │ 1 │ +│ _shard_num │ UInt32 │ │ │ Deprecated. 
Use function shardNum instead │ │ │ 1 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _partition_value │ UInt8 │ │ │ │ │ │ 0 │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 0 │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -└──────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 
│ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 0 │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _shard_num │ UInt32 │ │ │ │ │ │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -└────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ _table │ LowCardinality(String) │ │ │ Name of a table │ │ │ 0 │ 1 │ +│ _shard_num │ UInt32 │ │ │ Deprecated. 
Use function shardNum instead │ │ │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 0, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┠-│ id │ UInt64 │ -│ arr │ Array(UInt64) │ -│ t │ Tuple( - a String, - b UInt64) │ -└──────┴──────────────────────────────────┘ +┌─name─┬─type──────────────────────┠+│ id │ UInt64 │ +│ arr │ Array(UInt64) │ +│ t │ Tuple(a String, b UInt64) │ +└──────┴───────────────────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┠-│ id │ UInt64 │ -│ arr │ Array(UInt64) │ -│ t │ Tuple( - a String, - b UInt64) │ -└──────┴──────────────────────────────────┘ +┌─name─┬─type──────────────────────┠+│ id │ UInt64 │ +│ arr │ Array(UInt64) │ +│ t │ Tuple(a String, b UInt64) │ +└──────┴───────────────────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 0, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─is_subcolumn─┠-│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ arr.size0 │ UInt64 │ 1 │ -│ t.a │ String │ 1 │ -│ t.b │ UInt64 │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─is_subcolumn─┠+│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ arr.size0 │ UInt64 │ 1 │ +│ t.a │ String │ 1 │ +│ t.b │ UInt64 │ 1 │ +└───────────┴───────────────────────────┴──────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─is_subcolumn─┠-│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ arr.size0 │ UInt64 │ 1 │ -│ t.a │ String │ 1 │ -│ t.b │ UInt64 │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─is_subcolumn─┠+│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ arr.size0 │ UInt64 │ 1 │ +│ t.a │ String │ 1 │ +│ t.b │ UInt64 │ 1 │ +└───────────┴───────────────────────────┴──────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─is_virtual─┠-│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ _part │ LowCardinality(String) │ 1 │ -│ _part_index │ UInt64 │ 1 │ -│ _part_uuid │ UUID │ 1 │ -│ _partition_id │ LowCardinality(String) │ 1 │ -│ _partition_value │ UInt8 │ 1 │ -│ _sample_factor │ Float64 │ 1 │ -│ _part_offset │ UInt64 │ 1 │ -│ _row_exists │ UInt8 │ 1 │ -│ _block_number │ UInt64 │ 1 │ -└──────────────────┴──────────────────────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_virtual─┠+│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ 
Tuple(a String, b UInt64) │ 0 │ +│ _part │ LowCardinality(String) │ 1 │ +│ _part_index │ UInt64 │ 1 │ +│ _part_uuid │ UUID │ 1 │ +│ _partition_id │ LowCardinality(String) │ 1 │ +│ _sample_factor │ Float64 │ 1 │ +│ _part_offset │ UInt64 │ 1 │ +│ _row_exists │ UInt8 │ 1 │ +│ _block_number │ UInt64 │ 1 │ +└────────────────┴───────────────────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─is_virtual─┠-│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ _table │ LowCardinality(String) │ 1 │ -│ _part │ LowCardinality(String) │ 1 │ -│ _part_index │ UInt64 │ 1 │ -│ _part_uuid │ UUID │ 1 │ -│ _partition_id │ LowCardinality(String) │ 1 │ -│ _sample_factor │ Float64 │ 1 │ -│ _part_offset │ UInt64 │ 1 │ -│ _row_exists │ UInt8 │ 1 │ -│ _block_number │ UInt64 │ 1 │ -│ _shard_num │ UInt32 │ 1 │ -└────────────────┴──────────────────────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_virtual─┠+│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ _part │ LowCardinality(String) │ 1 │ +│ _part_index │ UInt64 │ 1 │ +│ _part_uuid │ UUID │ 1 │ +│ _partition_id │ LowCardinality(String) │ 1 │ +│ _sample_factor │ Float64 │ 1 │ +│ _part_offset │ UInt64 │ 1 │ +│ _row_exists │ UInt8 │ 1 │ +│ _block_number │ UInt64 │ 1 │ +│ _table │ LowCardinality(String) │ 1 │ +│ _shard_num │ UInt32 │ 1 │ +└────────────────┴───────────────────────────┴────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─is_subcolumn─┬─is_virtual─┠-│ id │ UInt64 │ 0 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ 0 │ -│ _part │ LowCardinality(String) │ 0 │ 1 │ -│ _part_index │ UInt64 │ 0 │ 1 │ -│ _part_uuid │ UUID │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ -│ _partition_value │ UInt8 │ 0 │ 1 │ -│ _sample_factor │ Float64 │ 0 │ 1 │ -│ _part_offset │ UInt64 │ 0 │ 1 │ -│ _row_exists │ UInt8 │ 0 │ 1 │ -│ _block_number │ UInt64 │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ 1 │ 0 │ -│ t.a │ String │ 1 │ 0 │ -│ t.b │ UInt64 │ 1 │ 0 │ -└──────────────────┴──────────────────────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┠+│ id │ UInt64 │ 0 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ 0 │ 1 │ +│ _part_index │ UInt64 │ 0 │ 1 │ +│ _part_uuid │ UUID │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ +│ _sample_factor │ Float64 │ 0 │ 1 │ +│ _part_offset │ UInt64 │ 0 │ 1 │ +│ _row_exists │ UInt8 │ 0 │ 1 │ +│ _block_number │ UInt64 │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ 1 │ 0 │ +│ t.a │ String │ 1 │ 0 │ +│ t.b │ UInt64 │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─is_subcolumn─┬─is_virtual─┠-│ id │ UInt64 │ 0 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ 0 │ -│ _table │ LowCardinality(String) │ 0 │ 1 │ -│ _part │ LowCardinality(String) │ 0 │ 1 │ -│ _part_index │ UInt64 │ 0 │ 1 │ -│ _part_uuid │ UUID │ 0 │ 1 │ -│ _partition_id │ 
LowCardinality(String) │ 0 │ 1 │ -│ _sample_factor │ Float64 │ 0 │ 1 │ -│ _part_offset │ UInt64 │ 0 │ 1 │ -│ _row_exists │ UInt8 │ 0 │ 1 │ -│ _block_number │ UInt64 │ 0 │ 1 │ -│ _shard_num │ UInt32 │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ 1 │ 0 │ -│ t.a │ String │ 1 │ 0 │ -│ t.b │ UInt64 │ 1 │ 0 │ -└────────────────┴──────────────────────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┠+│ id │ UInt64 │ 0 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ 0 │ 1 │ +│ _part_index │ UInt64 │ 0 │ 1 │ +│ _part_uuid │ UUID │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ +│ _sample_factor │ Float64 │ 0 │ 1 │ +│ _part_offset │ UInt64 │ 0 │ 1 │ +│ _row_exists │ UInt8 │ 0 │ 1 │ +│ _block_number │ UInt64 │ 0 │ 1 │ +│ _table │ LowCardinality(String) │ 0 │ 1 │ +│ _shard_num │ UInt32 │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ 1 │ 0 │ +│ t.a │ String │ 1 │ 0 │ +│ t.b │ UInt64 │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────┘ diff --git a/tests/queries/0_stateless/02890_describe_table_options.sql b/tests/queries/0_stateless/02890_describe_table_options.sql index 236100148ff..63806c7ee3d 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.sql +++ b/tests/queries/0_stateless/02890_describe_table_options.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS t_describe_options; +SET print_pretty_type_names = 0; + CREATE TABLE t_describe_options ( id UInt64 COMMENT 'index column', arr Array(UInt64) DEFAULT [10, 20] CODEC(ZSTD), From d4702eaddf2a97165f672250afe49ea1f1193e53 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 29 Feb 2024 19:34:17 +0100 Subject: [PATCH 115/356] Fix test --- .../0_stateless/02998_analyzer_secret_args_tree_node.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql index f1d916eca80..f40b40b6c8c 100644 --- a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql @@ -1,4 +1,5 @@ --- Tags: no-fasttest: encrypt function doesn't exist in the fastest build +-- Tags: no-fasttest +-- encrypt function doesn't exist in the fastest build -- { echoOn } SET allow_experimental_analyzer = 1; From 621ea46b57be3c9422a1fe54db11dbe1e5d726ad Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Thu, 29 Feb 2024 13:36:56 -0500 Subject: [PATCH 116/356] Update 02998_analyzer_secret_args_tree_node.sql --- .../0_stateless/02998_analyzer_secret_args_tree_node.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql index f40b40b6c8c..e5b7a9fe20a 100644 --- a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql @@ -1,5 +1,4 @@ --- Tags: no-fasttest --- encrypt function doesn't exist in the fastest build +-- Tag: no-fasttest: encrypt function doesn't exist in the fastest build -- { echoOn } SET allow_experimental_analyzer = 1; From caf50f4ea1613343a1e4fb5bed0bee8917afaa3c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 29 Feb 2024 21:09:12 +0000 Subject: [PATCH 117/356] fix test --- 
src/Interpreters/InterpreterDescribeQuery.h | 1 - src/Interpreters/TreeRewriter.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterDescribeQuery.h b/src/Interpreters/InterpreterDescribeQuery.h index 64573bcb93f..5d01745db6b 100644 --- a/src/Interpreters/InterpreterDescribeQuery.h +++ b/src/Interpreters/InterpreterDescribeQuery.h @@ -4,7 +4,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" namespace DB diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 1a32b885f4d..896182c1f0f 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1119,7 +1119,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select optimize_trivial_count = true; for (const auto & required_column : required) { - if (partition_columns_set.contains(required_column)) + if (!partition_columns_set.contains(required_column)) { optimize_trivial_count = false; break; From c1d542d700ef38a1d9ddb5cba02a2b35fecbef6a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 29 Feb 2024 21:19:40 +0000 Subject: [PATCH 118/356] Update version_date.tsv and changelogs after v24.2.1.2248-stable --- SECURITY.md | 3 +- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.2.1.2248-stable.md | 462 +++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 6 files changed, 468 insertions(+), 4 deletions(-) create mode 100644 docs/changelogs/v24.2.1.2248-stable.md diff --git a/SECURITY.md b/SECURITY.md index 79ca0269838..86578b188d8 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 24.2 | âœ”ï¸ | | 24.1 | âœ”ï¸ | | 23.12 | âœ”ï¸ | -| 23.11 | âœ”ï¸ | +| 23.11 | ⌠| | 23.10 | ⌠| | 23.9 | ⌠| | 23.8 | âœ”ï¸ | diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index d39ca312454..2f42854a972 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.1.5.6" +ARG VERSION="24.2.1.2248" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 2d07937ad79..7bd777de5b9 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.1.5.6" +ARG VERSION="24.2.1.2248" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index d4775b17319..03d01cfd5d7 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.1.5.6" +ARG VERSION="24.2.1.2248" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty 
deb_location_url url to create a docker image diff --git a/docs/changelogs/v24.2.1.2248-stable.md b/docs/changelogs/v24.2.1.2248-stable.md new file mode 100644 index 00000000000..6113dd51ab1 --- /dev/null +++ b/docs/changelogs/v24.2.1.2248-stable.md @@ -0,0 +1,462 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.2.1.2248-stable (891689a4150) FIXME as compared to v24.1.1.2048-stable (5a024dfc093) + +#### Backward Incompatible Change +* Validate suspicious/experimental types in nested types. Previously we didn't validate such types (except JSON) in nested types like Array/Tuple/Map. [#59385](https://github.com/ClickHouse/ClickHouse/pull/59385) ([Kruglov Pavel](https://github.com/Avogar)). +* The sort clause `ORDER BY ALL` (introduced with v23.12) is replaced by `ORDER BY *`. The previous syntax was too error-prone for tables with a column `all`. [#59450](https://github.com/ClickHouse/ClickHouse/pull/59450) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename the setting `extract_kvp_max_pairs_per_row` to `extract_key_value_pairs_max_pairs_per_row`. The bug (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59683](https://github.com/ClickHouse/ClickHouse/pull/59683) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rename the setting extract_kvp_max_pairs_per_row to extract_key_value_pairs_max_pairs_per_row. The bug (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59960](https://github.com/ClickHouse/ClickHouse/pull/59960) ([jsc0218](https://github.com/jsc0218)). +* Add sanity check for number of threads and block sizes. [#60138](https://github.com/ClickHouse/ClickHouse/pull/60138) ([Raúl Marín](https://github.com/Algunenano)). + +#### New Feature +* Added maximum sequential login failures to the quota. [#54737](https://github.com/ClickHouse/ClickHouse/pull/54737) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. [#54901](https://github.com/ClickHouse/ClickHouse/pull/54901) ([pufit](https://github.com/pufit)). +* Backup & Restore support for AzureBlobStorage resolves [#50747](https://github.com/ClickHouse/ClickHouse/issues/50747). [#56988](https://github.com/ClickHouse/ClickHouse/pull/56988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Implemented automatic conversion of merge tree tables of different kinds to replicated engine. Create empty `convert_to_replicated` file in table's data directory (`/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`) and that table will be converted automatically on next server start. [#57798](https://github.com/ClickHouse/ClickHouse/pull/57798) ([Kirill](https://github.com/kirillgarbar)). +* Added table function `mergeTreeIndex`. It represents the contents of index and marks files of `MergeTree` tables. It can be used for introspection. Syntax: `mergeTreeIndex(database, table, [with_marks = true])` where `database.table` is an existing table with `MergeTree` engine. [#58140](https://github.com/ClickHouse/ClickHouse/pull/58140) ([Anton Popov](https://github.com/CurtizJ)). 
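For illustration, here is a minimal usage sketch of the `mergeTreeIndex` table function described in the entry above. The call follows the syntax `mergeTreeIndex(database, table, [with_marks = true])` quoted there; the table name and data are hypothetical.

```sql
-- Hypothetical table, created only to have MergeTree parts whose index can be inspected.
CREATE TABLE t_index_introspection (id UInt64, s String)
ENGINE = MergeTree ORDER BY id;

INSERT INTO t_index_introspection SELECT number, toString(number) FROM numbers(1000);

-- Introspect the primary index (and, with with_marks = true, the marks) of the table's data parts.
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_introspection, with_marks = true) LIMIT 5;
```

Note that a later `NO CL ENTRY` item in this changelog reverts "Add table function `mergeTreeIndex`", so availability depends on the final state of the release.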
+* Added function `seriesOutliersTukey` to detect outliers in series data using Tukey's fences algorithm. [#58632](https://github.com/ClickHouse/ClickHouse/pull/58632) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* The user can now specify the template string directly in the query using `format_schema_rows_template` as an alternative to `format_template_row`. Closes [#31363](https://github.com/ClickHouse/ClickHouse/issues/31363). [#59088](https://github.com/ClickHouse/ClickHouse/pull/59088) ([Shaun Struwig](https://github.com/Blargian)). +* Try to detect file format automatically during schema inference if it's unknown in `file/s3/hdfs/url/azureBlobStorage` engines. Closes [#50576](https://github.com/ClickHouse/ClickHouse/issues/50576). [#59092](https://github.com/ClickHouse/ClickHouse/pull/59092) ([Kruglov Pavel](https://github.com/Avogar)). +* Add function variantType that returns Enum with variant type name for each row. [#59398](https://github.com/ClickHouse/ClickHouse/pull/59398) ([Kruglov Pavel](https://github.com/Avogar)). +* Added query `ALTER TABLE table FORGET PARTITION partition` that removes ZooKeeper nodes, related to an empty partition. [#59507](https://github.com/ClickHouse/ClickHouse/pull/59507) ([Sergei Trifonov](https://github.com/serxa)). +* Support JWT credentials file for the NATS table engine. [#59543](https://github.com/ClickHouse/ClickHouse/pull/59543) ([Nickolaj Jepsen](https://github.com/nickolaj-jepsen)). +* Provides new aggregate function ‘groupArrayIntersect’. Follows up: [#49862](https://github.com/ClickHouse/ClickHouse/issues/49862). [#59598](https://github.com/ClickHouse/ClickHouse/pull/59598) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Implemented system.dns_cache table, which can be useful for debugging DNS issues. [#59856](https://github.com/ClickHouse/ClickHouse/pull/59856) ([Kirill Nikiforov](https://github.com/allmazz)). +* The codec `LZ4HC` will accept a new level 2, which is faster than the previous minimum level 3, at the expense of less compression. In previous versions, `LZ4HC(2)` and less was the same as `LZ4HC(3)`. Author: [Cyan4973](https://github.com/Cyan4973). [#60090](https://github.com/ClickHouse/ClickHouse/pull/60090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implemented system.dns_cache table, which can be useful for debugging DNS issues. New server setting dns_cache_max_size. [#60257](https://github.com/ClickHouse/ClickHouse/pull/60257) ([Kirill Nikiforov](https://github.com/allmazz)). +* Support single-argument version for the merge table function, as `merge(['db_name', ] 'tables_regexp')`. [#60372](https://github.com/ClickHouse/ClickHouse/pull/60372) ([豪肥肥](https://github.com/HowePa)). +* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. [#60439](https://github.com/ClickHouse/ClickHouse/pull/60439) ([pufit](https://github.com/pufit)). + +#### Performance Improvement +* Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section. [#52230](https://github.com/ClickHouse/ClickHouse/pull/52230) ([JackyWoo](https://github.com/JackyWoo)). +* Vectorized distance functions used in vector search. [#58866](https://github.com/ClickHouse/ClickHouse/pull/58866) ([Robert Schulze](https://github.com/rschu1ze)). +* Continue optimizing branch miss of if function when result type is float*/decimal*/int* , follow up of https://github.com/ClickHouse/ClickHouse/pull/57885. 
[#59148](https://github.com/ClickHouse/ClickHouse/pull/59148) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Optimize if function when input type is map, speed up by ~10x. [#59413](https://github.com/ClickHouse/ClickHouse/pull/59413) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Improve performance of Int8 type by implementing strict aliasing. [#59485](https://github.com/ClickHouse/ClickHouse/pull/59485) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize performance of sum/avg conditionally for bigint and big decimal types by reducing branch miss. [#59504](https://github.com/ClickHouse/ClickHouse/pull/59504) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Improve performance of SELECTs with active mutations. [#59531](https://github.com/ClickHouse/ClickHouse/pull/59531) ([Azat Khuzhin](https://github.com/azat)). +* Optimized function `isNotNull` with AVX2. [#59621](https://github.com/ClickHouse/ClickHouse/pull/59621) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Reuse the result of `FunctionFactory::instance().get("isNotNull", context)` and `FunctionFactory::instance().get("assumeNotNull", context)`. Make sure it is called once during the lifetime of `FunctionCoalesce`. [#59627](https://github.com/ClickHouse/ClickHouse/pull/59627) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Improve ASOF JOIN performance for sorted or almost sorted data. [#59731](https://github.com/ClickHouse/ClickHouse/pull/59731) ([Maksim Kita](https://github.com/kitaisreal)). +* Primary key will use less amount of memory. [#60049](https://github.com/ClickHouse/ClickHouse/pull/60049) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve memory usage for primary key and some other operations. [#60050](https://github.com/ClickHouse/ClickHouse/pull/60050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The tables' primary keys will be loaded in memory lazily on first access. This is controlled by the new MergeTree setting `primary_key_lazy_load`, which is on by default. This provides several advantages: - it will not be loaded for tables that are not used; - if there is not enough memory, an exception will be thrown on first use instead of at server startup. This provides several disadvantages: - the latency of loading the primary key will be paid on the first query rather than before accepting connections; this theoretically may introduce a thundering-herd problem. This closes [#11188](https://github.com/ClickHouse/ClickHouse/issues/11188). [#60093](https://github.com/ClickHouse/ClickHouse/pull/60093) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Vectorized function `dotProduct` which is useful for vector search. [#60202](https://github.com/ClickHouse/ClickHouse/pull/60202) ([Robert Schulze](https://github.com/rschu1ze)). +* As is shown in Fig 1, the replacement of "&&" with "&" could generate the SIMD code. ![image](https://github.com/ClickHouse/ClickHouse/assets/26588299/a5a72ac4-6dc6-4d52-835a-4f512e55f0b9) Fig 1. Code compiled from '&&' (left) and '&' (right). [#60498](https://github.com/ClickHouse/ClickHouse/pull/60498) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). + +#### Improvement +* Added support for parameterized view with analyzer to not analyze create parameterized view. Refactor existing parameterized view logic to not analyze create parameterized view. [#54211](https://github.com/ClickHouse/ClickHouse/pull/54211) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Do not consider data part as broken if projection is broken. 
Closes [#56593](https://github.com/ClickHouse/ClickHouse/issues/56593). [#56864](https://github.com/ClickHouse/ClickHouse/pull/56864) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add short-circuit ability for `dictGetOrDefault` function. Closes [#52098](https://github.com/ClickHouse/ClickHouse/issues/52098). [#57767](https://github.com/ClickHouse/ClickHouse/pull/57767) ([jsc0218](https://github.com/jsc0218)). +* Running `ALTER COLUMN MATERIALIZE` on a column with `DEFAULT` or `MATERIALIZED` expression now writes the correct values: The default value for existing parts with default value or the non-default value for existing parts with non-default value. Previously, the default value was written for all existing parts. [#58023](https://github.com/ClickHouse/ClickHouse/pull/58023) ([Duc Canh Le](https://github.com/canhld94)). +* Enabled a backoff logic (e.g. exponential). Will provide an ability for reduced CPU usage, memory usage and log file sizes. [#58036](https://github.com/ClickHouse/ClickHouse/pull/58036) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Add improvement to count InitialQuery. [#58195](https://github.com/ClickHouse/ClickHouse/pull/58195) ([Unalian](https://github.com/Unalian)). +* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#58292](https://github.com/ClickHouse/ClickHouse/pull/58292) ([flynn](https://github.com/ucasfl)). +* Implement auto-adjustment for asynchronous insert timeouts. The following settings are introduced: async_insert_poll_timeout_ms, async_insert_use_adaptive_busy_timeout, async_insert_busy_timeout_min_ms, async_insert_busy_timeout_max_ms, async_insert_busy_timeout_increase_rate, async_insert_busy_timeout_decrease_rate. [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ([Julia Kartseva](https://github.com/jkartseva)). +* Allow to define `volume_priority` in `storage_configuration`. [#58533](https://github.com/ClickHouse/ClickHouse/pull/58533) ([Andrey Zvonov](https://github.com/zvonand)). +* Add support for Date32 type in T64 codec. [#58738](https://github.com/ClickHouse/ClickHouse/pull/58738) ([Hongbin Ma](https://github.com/binmahone)). +* Support `LEFT JOIN`, `ALL INNER JOIN`, and simple subqueries for parallel replicas (only with analyzer). New setting `parallel_replicas_prefer_local_join` chooses local `JOIN` execution (by default) vs `GLOBAL JOIN`. All tables should exist on every replica from `cluster_for_parallel_replicas`. New settings `min_external_table_block_size_rows` and `min_external_table_block_size_bytes` are used to squash small blocks that are sent for temporary tables (only with analyzer). [#58916](https://github.com/ClickHouse/ClickHouse/pull/58916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow trailing commas in types with several items. [#59119](https://github.com/ClickHouse/ClickHouse/pull/59119) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Allow parallel and distributed processing for `S3Queue` table engine. For distributed processing use setting `s3queue_total_shards_num` (by default `1`). Setting `s3queue_processing_threads_num` previously was not allowed for Ordered processing mode, now it is allowed. 
Warning: settings `s3queue_processing_threads_num`(processing threads per each shard) and `s3queue_total_shards_num` for ordered mode change how metadata is stored (make the number of `max_processed_file` nodes equal to `s3queue_processing_threads_num * s3queue_total_shards_num`), so they must be the same for all shards and cannot be changed once at least one shard is created. [#59167](https://github.com/ClickHouse/ClickHouse/pull/59167) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow concurrent table creation in `DatabaseReplicated` during `recoverLostReplica`. [#59277](https://github.com/ClickHouse/ClickHouse/pull/59277) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Settings for the Distributed table engine can now be specified in the server configuration file (similar to MergeTree settings), e.g. ``` false ```. [#59291](https://github.com/ClickHouse/ClickHouse/pull/59291) ([Azat Khuzhin](https://github.com/azat)). +* Use MergeTree as a default table engine. It makes the usability much better, and closer to ClickHouse Cloud. [#59316](https://github.com/ClickHouse/ClickHouse/pull/59316) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Retry disconnects and expired sessions when reading `system.zookeeper`. This is helpful when reading many rows from `system.zookeeper` table especially in the presence of fault-injected disconnects. [#59388](https://github.com/ClickHouse/ClickHouse/pull/59388) ([Alexander Gololobov](https://github.com/davenger)). +* Do not interpret numbers with leading zeroes as octals when `input_format_values_interpret_expressions=0`. [#59403](https://github.com/ClickHouse/ClickHouse/pull/59403) ([Joanna Hulboj](https://github.com/jh0x)). +* At startup and whenever config files are changed, ClickHouse updates the hard memory limits of its total memory tracker. These limits are computed based on various server settings and cgroups limits (on Linux). Previously, setting `/sys/fs/cgroup/memory.max` (for cgroups v2) was hard-coded. As a result, cgroup v2 memory limits configured for nested groups (hierarchies), e.g. `/sys/fs/cgroup/my/nested/group/memory.max` were ignored. This is now fixed. The behavior of v1 memory limits remains unchanged. [#59435](https://github.com/ClickHouse/ClickHouse/pull/59435) ([Robert Schulze](https://github.com/rschu1ze)). +* New profile events added to observe the time spent on calculating PK/projections/secondary indices during `INSERT`-s. [#59436](https://github.com/ClickHouse/ClickHouse/pull/59436) ([Nikita Taranov](https://github.com/nickitat)). +* Allow to define a starting point for S3Queue with Ordered mode at creation using setting `s3queue_last_processed_path`. [#59446](https://github.com/ClickHouse/ClickHouse/pull/59446) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Keeper improvement: cache only a certain amount of logs in-memory controlled by `latest_logs_cache_size_threshold` and `commit_logs_cache_size_threshold`. [#59460](https://github.com/ClickHouse/ClickHouse/pull/59460) ([Antonio Andelic](https://github.com/antonio2368)). +* Made comments for system tables also available in `system.tables` in `clickhouse-local`. [#59493](https://github.com/ClickHouse/ClickHouse/pull/59493) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Don't infer floats in exponential notation by default. Add a setting `input_format_try_infer_exponent_floats` that will restore previous behaviour (disabled by default). Closes [#59476](https://github.com/ClickHouse/ClickHouse/issues/59476). 
[#59500](https://github.com/ClickHouse/ClickHouse/pull/59500) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow alter operations to be surrounded by parentheses. The emission of parentheses can be controlled by the `format_alter_operations_with_parentheses` config. By default, in formatted queries the parentheses are emitted, as we store the formatted alter operations in some places as metadata (e.g.: mutations). The new syntax clarifies some of the queries where alter operations end in a list. E.g.: `ALTER TABLE x MODIFY TTL date GROUP BY a, b, DROP COLUMN c` cannot be parsed properly with the old syntax. In the new syntax the query `ALTER TABLE x (MODIFY TTL date GROUP BY a, b), (DROP COLUMN c)` is obvious. Older versions are not able to read the new syntax, therefore using the new syntax might cause issues if newer and older versions of ClickHouse are mixed in a single cluster. [#59532](https://github.com/ClickHouse/ClickHouse/pull/59532) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* The previous default value of 1 MB for `async_insert_max_data_size` appeared to be too small. The new one is 10 MiB. [#59536](https://github.com/ClickHouse/ClickHouse/pull/59536) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Previously the whole result was accumulated in memory and returned as one big chunk. This change should help to reduce memory consumption when reading many rows from `system.zookeeper`, allow showing intermediate progress (how many rows have been read so far) and avoid hitting the connection timeout when the result set is big. [#59545](https://github.com/ClickHouse/ClickHouse/pull/59545) ([Alexander Gololobov](https://github.com/davenger)). +* Now the dashboard understands both the compressed and uncompressed state of the URL's #hash (backward compatibility). Continuation of [#59124](https://github.com/ClickHouse/ClickHouse/issues/59124). [#59548](https://github.com/ClickHouse/ClickHouse/pull/59548) ([Amos Bird](https://github.com/amosbird)). +* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.3.1 to v1.4.0. Also fixed a bug in the polling timeout mechanism: we observed that in some cases the timeout won't work properly; if a timeout happens, IAA and CPU may process the buffer concurrently. So we'd better make sure the IAA codec status is not QPL_STS_BEING_PROCESSED, then fall back to the SW codec. [#59551](https://github.com/ClickHouse/ClickHouse/pull/59551) ([jasperzhu](https://github.com/jinjunzh)). +* Keeper improvement: reduce size of data node even more. [#59592](https://github.com/ClickHouse/ClickHouse/pull/59592) ([Antonio Andelic](https://github.com/antonio2368)). +* Do not show a warning about the server version in ClickHouse Cloud because ClickHouse Cloud handles seamless upgrades automatically. [#59657](https://github.com/ClickHouse/ClickHouse/pull/59657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* After self-extraction, the temporary binary is moved instead of copied. [#59661](https://github.com/ClickHouse/ClickHouse/pull/59661) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix stack unwinding on MacOS. This closes [#53653](https://github.com/ClickHouse/ClickHouse/issues/53653). [#59690](https://github.com/ClickHouse/ClickHouse/pull/59690) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622).
[#59697](https://github.com/ClickHouse/ClickHouse/pull/59697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Perform synchronous inserts if dependent MV deduplication is enabled through deduplicate_blocks_in_dependent_materialized_views=1. [#59699](https://github.com/ClickHouse/ClickHouse/pull/59699) ([Julia Kartseva](https://github.com/jkartseva)). +* Added settings `split_parts_ranges_into_intersecting_and_non_intersecting_final` and `split_intersecting_parts_ranges_into_layers_final`. These settings are needed to disable optimizations for queries with `FINAL` and are intended for debugging only. [#59705](https://github.com/ClickHouse/ClickHouse/pull/59705) ([Maksim Kita](https://github.com/kitaisreal)). +* Unify XML and SQL created named collection behaviour in Kafka storage. [#59710](https://github.com/ClickHouse/ClickHouse/pull/59710) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* In case when `merge_max_block_size_bytes` is small enough and tables contain wide rows (strings or tuples), background merges may get stuck in an endless loop. This behaviour is fixed. Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59340. [#59812](https://github.com/ClickHouse/ClickHouse/pull/59812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow uuid in replica_path if CREATE TABLE explicitly has it. [#59908](https://github.com/ClickHouse/ClickHouse/pull/59908) ([Azat Khuzhin](https://github.com/azat)). +* Add column `metadata_version` of ReplicatedMergeTree tables to the `system.tables` system table. [#59942](https://github.com/ClickHouse/ClickHouse/pull/59942) ([Maksim Kita](https://github.com/kitaisreal)). +* Keeper improvement: send only Keeper-related metrics/events for Prometheus. [#59945](https://github.com/ClickHouse/ClickHouse/pull/59945) ([Antonio Andelic](https://github.com/antonio2368)). +* The dashboard will display metrics across different ClickHouse versions even if the structure of system tables has changed after the upgrade. [#59967](https://github.com/ClickHouse/ClickHouse/pull/59967) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow loading AZ info from a file. [#59976](https://github.com/ClickHouse/ClickHouse/pull/59976) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Keeper improvement: add retries on failures for Disk-related operations. [#59980](https://github.com/ClickHouse/ClickHouse/pull/59980) ([Antonio Andelic](https://github.com/antonio2368)). +* Add new config setting `backups.remove_backup_files_after_failure`: ``` true ```. [#60002](https://github.com/ClickHouse/ClickHouse/pull/60002) ([Vitaly Baranov](https://github.com/vitlibar)). +* Use multiple threads while reading the metadata of tables from a backup while executing the RESTORE command. [#60040](https://github.com/ClickHouse/ClickHouse/pull/60040) ([Vitaly Baranov](https://github.com/vitlibar)). +* Now if `StorageBuffer` has more than 1 shard (`num_layers` > 1), the background flush will happen simultaneously for all shards in multiple threads. [#60111](https://github.com/ClickHouse/ClickHouse/pull/60111) ([alesapin](https://github.com/alesapin)). +* Support specifying users for specific S3 settings in config using the `user` key. [#60144](https://github.com/ClickHouse/ClickHouse/pull/60144) ([Antonio Andelic](https://github.com/antonio2368)). +* Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code.
[#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)). +* Implement comparison operator for Variant values and proper Field inserting into Variant column. Don't allow creating `Variant` type with similar variant types by default (allow under a setting `allow_suspicious_variant_types`). Closes [#59996](https://github.com/ClickHouse/ClickHouse/issues/59996). Closes [#59850](https://github.com/ClickHouse/ClickHouse/issues/59850). [#60198](https://github.com/ClickHouse/ClickHouse/pull/60198) ([Kruglov Pavel](https://github.com/Avogar)). +* Short circuit execution for `ULIDStringToDateTime`. [#60211](https://github.com/ClickHouse/ClickHouse/pull/60211) ([Juan Madurga](https://github.com/jlmadurga)). +* Added `query_id` column for tables `system.backups` and `system.backup_log`. Added error stacktrace to `error` column. [#60220](https://github.com/ClickHouse/ClickHouse/pull/60220) ([Maksim Kita](https://github.com/kitaisreal)). +* Connections through the MySQL port now automatically run with setting `prefer_column_name_to_alias = 1` to support QuickSight out-of-the-box. Also, settings `mysql_map_string_to_text_in_show_columns` and `mysql_map_fixed_string_to_text_in_show_columns` are now enabled by default, also affecting only MySQL connections. This increases compatibility with more BI tools. [#60365](https://github.com/ClickHouse/ClickHouse/pull/60365) ([Robert Schulze](https://github.com/rschu1ze)). +* When the output format is a Pretty format and a block consists of a single numeric value which exceeds one million, a readable number will be printed on the right of the table, e.g. ``` ┌──────count()─┐ │ 233765663884 │ -- 233.77 billion └──────────────┘ ```. [#60379](https://github.com/ClickHouse/ClickHouse/pull/60379) ([rogeryk](https://github.com/rogeryk)). +* Fix a race condition in JavaScript code leading to duplicate charts on top of each other. [#60392](https://github.com/ClickHouse/ClickHouse/pull/60392) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#60434](https://github.com/ClickHouse/ClickHouse/pull/60434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Added builds and tests with coverage collection with introspection. Continuation of [#56102](https://github.com/ClickHouse/ClickHouse/issues/56102). [#58792](https://github.com/ClickHouse/ClickHouse/pull/58792) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Include `pytest-reportlog` in integration test CI runner Dockerfile to enable JSON test reports. [#58926](https://github.com/ClickHouse/ClickHouse/pull/58926) ([MyroTk](https://github.com/MyroTk)). +* Update the rust toolchain in `corrosion-cmake` when the CMake cross-compilation toolchain variable is set. [#59309](https://github.com/ClickHouse/ClickHouse/pull/59309) ([Aris Tritas](https://github.com/aris-aiven)). +* Add some fuzzing to ASTLiterals. [#59383](https://github.com/ClickHouse/ClickHouse/pull/59383) ([Raúl Marín](https://github.com/Algunenano)). +* If you want to run initdb scripts every time the ClickHouse container starts, you should initialize the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
+* Remove ability to disable generic clickhouse components (like server/client/...), but keep some that requires extra libraries (like ODBC or keeper). [#59857](https://github.com/ClickHouse/ClickHouse/pull/59857) ([Azat Khuzhin](https://github.com/azat)). +* Query fuzzer will fuzz SETTINGS inside queries. [#60087](https://github.com/ClickHouse/ClickHouse/pull/60087) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add test that validates projections still work after attach partition. [#60415](https://github.com/ClickHouse/ClickHouse/pull/60415) ([Arthur Passos](https://github.com/arthurpassos)). +* Add test that validates attach partition fails if structure differs because of materialized column. [#60418](https://github.com/ClickHouse/ClickHouse/pull/60418) ([Arthur Passos](https://github.com/arthurpassos)). +* Add support for building ClickHouse with clang-19 (master). [#60448](https://github.com/ClickHouse/ClickHouse/pull/60448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speedup check-whitespaces check. [#60496](https://github.com/ClickHouse/ClickHouse/pull/60496) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Non ready set in TTL WHERE. [#57430](https://github.com/ClickHouse/ClickHouse/pull/57430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix quantilesGK bug [#58216](https://github.com/ClickHouse/ClickHouse/pull/58216) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Disable parallel replicas JOIN with CTE (not analyzer) [#59239](https://github.com/ClickHouse/ClickHouse/pull/59239) ([Raúl Marín](https://github.com/Algunenano)). +* Fix bug with `intDiv` for decimal arguments [#59243](https://github.com/ClickHouse/ClickHouse/pull/59243) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). +* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)). +* Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)). +* Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). +* Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). +* Fix incorrect result of arrayElement / map[] on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). 
+* Fix crash in topK when merging empty states [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)). +* Fix distributed table with a constant sharding key [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)). +* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). +* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)). +* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* Validate types of arguments for `minmax` skipping index [#59733](https://github.com/ClickHouse/ClickHouse/pull/59733) ([Anton Popov](https://github.com/CurtizJ)). +* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). +* Fix AST fuzzer issue in function `countMatches` [#59752](https://github.com/ClickHouse/ClickHouse/pull/59752) ([Robert Schulze](https://github.com/rschu1ze)). +* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix StorageURL doing some of the query execution in single thread [#59833](https://github.com/ClickHouse/ClickHouse/pull/59833) ([Michael Kolupaev](https://github.com/al13n321)). +* s3queue: fix uninitialized value [#59897](https://github.com/ClickHouse/ClickHouse/pull/59897) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix crash in JSONColumnsWithMetadata format over http [#59925](https://github.com/ClickHouse/ClickHouse/pull/59925) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not rewrite sum() to count() if return value differs in analyzer [#59926](https://github.com/ClickHouse/ClickHouse/pull/59926) ([Azat Khuzhin](https://github.com/azat)). +* UniqExactSet read crash fix [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)). +* ReplicatedMergeTree invalid metadata_version fix [#59946](https://github.com/ClickHouse/ClickHouse/pull/59946) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix data race in `StorageDistributed` [#59987](https://github.com/ClickHouse/ClickHouse/pull/59987) ([Nikita Taranov](https://github.com/nickitat)). +* Run init scripts when option is enabled rather than disabled [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)). +* Fix scale conversion for DateTime64 [#60004](https://github.com/ClickHouse/ClickHouse/pull/60004) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix INSERT into SQLite with single quote (by escaping single quotes with a quote instead of backslash) [#60015](https://github.com/ClickHouse/ClickHouse/pull/60015) ([Azat Khuzhin](https://github.com/azat)). 
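To illustrate the SQLite single-quote fix in the entry just above: when ClickHouse forwards an `INSERT` to SQLite, a value containing a single quote is now escaped by doubling the quote (the SQL-standard form) instead of with a backslash. A minimal sketch, assuming a hypothetical SQLite-backed table and database path:

```sql
-- Hypothetical SQLite-backed table; the file path and table name are placeholders.
CREATE TABLE sqlite_words (w String) ENGINE = SQLite('/tmp/words.db', 'words');

-- The value contains a single quote. The statement ClickHouse now sends to SQLite escapes it
-- as 'it''s' (doubled quote) rather than the previously generated 'it\'s'.
INSERT INTO sqlite_words VALUES ('it\'s');
```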
+* Fix several logical errors in arrayFold [#60022](https://github.com/ClickHouse/ClickHouse/pull/60022) ([Raúl Marín](https://github.com/Algunenano)). +* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible exception from s3queue table on drop [#60036](https://github.com/ClickHouse/ClickHouse/pull/60036) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix formatting of NOT with single literals [#60042](https://github.com/ClickHouse/ClickHouse/pull/60042) ([Raúl Marín](https://github.com/Algunenano)). +* Use max_query_size from context in DDLLogEntry instead of hardcoded 4096 [#60083](https://github.com/ClickHouse/ClickHouse/pull/60083) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix inconsistent formatting of queries [#60095](https://github.com/ClickHouse/ClickHouse/pull/60095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix inconsistent formatting of explain in subqueries [#60102](https://github.com/ClickHouse/ClickHouse/pull/60102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). +* Allow casting of bools in string representation to true bools [#60160](https://github.com/ClickHouse/ClickHouse/pull/60160) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix system.s3queue_log [#60166](https://github.com/ClickHouse/ClickHouse/pull/60166) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix arrayReduce with nullable aggregate function name [#60188](https://github.com/ClickHouse/ClickHouse/pull/60188) ([Raúl Marín](https://github.com/Algunenano)). +* Fix actions execution during preliminary filtering (PK, partition pruning) [#60196](https://github.com/ClickHouse/ClickHouse/pull/60196) ([Azat Khuzhin](https://github.com/azat)). +* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Revert "Replace `ORDER BY ALL` by `ORDER BY *`" [#60248](https://github.com/ClickHouse/ClickHouse/pull/60248) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix http exception codes. [#60252](https://github.com/ClickHouse/ClickHouse/pull/60252) ([Austin Kothig](https://github.com/kothiga)). +* s3queue: fix bug (also fixes flaky test_storage_s3_queue/test.py::test_shards_distributed) [#60282](https://github.com/ClickHouse/ClickHouse/pull/60282) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix use-of-uninitialized-value and invalid result in hashing functions with IPv6 [#60359](https://github.com/ClickHouse/ClickHouse/pull/60359) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix OptimizeDateOrDateTimeConverterWithPreimageVisitor with null arguments [#60453](https://github.com/ClickHouse/ClickHouse/pull/60453) ([Raúl Marín](https://github.com/Algunenano)). +* Merging [#59674](https://github.com/ClickHouse/ClickHouse/issues/59674). [#60470](https://github.com/ClickHouse/ClickHouse/pull/60470) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correctly check keys in s3Cluster [#60477](https://github.com/ClickHouse/ClickHouse/pull/60477) ([Antonio Andelic](https://github.com/antonio2368)). + +#### CI Fix or Improvement (changelog entry is not required) + +* ... [#60457](https://github.com/ClickHouse/ClickHouse/pull/60457) ([Max K.](https://github.com/maxknv)). +* ...
[#60512](https://github.com/ClickHouse/ClickHouse/pull/60512) ([Max K.](https://github.com/maxknv)). +* Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)). +* ... [#60557](https://github.com/ClickHouse/ClickHouse/pull/60557) ([Max K.](https://github.com/maxknv)). +* BUG: build job can report success cache record on failed build Add a check relying on job report fail. [#60587](https://github.com/ClickHouse/ClickHouse/pull/60587) ([Max K.](https://github.com/maxknv)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Revert "Add new aggregation function groupArraySorted()""'. [#59003](https://github.com/ClickHouse/ClickHouse/pull/59003) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "Update libxml2 version to address some bogus security issues"'. [#59479](https://github.com/ClickHouse/ClickHouse/pull/59479) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Poco Logger small refactoring"'. [#59509](https://github.com/ClickHouse/ClickHouse/pull/59509) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Revert "Poco Logger small refactoring""'. [#59564](https://github.com/ClickHouse/ClickHouse/pull/59564) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "MergeTree FINAL optimization diagnostics and settings"'. [#59702](https://github.com/ClickHouse/ClickHouse/pull/59702) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Use `MergeTree` as a default table engine"'. [#59711](https://github.com/ClickHouse/ClickHouse/pull/59711) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Rename a setting"'. [#59754](https://github.com/ClickHouse/ClickHouse/pull/59754) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Less error prone interface of read buffers"'. [#59911](https://github.com/ClickHouse/ClickHouse/pull/59911) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Revert "Update version_date.tsv and changelogs after v24.1.4.19-stable"'. [#59973](https://github.com/ClickHouse/ClickHouse/pull/59973) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Revert "ReplicatedMergeTree invalid metadata_version fix"'. [#60058](https://github.com/ClickHouse/ClickHouse/pull/60058) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Revert "ReplicatedMergeTree invalid metadata_version fix""'. [#60078](https://github.com/ClickHouse/ClickHouse/pull/60078) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "Implement system.dns_cache table"'. [#60085](https://github.com/ClickHouse/ClickHouse/pull/60085) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Restriction for the access key id for s3."'. [#60181](https://github.com/ClickHouse/ClickHouse/pull/60181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Do not retry queries if container is down in integration tests"'. [#60215](https://github.com/ClickHouse/ClickHouse/pull/60215) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Check stack size in Parser"'. [#60216](https://github.com/ClickHouse/ClickHouse/pull/60216) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Support resource request canceling"'. 
[#60253](https://github.com/ClickHouse/ClickHouse/pull/60253) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Add definers for views"'. [#60350](https://github.com/ClickHouse/ClickHouse/pull/60350) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Update build-osx.md'. [#60380](https://github.com/ClickHouse/ClickHouse/pull/60380) ([rogeryk](https://github.com/rogeryk)). +* NO CL ENTRY: 'Revert "Fix: IAST::clone() for RENAME"'. [#60398](https://github.com/ClickHouse/ClickHouse/pull/60398) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Add table function `mergeTreeIndex`"'. [#60428](https://github.com/ClickHouse/ClickHouse/pull/60428) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Userspace page cache"'. [#60550](https://github.com/ClickHouse/ClickHouse/pull/60550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Analyzer: compute ALIAS columns right after reading"'. [#60570](https://github.com/ClickHouse/ClickHouse/pull/60570) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Analyzer: support aliases and distributed JOINs in StorageMerge [#50894](https://github.com/ClickHouse/ClickHouse/pull/50894) ([Dmitry Novik](https://github.com/novikd)). +* Userspace page cache [#53770](https://github.com/ClickHouse/ClickHouse/pull/53770) ([Michael Kolupaev](https://github.com/al13n321)). +* Simplify optimize-push-to-prewhere from query plan [#58554](https://github.com/ClickHouse/ClickHouse/pull/58554) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Create ch/chc/chl symlinks by cmake as well (for develop mode) [#58609](https://github.com/ClickHouse/ClickHouse/pull/58609) ([Azat Khuzhin](https://github.com/azat)). +* CI: ci cache. step 1 [#58664](https://github.com/ClickHouse/ClickHouse/pull/58664) ([Max K.](https://github.com/maxknv)). +* Enable building JIT with UBSAN [#58952](https://github.com/ClickHouse/ClickHouse/pull/58952) ([Raúl Marín](https://github.com/Algunenano)). +* Support resource request canceling [#59032](https://github.com/ClickHouse/ClickHouse/pull/59032) ([Sergei Trifonov](https://github.com/serxa)). +* Analyzer: Do not resolve remote table id on initiator [#59073](https://github.com/ClickHouse/ClickHouse/pull/59073) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Add cast for ConstantNode from constant folding [#59121](https://github.com/ClickHouse/ClickHouse/pull/59121) ([Dmitry Novik](https://github.com/novikd)). +* Fix the default value of `async_insert_max_data_size` in EN document [#59161](https://github.com/ClickHouse/ClickHouse/pull/59161) ([Alex Cheng](https://github.com/Alex-Cheng)). +* CI: Add ARM integration tests [#59241](https://github.com/ClickHouse/ClickHouse/pull/59241) ([Max K.](https://github.com/maxknv)). +* Fix getting filename from read buffer wrappers [#59298](https://github.com/ClickHouse/ClickHouse/pull/59298) ([Kruglov Pavel](https://github.com/Avogar)). +* Update AWS SDK to 1.11.234 [#59299](https://github.com/ClickHouse/ClickHouse/pull/59299) ([Nikita Taranov](https://github.com/nickitat)). +* Split `ISlotControl` from `ConcurrencyControl` [#59313](https://github.com/ClickHouse/ClickHouse/pull/59313) ([Sergei Trifonov](https://github.com/serxa)). +* Some small fixes for docker images [#59337](https://github.com/ClickHouse/ClickHouse/pull/59337) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* CI: bugfix-validate, integration, functional test scripts updates [#59348](https://github.com/ClickHouse/ClickHouse/pull/59348) ([Max K.](https://github.com/maxknv)). +* MaterializedMySQL: Fix gtid_after_attach_test to retry on detach [#59370](https://github.com/ClickHouse/ClickHouse/pull/59370) ([Val Doroshchuk](https://github.com/valbok)). +* Poco Logger small refactoring [#59375](https://github.com/ClickHouse/ClickHouse/pull/59375) ([Maksim Kita](https://github.com/kitaisreal)). +* Add sanity checks for function return types [#59379](https://github.com/ClickHouse/ClickHouse/pull/59379) ([Raúl Marín](https://github.com/Algunenano)). +* Cleanup connection pool surroundings [#59380](https://github.com/ClickHouse/ClickHouse/pull/59380) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix ARRAY JOIN with subcolumns [#59382](https://github.com/ClickHouse/ClickHouse/pull/59382) ([vdimir](https://github.com/vdimir)). +* Update curl submodule to be version 8.50 to address the irrelevant CVE-2023-46218 and CVE-2023-49219, which we don't care about at all. [#59384](https://github.com/ClickHouse/ClickHouse/pull/59384) ([josh-hildred](https://github.com/josh-hildred)). +* Update libxml2 version to address some bogus security issues [#59386](https://github.com/ClickHouse/ClickHouse/pull/59386) ([josh-hildred](https://github.com/josh-hildred)). +* Update version after release [#59393](https://github.com/ClickHouse/ClickHouse/pull/59393) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Job names [#59395](https://github.com/ClickHouse/ClickHouse/pull/59395) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: fix status and report for docker server jobs [#59396](https://github.com/ClickHouse/ClickHouse/pull/59396) ([Max K.](https://github.com/maxknv)). +* Update version_date.tsv and changelogs after v24.1.1.2048-stable [#59397](https://github.com/ClickHouse/ClickHouse/pull/59397) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Forward declaration for PeekableReadBuffer [#59399](https://github.com/ClickHouse/ClickHouse/pull/59399) ([Azat Khuzhin](https://github.com/azat)). +* Progress bar: use FQDN to differentiate metrics from different hosts [#59404](https://github.com/ClickHouse/ClickHouse/pull/59404) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix test test_stop_other_host_during_backup [#59432](https://github.com/ClickHouse/ClickHouse/pull/59432) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update run.sh [#59433](https://github.com/ClickHouse/ClickHouse/pull/59433) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Post a failure status if can not run the CI [#59440](https://github.com/ClickHouse/ClickHouse/pull/59440) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Safer Rust (catch panic with catch_unwind()) [#59447](https://github.com/ClickHouse/ClickHouse/pull/59447) ([Azat Khuzhin](https://github.com/azat)). +* More parallel insert-select pipeline [#59448](https://github.com/ClickHouse/ClickHouse/pull/59448) ([Nikita Taranov](https://github.com/nickitat)). +* CLion says these headers are unused [#59451](https://github.com/ClickHouse/ClickHouse/pull/59451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 02720_row_policy_column_with_dots [#59453](https://github.com/ClickHouse/ClickHouse/pull/59453) ([Duc Canh Le](https://github.com/canhld94)). +* Fix problem detected by UBSAN [#59461](https://github.com/ClickHouse/ClickHouse/pull/59461) ([Raúl Marín](https://github.com/Algunenano)). 
+* Analyzer: Fix denny_crane [#59483](https://github.com/ClickHouse/ClickHouse/pull/59483) ([vdimir](https://github.com/vdimir)). +* Fix `00191_aggregating_merge_tree_and_final` [#59494](https://github.com/ClickHouse/ClickHouse/pull/59494) ([Nikita Taranov](https://github.com/nickitat)). +* Avoid running all checks when `aspell-dict.txt` was changed [#59496](https://github.com/ClickHouse/ClickHouse/pull/59496) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Fixes for binary.html [#59499](https://github.com/ClickHouse/ClickHouse/pull/59499) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Parallel replicas: better initial replicas failover (2) [#59501](https://github.com/ClickHouse/ClickHouse/pull/59501) ([Igor Nikonov](https://github.com/devcrafter)). +* Update version_date.tsv and changelogs after v24.1.2.5-stable [#59510](https://github.com/ClickHouse/ClickHouse/pull/59510) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.12.3.40-stable [#59511](https://github.com/ClickHouse/ClickHouse/pull/59511) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.11.5.29-stable [#59515](https://github.com/ClickHouse/ClickHouse/pull/59515) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update memory tracker periodically with cgroup memory usage [#59516](https://github.com/ClickHouse/ClickHouse/pull/59516) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove a scary message if an error is retryable [#59517](https://github.com/ClickHouse/ClickHouse/pull/59517) ([alesapin](https://github.com/alesapin)). +* Update the peter-evans/create-pull-request action to v6 [#59520](https://github.com/ClickHouse/ClickHouse/pull/59520) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix usage of StatusType [#59527](https://github.com/ClickHouse/ClickHouse/pull/59527) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer fix test_select_access_rights/test_main.py::test_select_count [#59528](https://github.com/ClickHouse/ClickHouse/pull/59528) ([vdimir](https://github.com/vdimir)). +* GRPCServer: do not call value() on empty optional query_info [#59533](https://github.com/ClickHouse/ClickHouse/pull/59533) ([Sema Checherinda](https://github.com/CheSema)). +* Use ConnectionPoolPtr instead of raw pointer [#59534](https://github.com/ClickHouse/ClickHouse/pull/59534) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix data race with `AggregatedDataVariants` [#59537](https://github.com/ClickHouse/ClickHouse/pull/59537) ([Nikita Taranov](https://github.com/nickitat)). +* Refactoring of dashboard state encoding [#59554](https://github.com/ClickHouse/ClickHouse/pull/59554) ([Sergei Trifonov](https://github.com/serxa)). +* CI: ci_cache, enable await [#59555](https://github.com/ClickHouse/ClickHouse/pull/59555) ([Max K.](https://github.com/maxknv)). +* Bump libssh to 0.9.8 [#59563](https://github.com/ClickHouse/ClickHouse/pull/59563) ([Robert Schulze](https://github.com/rschu1ze)). +* MultiVersion use mutex [#59565](https://github.com/ClickHouse/ClickHouse/pull/59565) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix aws submodule reference [#59566](https://github.com/ClickHouse/ClickHouse/pull/59566) ([Raúl Marín](https://github.com/Algunenano)). +* Add missed #include and [#59567](https://github.com/ClickHouse/ClickHouse/pull/59567) ([Mikhnenko Sasha](https://github.com/4JustMe4)). 
+* CI: nightly job to update latest docker tag only [#59586](https://github.com/ClickHouse/ClickHouse/pull/59586) ([Max K.](https://github.com/maxknv)). +* Analyzer: compute ALIAS columns right after reading [#59595](https://github.com/ClickHouse/ClickHouse/pull/59595) ([vdimir](https://github.com/vdimir)). +* Add another sanity check for function return types [#59605](https://github.com/ClickHouse/ClickHouse/pull/59605) ([Raúl Marín](https://github.com/Algunenano)). +* Update README.md [#59610](https://github.com/ClickHouse/ClickHouse/pull/59610) ([Tyler Hannan](https://github.com/tylerhannan)). +* Updated a list of trusted contributors [#59616](https://github.com/ClickHouse/ClickHouse/pull/59616) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* CI: fix ast fuzzer job report (slack bot issue) [#59629](https://github.com/ClickHouse/ClickHouse/pull/59629) ([Max K.](https://github.com/maxknv)). +* MergeTree FINAL optimization diagnostics and settings [#59650](https://github.com/ClickHouse/ClickHouse/pull/59650) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix default path when path is not specified in config [#59654](https://github.com/ClickHouse/ClickHouse/pull/59654) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow up for [#59277](https://github.com/ClickHouse/ClickHouse/issues/59277) [#59659](https://github.com/ClickHouse/ClickHouse/pull/59659) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Pin python dependencies in stateless tests [#59663](https://github.com/ClickHouse/ClickHouse/pull/59663) ([Raúl Marín](https://github.com/Algunenano)). +* Unquote FLAG_LATEST to fix issue with empty argument [#59672](https://github.com/ClickHouse/ClickHouse/pull/59672) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Temporarily remove a feature that doesn't work [#59688](https://github.com/ClickHouse/ClickHouse/pull/59688) ([Alexander Tokmakov](https://github.com/tavplubix)). +* ConnectionEstablisher: remove unused is_finished [#59706](https://github.com/ClickHouse/ClickHouse/pull/59706) ([Igor Nikonov](https://github.com/devcrafter)). +* Add test for increase-always autoscaling lambda [#59709](https://github.com/ClickHouse/ClickHouse/pull/59709) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove SourceWithKeyCondition from ReadFromStorageStep [#59720](https://github.com/ClickHouse/ClickHouse/pull/59720) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add assertions around FixedString code [#59737](https://github.com/ClickHouse/ClickHouse/pull/59737) ([Raúl Marín](https://github.com/Algunenano)). +* Fix skipping unused shards with analyzer [#59741](https://github.com/ClickHouse/ClickHouse/pull/59741) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix DB type check - now it'll refuse to create in Replicated databases [#59743](https://github.com/ClickHouse/ClickHouse/pull/59743) ([Michael Kolupaev](https://github.com/al13n321)). +* Analyzer: Fix test_replicating_constants/test.py::test_different_versions [#59750](https://github.com/ClickHouse/ClickHouse/pull/59750) ([Dmitry Novik](https://github.com/novikd)). +* Fix dashboard params default values [#59753](https://github.com/ClickHouse/ClickHouse/pull/59753) ([Sergei Trifonov](https://github.com/serxa)). 
+* Fix logical optimizer with LowCardinality in new analyzer [#59766](https://github.com/ClickHouse/ClickHouse/pull/59766) ([Antonio Andelic](https://github.com/antonio2368)). +* Update libuv [#59773](https://github.com/ClickHouse/ClickHouse/pull/59773) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Followup [#50894](https://github.com/ClickHouse/ClickHouse/issues/50894) [#59774](https://github.com/ClickHouse/ClickHouse/pull/59774) ([Dmitry Novik](https://github.com/novikd)). +* CI: ci test await [#59778](https://github.com/ClickHouse/ClickHouse/pull/59778) ([Max K.](https://github.com/maxknv)). +* Better logging for adaptive async timeouts [#59781](https://github.com/ClickHouse/ClickHouse/pull/59781) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix broken youtube embedding in ne-tormozit.md [#59782](https://github.com/ClickHouse/ClickHouse/pull/59782) ([Shaun Struwig](https://github.com/Blargian)). +* Hide URL/S3 'headers' argument in SHOW CREATE [#59787](https://github.com/ClickHouse/ClickHouse/pull/59787) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)). +* Update version_date.tsv and changelogs after v24.1.3.31-stable [#59799](https://github.com/ClickHouse/ClickHouse/pull/59799) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.12.4.15-stable [#59800](https://github.com/ClickHouse/ClickHouse/pull/59800) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Analyzer: fix test_access_for_functions/test.py::test_access_rights_for_function [#59801](https://github.com/ClickHouse/ClickHouse/pull/59801) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Fix test_wrong_db_or_table_name/test.py::test_wrong_table_name [#59806](https://github.com/ClickHouse/ClickHouse/pull/59806) ([Dmitry Novik](https://github.com/novikd)). +* CI: await tune ups [#59807](https://github.com/ClickHouse/ClickHouse/pull/59807) ([Max K.](https://github.com/maxknv)). +* Enforce tests with enabled analyzer in CI [#59814](https://github.com/ClickHouse/ClickHouse/pull/59814) ([Dmitry Novik](https://github.com/novikd)). +* Handle different timestamp related aspects of zip-files [#59815](https://github.com/ClickHouse/ClickHouse/pull/59815) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix settings history azure_max_single_part_copy_size [#59819](https://github.com/ClickHouse/ClickHouse/pull/59819) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Updated a list of trusted contributors [#59844](https://github.com/ClickHouse/ClickHouse/pull/59844) ([Maksim Kita](https://github.com/kitaisreal)). +* Add check for recursiveRemoveLowCardinality() [#59845](https://github.com/ClickHouse/ClickHouse/pull/59845) ([Vitaly Baranov](https://github.com/vitlibar)). +* Better warning for disabled kernel.task_delayacct [#59846](https://github.com/ClickHouse/ClickHouse/pull/59846) ([Azat Khuzhin](https://github.com/azat)). +* Reintroduce 02590_interserver_mode_client_info_initial_query_start_time [#59851](https://github.com/ClickHouse/ClickHouse/pull/59851) ([Azat Khuzhin](https://github.com/azat)). 
+* Respect CMAKE_OSX_DEPLOYMENT_TARGET for Rust targets [#59852](https://github.com/ClickHouse/ClickHouse/pull/59852) ([Azat Khuzhin](https://github.com/azat)). +* Do not reinitialize ZooKeeperWithFaultInjection on each chunk [#59854](https://github.com/ClickHouse/ClickHouse/pull/59854) ([Alexander Gololobov](https://github.com/davenger)). +* Fix: check if std::function is set before calling it [#59858](https://github.com/ClickHouse/ClickHouse/pull/59858) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix long shutdown of FileLog storage [#59873](https://github.com/ClickHouse/ClickHouse/pull/59873) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 02322_sql_insert_format flakiness [#59874](https://github.com/ClickHouse/ClickHouse/pull/59874) ([Azat Khuzhin](https://github.com/azat)). +* Follow up for [#58554](https://github.com/ClickHouse/ClickHouse/issues/58554). Cleanup. [#59889](https://github.com/ClickHouse/ClickHouse/pull/59889) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* CI: Fix job failures due to jepsen artifacts [#59890](https://github.com/ClickHouse/ClickHouse/pull/59890) ([Max K.](https://github.com/maxknv)). +* Add test 02988_join_using_prewhere_pushdown [#59892](https://github.com/ClickHouse/ClickHouse/pull/59892) ([vdimir](https://github.com/vdimir)). +* Do not pull mutations if pulling replication log had been stopped [#59895](https://github.com/ClickHouse/ClickHouse/pull/59895) ([Azat Khuzhin](https://github.com/azat)). +* Fix `02982_comments_in_system_tables` [#59896](https://github.com/ClickHouse/ClickHouse/pull/59896) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Refactor Squashing for inserts. [#59899](https://github.com/ClickHouse/ClickHouse/pull/59899) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not rebuild a lambda package if it is updated [#59902](https://github.com/ClickHouse/ClickHouse/pull/59902) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix self-extracting: macOS doesn't allow to run renamed executable - copy instead [#59906](https://github.com/ClickHouse/ClickHouse/pull/59906) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update tests with indexHint for analyzer. [#59907](https://github.com/ClickHouse/ClickHouse/pull/59907) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Petite cleanup around macros and ReplicatedMergeTree [#59909](https://github.com/ClickHouse/ClickHouse/pull/59909) ([Azat Khuzhin](https://github.com/azat)). +* Fix: absence of closing record in query_log for failed insert over http [#59910](https://github.com/ClickHouse/ClickHouse/pull/59910) ([Igor Nikonov](https://github.com/devcrafter)). +* Decrease logging level for http retriable errors to Warning (and fix 00157_cache_dictionary flakiness) [#59920](https://github.com/ClickHouse/ClickHouse/pull/59920) ([Azat Khuzhin](https://github.com/azat)). +* Remove `test_distributed_backward_compatability` [#59921](https://github.com/ClickHouse/ClickHouse/pull/59921) ([Dmitry Novik](https://github.com/novikd)). +* Commands node args should add rvalue to push_back to reduce object copy cost [#59922](https://github.com/ClickHouse/ClickHouse/pull/59922) ([xuzifu666](https://github.com/xuzifu666)). +* tests: fix 02981_vertical_merges_memory_usage flakiness [#59923](https://github.com/ClickHouse/ClickHouse/pull/59923) ([Azat Khuzhin](https://github.com/azat)). 
+* Analyzer: Update broken integration tests list [#59924](https://github.com/ClickHouse/ClickHouse/pull/59924) ([Dmitry Novik](https://github.com/novikd)). +* CI: integration tests to mysql80 [#59939](https://github.com/ClickHouse/ClickHouse/pull/59939) ([Max K.](https://github.com/maxknv)). +* Register StorageMergeTree exception message fix [#59941](https://github.com/ClickHouse/ClickHouse/pull/59941) ([Maksim Kita](https://github.com/kitaisreal)). +* Replace lambdas with pointers to members to simplify stacks [#59944](https://github.com/ClickHouse/ClickHouse/pull/59944) ([Alexander Gololobov](https://github.com/davenger)). +* Analyzer: Fix test_user_defined_object_persistence [#59948](https://github.com/ClickHouse/ClickHouse/pull/59948) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Fix test_mutations_with_merge_tree [#59951](https://github.com/ClickHouse/ClickHouse/pull/59951) ([Dmitry Novik](https://github.com/novikd)). +* Cleanups [#59964](https://github.com/ClickHouse/ClickHouse/pull/59964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v24.1.4.19-stable [#59966](https://github.com/ClickHouse/ClickHouse/pull/59966) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Less conflicts [#59968](https://github.com/ClickHouse/ClickHouse/pull/59968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* We don't have external dictionaries from Aerospike [#59969](https://github.com/ClickHouse/ClickHouse/pull/59969) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix max num to warn message [#59972](https://github.com/ClickHouse/ClickHouse/pull/59972) ([Jordi Villar](https://github.com/jrdi)). +* Analyzer: Fix test_settings_profile [#59975](https://github.com/ClickHouse/ClickHouse/pull/59975) ([Dmitry Novik](https://github.com/novikd)). +* Update version_date.tsv and changelogs after v24.1.4.20-stable [#59978](https://github.com/ClickHouse/ClickHouse/pull/59978) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Analyzer: Fix test_storage_rabbitmq [#59981](https://github.com/ClickHouse/ClickHouse/pull/59981) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Fix test_shard_level_const_function [#59983](https://github.com/ClickHouse/ClickHouse/pull/59983) ([Dmitry Novik](https://github.com/novikd)). +* Add newlines to SettingsChangesHistory to maybe have less conflicts [#59984](https://github.com/ClickHouse/ClickHouse/pull/59984) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove context from comparison functions. [#59985](https://github.com/ClickHouse/ClickHouse/pull/59985) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update version_date.tsv and changelogs after v24.1.5.6-stable [#59993](https://github.com/ClickHouse/ClickHouse/pull/59993) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "Insert synchronously if dependent MV deduplication is enabled" [#59998](https://github.com/ClickHouse/ClickHouse/pull/59998) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix obviously wrong (but non significant) error in dictionaries [#60005](https://github.com/ClickHouse/ClickHouse/pull/60005) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Inhibit randomization in some tests [#60009](https://github.com/ClickHouse/ClickHouse/pull/60009) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The code should not be complex [#60010](https://github.com/ClickHouse/ClickHouse/pull/60010) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Exclude test run from a slow build [#60011](https://github.com/ClickHouse/ClickHouse/pull/60011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix broken lambdas formatting [#60012](https://github.com/ClickHouse/ClickHouse/pull/60012) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Verify formatting consistency on the server-side [#60013](https://github.com/ClickHouse/ClickHouse/pull/60013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer: Fix test_sql_user_defined_functions_on_cluster [#60019](https://github.com/ClickHouse/ClickHouse/pull/60019) ([Dmitry Novik](https://github.com/novikd)). +* Fix 02981_vertical_merges_memory_usage with SharedMergeTree [#60028](https://github.com/ClickHouse/ClickHouse/pull/60028) ([Raúl Marín](https://github.com/Algunenano)). +* Fix 01656_test_query_log_factories_info with analyzer. [#60037](https://github.com/ClickHouse/ClickHouse/pull/60037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable tests with coverage [#60047](https://github.com/ClickHouse/ClickHouse/pull/60047) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Print CPU flags at startup [#60075](https://github.com/ClickHouse/ClickHouse/pull/60075) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Cleanup: less confusion between config priority and balancing priority in connection pools [#60077](https://github.com/ClickHouse/ClickHouse/pull/60077) ([Igor Nikonov](https://github.com/devcrafter)). +* Temporary table already exists exception message fix [#60080](https://github.com/ClickHouse/ClickHouse/pull/60080) ([Maksim Kita](https://github.com/kitaisreal)). +* Refactor prewhere and primary key optimization [#60082](https://github.com/ClickHouse/ClickHouse/pull/60082) ([Amos Bird](https://github.com/amosbird)). +* Bump curl to version 4.6.0 [#60084](https://github.com/ClickHouse/ClickHouse/pull/60084) ([josh-hildred](https://github.com/josh-hildred)). +* Check wrong abbreviations [#60086](https://github.com/ClickHouse/ClickHouse/pull/60086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove the check for formatting consistency from the Fuzzer [#60088](https://github.com/ClickHouse/ClickHouse/pull/60088) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid overflow in settings [#60089](https://github.com/ClickHouse/ClickHouse/pull/60089) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* A small preparation for better handling of primary key in memory [#60092](https://github.com/ClickHouse/ClickHouse/pull/60092) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Move threadPoolCallbackRunner to the "Common" folder [#60097](https://github.com/ClickHouse/ClickHouse/pull/60097) ([Vitaly Baranov](https://github.com/vitlibar)). +* Speed up the CI [#60106](https://github.com/ClickHouse/ClickHouse/pull/60106) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Insignificant changes [#60108](https://github.com/ClickHouse/ClickHouse/pull/60108) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not retry queries if container is down in integration tests [#60109](https://github.com/ClickHouse/ClickHouse/pull/60109) ([Azat Khuzhin](https://github.com/azat)). +* Better check for inconsistent formatting [#60110](https://github.com/ClickHouse/ClickHouse/pull/60110) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* skip printing meaningless log [#60123](https://github.com/ClickHouse/ClickHouse/pull/60123) ([conic](https://github.com/conicl)). +* Implement TODO [#60124](https://github.com/ClickHouse/ClickHouse/pull/60124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad log message [#60125](https://github.com/ClickHouse/ClickHouse/pull/60125) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in `IMergeTreeDataPart` [#60139](https://github.com/ClickHouse/ClickHouse/pull/60139) ([Antonio Andelic](https://github.com/antonio2368)). +* Add new setting to changes history [#60141](https://github.com/ClickHouse/ClickHouse/pull/60141) ([Antonio Andelic](https://github.com/antonio2368)). +* Analyzer: fix row level filters with PREWHERE + additional filters [#60142](https://github.com/ClickHouse/ClickHouse/pull/60142) ([vdimir](https://github.com/vdimir)). +* Tests: query log for inserts over http [#60143](https://github.com/ClickHouse/ClickHouse/pull/60143) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix build in master [#60151](https://github.com/ClickHouse/ClickHouse/pull/60151) ([Raúl Marín](https://github.com/Algunenano)). +* Add setting history check to stateless tests [#60154](https://github.com/ClickHouse/ClickHouse/pull/60154) ([Raúl Marín](https://github.com/Algunenano)). +* Mini cleanup of CPUID.h [#60155](https://github.com/ClickHouse/ClickHouse/pull/60155) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix: custom key failover test flakiness [#60158](https://github.com/ClickHouse/ClickHouse/pull/60158) ([Igor Nikonov](https://github.com/devcrafter)). +* Skip sanity checks on secondary CREATE query [#60159](https://github.com/ClickHouse/ClickHouse/pull/60159) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove extensively aggressive check [#60162](https://github.com/ClickHouse/ClickHouse/pull/60162) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong message during compilation [#60178](https://github.com/ClickHouse/ClickHouse/pull/60178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#44318](https://github.com/ClickHouse/ClickHouse/issues/44318) [#60179](https://github.com/ClickHouse/ClickHouse/pull/60179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add test for 59437 [#60191](https://github.com/ClickHouse/ClickHouse/pull/60191) ([Raúl Marín](https://github.com/Algunenano)). +* CI: hot fix for gh statuses [#60201](https://github.com/ClickHouse/ClickHouse/pull/60201) ([Max K.](https://github.com/maxknv)). +* Limit libarchive format to what we use [#60203](https://github.com/ClickHouse/ClickHouse/pull/60203) ([San](https://github.com/santrancisco)). +* Fix bucket region discovery [#60204](https://github.com/ClickHouse/ClickHouse/pull/60204) ([Nikita Taranov](https://github.com/nickitat)). +* Fix `test_backup_restore_s3/test.py::test_user_specific_auth` [#60210](https://github.com/ClickHouse/ClickHouse/pull/60210) ([Antonio Andelic](https://github.com/antonio2368)). 
+* CI: combine analyzer, s3, dbreplicated into one job [#60224](https://github.com/ClickHouse/ClickHouse/pull/60224) ([Max K.](https://github.com/maxknv)). +* Slightly better Keeper loading from snapshot [#60226](https://github.com/ClickHouse/ClickHouse/pull/60226) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: IAST::clone() for RENAME [#60227](https://github.com/ClickHouse/ClickHouse/pull/60227) ([Igor Nikonov](https://github.com/devcrafter)). +* Treat 2+ in allow_experimental_parallel_reading_from_replicas as 2 [#60228](https://github.com/ClickHouse/ClickHouse/pull/60228) ([Raúl Marín](https://github.com/Algunenano)). +* CI: random job pick support [#60229](https://github.com/ClickHouse/ClickHouse/pull/60229) ([Max K.](https://github.com/maxknv)). +* Fix analyzer - hide arguments for secret functions [#60230](https://github.com/ClickHouse/ClickHouse/pull/60230) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backups delete suspicious file [#60231](https://github.com/ClickHouse/ClickHouse/pull/60231) ([Maksim Kita](https://github.com/kitaisreal)). +* CI: random sanitizer for parallel repl in PR wf [#60234](https://github.com/ClickHouse/ClickHouse/pull/60234) ([Max K.](https://github.com/maxknv)). +* CI: use aarch runner for runconfig job [#60236](https://github.com/ClickHouse/ClickHouse/pull/60236) ([Max K.](https://github.com/maxknv)). +* Add test for 60232 [#60244](https://github.com/ClickHouse/ClickHouse/pull/60244) ([Raúl Marín](https://github.com/Algunenano)). +* Make cloud sync required [#60245](https://github.com/ClickHouse/ClickHouse/pull/60245) ([Raúl Marín](https://github.com/Algunenano)). +* Tests from [#60094](https://github.com/ClickHouse/ClickHouse/issues/60094) [#60256](https://github.com/ClickHouse/ClickHouse/pull/60256) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove bad check in Keeper [#60266](https://github.com/ClickHouse/ClickHouse/pull/60266) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix integration `test_backup_restore_s3` [#60269](https://github.com/ClickHouse/ClickHouse/pull/60269) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ignore valid 'No such key errors' in stress tests [#60270](https://github.com/ClickHouse/ClickHouse/pull/60270) ([Raúl Marín](https://github.com/Algunenano)). +* Stress test: Include the first sanitizer block message in the report [#60283](https://github.com/ClickHouse/ClickHouse/pull/60283) ([Raúl Marín](https://github.com/Algunenano)). +* Update analyzer_tech_debt.txt [#60303](https://github.com/ClickHouse/ClickHouse/pull/60303) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Minor fixes for hashed dictionary [#60310](https://github.com/ClickHouse/ClickHouse/pull/60310) ([vdimir](https://github.com/vdimir)). +* Install tailscale during AMI build and set it up on runners [#60316](https://github.com/ClickHouse/ClickHouse/pull/60316) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: remove Integration tests asan and release from PR wf [#60327](https://github.com/ClickHouse/ClickHouse/pull/60327) ([Max K.](https://github.com/maxknv)). +* Fix - analyzer related - "executable" function subquery arguments. [#60339](https://github.com/ClickHouse/ClickHouse/pull/60339) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update settings.md to correct the description for setting `max_concurrent_queries_for_user` [#60343](https://github.com/ClickHouse/ClickHouse/pull/60343) ([Alex Cheng](https://github.com/Alex-Cheng)). 
+* Fix rapidjson submodule [#60346](https://github.com/ClickHouse/ClickHouse/pull/60346) ([Raúl Marín](https://github.com/Algunenano)). +* Validate experimental and suspicious types inside nested types under a setting [#60353](https://github.com/ClickHouse/ClickHouse/pull/60353) ([Kruglov Pavel](https://github.com/Avogar)). +* Update 01158_zookeeper_log_long.sql [#60357](https://github.com/ClickHouse/ClickHouse/pull/60357) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add missed #include [#60358](https://github.com/ClickHouse/ClickHouse/pull/60358) ([Mikhnenko Sasha](https://github.com/4JustMe4)). +* Follow up [#60082](https://github.com/ClickHouse/ClickHouse/issues/60082) [#60360](https://github.com/ClickHouse/ClickHouse/pull/60360) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove ALTER LIVE VIEW [#60370](https://github.com/ClickHouse/ClickHouse/pull/60370) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Expose fatal.log separately for fuzzer [#60374](https://github.com/ClickHouse/ClickHouse/pull/60374) ([Azat Khuzhin](https://github.com/azat)). +* Minor changes for dashboard [#60387](https://github.com/ClickHouse/ClickHouse/pull/60387) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove unused method [#60388](https://github.com/ClickHouse/ClickHouse/pull/60388) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to map UI handlers to different paths [#60389](https://github.com/ClickHouse/ClickHouse/pull/60389) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove old tags from integration tests [#60407](https://github.com/ClickHouse/ClickHouse/pull/60407) ([Raúl Marín](https://github.com/Algunenano)). +* Update `liburing` to 2.5 [#60409](https://github.com/ClickHouse/ClickHouse/pull/60409) ([Nikita Taranov](https://github.com/nickitat)). +* Fix undefined-behavior in case of too big max_execution_time setting [#60419](https://github.com/ClickHouse/ClickHouse/pull/60419) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wrong log message in Fuzzer [#60425](https://github.com/ClickHouse/ClickHouse/pull/60425) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix unrestricted reads from keeper [#60429](https://github.com/ClickHouse/ClickHouse/pull/60429) ([Raúl Marín](https://github.com/Algunenano)). +* Split update_mergeable_check into two functions to force trigger the status [#60431](https://github.com/ClickHouse/ClickHouse/pull/60431) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "Revert "Add table function `mergeTreeIndex`"" [#60435](https://github.com/ClickHouse/ClickHouse/pull/60435) ([Anton Popov](https://github.com/CurtizJ)). +* Revert "Merge pull request [#56864](https://github.com/ClickHouse/ClickHouse/issues/56864) from ClickHouse/broken-projections-better-handling" [#60436](https://github.com/ClickHouse/ClickHouse/pull/60436) ([Nikita Taranov](https://github.com/nickitat)). +* Keeper: fix moving changelog files between disks [#60442](https://github.com/ClickHouse/ClickHouse/pull/60442) ([Antonio Andelic](https://github.com/antonio2368)). +* Replace deprecated distutils by vendored packaging [#60444](https://github.com/ClickHouse/ClickHouse/pull/60444) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Do not fail the build if ci-logs is not healthy [#60445](https://github.com/ClickHouse/ClickHouse/pull/60445) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Move setting `enable_order_by_all` out of the experimental setting section [#60449](https://github.com/ClickHouse/ClickHouse/pull/60449) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor: Replace `boost::algorithm::starts_with()` by `std::string::starts_with()` [#60450](https://github.com/ClickHouse/ClickHouse/pull/60450) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor: Replace boost::algorithm::ends_with() by std::string::ends_with() [#60454](https://github.com/ClickHouse/ClickHouse/pull/60454) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: remove input params for job scripts [#60455](https://github.com/ClickHouse/ClickHouse/pull/60455) ([Max K.](https://github.com/maxknv)). +* Fix: 02496_remove_redundant_sorting_analyzer [#60456](https://github.com/ClickHouse/ClickHouse/pull/60456) ([Igor Nikonov](https://github.com/devcrafter)). +* PR template fix to include ci fix category [#60461](https://github.com/ClickHouse/ClickHouse/pull/60461) ([Max K.](https://github.com/maxknv)). +* Reduce iterations in 01383_log_broken_table [#60465](https://github.com/ClickHouse/ClickHouse/pull/60465) ([Raúl Marín](https://github.com/Algunenano)). +* Merge [#57434](https://github.com/ClickHouse/ClickHouse/issues/57434) [#60466](https://github.com/ClickHouse/ClickHouse/pull/60466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test: looks like an obvious race condition, but I didn't check in detail. [#60471](https://github.com/ClickHouse/ClickHouse/pull/60471) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make test slower [#60472](https://github.com/ClickHouse/ClickHouse/pull/60472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cgroups v1 rss parsing in CgroupsMemoryUsageObserver [#60481](https://github.com/ClickHouse/ClickHouse/pull/60481) ([Maksim Kita](https://github.com/kitaisreal)). +* CI: fix pr check status to not fail mergeable check [#60483](https://github.com/ClickHouse/ClickHouse/pull/60483) ([Max K.](https://github.com/maxknv)). +* Report respects skipped builds [#60488](https://github.com/ClickHouse/ClickHouse/pull/60488) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: quick style fix [#60490](https://github.com/ClickHouse/ClickHouse/pull/60490) ([Max K.](https://github.com/maxknv)). +* Decrease logging level for http retriable errors to Info [#60508](https://github.com/ClickHouse/ClickHouse/pull/60508) ([Raúl Marín](https://github.com/Algunenano)). +* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 23fc0032056..572ceddf590 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.2.1.2248-stable 2024-02-29 v24.1.5.6-stable 2024-02-14 v24.1.4.20-stable 2024-02-14 v24.1.3.31-stable 2024-02-09 From da0cafc31c5dd08057969086ede0f9495daa7b31 Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Thu, 29 Feb 2024 13:30:55 -0800 Subject: [PATCH 119/356] Fix arm64 floating point result inconsistency issue --- cmake/linux/toolchain-aarch64.cmake | 4 ++-- tests/queries/0_stateless/02813_seriesDecomposeSTL.sql | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cmake/linux/toolchain-aarch64.cmake b/cmake/linux/toolchain-aarch64.cmake index b80cc01296d..d2ce2d97d8e 100644 --- a/cmake/linux/toolchain-aarch64.cmake +++ b/cmake/linux/toolchain-aarch64.cmake @@ -13,6 +13,6 @@ set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-aarch set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/aarch64-linux-gnu/libc") -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffp-contract=off --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffp-contract=off --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") diff --git a/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql b/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql index 929d0474e09..496267f2476 100644 --- a/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql +++ b/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql @@ -1,6 +1,3 @@ --- Tags: no-cpu-aarch64 --- Tag no-cpu-aarch64: values generated are slighly different on aarch64 - DROP TABLE IF EXISTS tb2; CREATE TABLE tb2 (`period` UInt32, `ts` Array(Float64)) ENGINE = Memory; From 17210bb4948d42f527acd31d013f92d78a2bd4c1 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 1 Mar 2024 00:21:31 +0100 Subject: [PATCH 120/356] Fix type, add _server suffix to start and stop functions --- docker/test/stateless/stress_tests.lib | 13 +++++++------ docker/test/stress/run.sh | 24 ++++++++---------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index ae98f9a0a3a..c0fc32ab718 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -29,7 +29,7 @@ function unts() function trim_server_logs() { - head -n :$FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped + head -n "$FAILURE_CONTEXT_LINES" "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped } function install_packages() @@ -146,10 +146,10 @@ EOL } -function stop() +function stop_server() { - local max_tries="${1:-90}" - local check_hang="${2:-true}" + local max_tries=90 + local check_hang=true local pid # Preserve the pid, since the server can hung after the PID will be deleted. 
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" @@ -176,12 +176,13 @@ function stop() fi } -function start() +function start_server() { counter=0 + max_attempt=120 until clickhouse-client --query "SELECT 1" do - if [ "$counter" -gt "${1:-120}" ] + if [ "$counter" -gt "$max_attempt" ] then echo "Cannot start clickhouse-server" rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||: diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 1f0d55605af..621a6ced7f6 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -57,8 +57,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml -# shellcheck disable=SC2119 -start +start_server setup_logs_replication @@ -68,8 +67,7 @@ clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" -# shellcheck disable=SC2119 -stop +stop_server mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log # Randomize cache policies. @@ -89,8 +87,7 @@ if [ "$cache_policy" = "SLRU" ]; then mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml fi -# shellcheck disable=SC2119 -start +start_server clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" @@ -193,8 +190,7 @@ clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "SYSTEM STOP THREAD FUZZER" -# shellcheck disable=SC2119 -stop +stop_server # Let's enable S3 storage by default export USE_S3_STORAGE_FOR_MERGE_TREE=1 @@ -228,8 +224,7 @@ if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then > /etc/clickhouse-server/config.d/enable_async_load_databases.xml fi -# shellcheck disable=SC2119 -start +start_server stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ @@ -239,21 +234,18 @@ stress --hung-check --drop-databases --output-folder test_output --skip-func-tes rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \ || echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log)" >> /test_output/test_results.tsv -# shellcheck disable=SC2119 -stop +stop_server mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log # NOTE Disable thread fuzzer before server start with data after stress test. # In debug build it can take a lot of time. 
unset "${!THREAD_@}" -# shellcheck disable=SC2119 -start +start_server check_server_start -# shellcheck disable=SC2119 -stop +stop_server [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" From 3b3a7c144bb93bd89eff401ed1596217c287837c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Thu, 29 Feb 2024 18:47:27 -0500 Subject: [PATCH 121/356] Update 02998_analyzer_secret_args_tree_node.sql --- .../0_stateless/02998_analyzer_secret_args_tree_node.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql index e5b7a9fe20a..f40b40b6c8c 100644 --- a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql @@ -1,4 +1,5 @@ --- Tag: no-fasttest: encrypt function doesn't exist in the fastest build +-- Tags: no-fasttest +-- encrypt function doesn't exist in the fastest build -- { echoOn } SET allow_experimental_analyzer = 1; From 1df6f4cd5cd221e17aa21f172aa1f9615fe6d6a1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Thu, 29 Feb 2024 21:26:39 -0800 Subject: [PATCH 122/356] fix(rust): Fix skim's panic handler --- rust/skim/src/lib.rs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/rust/skim/src/lib.rs b/rust/skim/src/lib.rs index a20b1b35033..58d5be51baa 100644 --- a/rust/skim/src/lib.rs +++ b/rust/skim/src/lib.rs @@ -1,7 +1,7 @@ -use skim::prelude::*; -use term::terminfo::TermInfo; use cxx::{CxxString, CxxVector}; +use skim::prelude::*; use std::panic; +use term::terminfo::TermInfo; #[cxx::bridge] mod ffi { @@ -16,7 +16,7 @@ struct Item { } impl Item { fn new(text: String) -> Self { - return Self{ + Self { // Text that will be printed by skim, and will be used for matching. // // Text that will be shown should not contains new lines since in this case skim may @@ -24,16 +24,16 @@ impl Item { text_no_newlines: text.replace("\n", " "), // This will be used when the match had been selected. orig_text: text, - }; + } } } impl SkimItem for Item { fn text(&self) -> Cow { - return Cow::Borrowed(&self.text_no_newlines); + Cow::Borrowed(&self.text_no_newlines) } fn output(&self) -> Cow { - return Cow::Borrowed(&self.orig_text); + Cow::Borrowed(&self.orig_text) } } @@ -88,14 +88,11 @@ fn skim_impl(prefix: &CxxString, words: &CxxVector) -> Result) -> Result { - let ret = panic::catch_unwind(|| { - return skim_impl(prefix, words); - }); - return match ret { + match panic::catch_unwind(|| skim_impl(prefix, words)) { Err(err) => { let e = if let Some(s) = err.downcast_ref::() { format!("{}", s) @@ -105,7 +102,7 @@ fn skim(prefix: &CxxString, words: &CxxVector) -> Result res, } } From 7d58d8236145478fb9f996e1990d61515c14d0b6 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Thu, 29 Feb 2024 18:25:38 -0800 Subject: [PATCH 123/356] internal: Refine rust prql code This makes the rust code a bit more idiomatic. It builds on top of #60616, which can merge first. There aren't any rust tests here, so would like to check CI. 
--- rust/prql/src/lib.rs | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/rust/prql/src/lib.rs b/rust/prql/src/lib.rs index 621f6aae5a2..9e4b0ae194a 100644 --- a/rust/prql/src/lib.rs +++ b/rust/prql/src/lib.rs @@ -14,39 +14,36 @@ fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) { *out_ptr = CString::new(result).unwrap().into_raw() as *mut u8; } +/// Converts a PRQL query from a raw C string to SQL, returning an error code if the conversion fails. pub unsafe extern "C" fn prql_to_sql_impl( query: *const u8, size: u64, out: *mut *mut u8, out_size: *mut u64, ) -> i64 { - let query_vec = unsafe { slice::from_raw_parts(query, size.try_into().unwrap()) }.to_vec(); - let maybe_prql_query = String::from_utf8(query_vec); - if maybe_prql_query.is_err() { + let query_vec = slice::from_raw_parts(query, size.try_into().unwrap()).to_vec(); + let Ok(query_str) = String::from_utf8(query_vec) else { set_output( - String::from("The PRQL query must be UTF-8 encoded!"), + "The PRQL query must be UTF-8 encoded!".to_string(), out, out_size, ); return 1; - } - let prql_query = maybe_prql_query.unwrap(); - let opts = &Options { + }; + + let opts = Options { format: true, target: Target::Sql(Some(Dialect::ClickHouse)), signature_comment: false, color: false, }; - let (is_err, res) = match prqlc::compile(&prql_query, &opts) { - Ok(sql_str) => (false, sql_str), - Err(err) => (true, err.to_string()), - }; - set_output(res, out, out_size); - - match is_err { - true => 1, - false => 0, + if let Ok(sql_str) = prqlc::compile(&query_str, &opts) { + set_output(sql_str, out, out_size); + 0 + } else { + set_output("PRQL compilation failed!".to_string(), out, out_size); + 1 } } From 845dcc95eca3e8664553c664f51432315f472d09 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 1 Mar 2024 11:43:54 +0000 Subject: [PATCH 124/356] Fix build --- utils/memcpy-bench/FastMemcpy.h | 2 +- utils/memcpy-bench/FastMemcpy_Avx.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/memcpy-bench/FastMemcpy.h b/utils/memcpy-bench/FastMemcpy.h index e2ac73a1b63..650a6761771 100644 --- a/utils/memcpy-bench/FastMemcpy.h +++ b/utils/memcpy-bench/FastMemcpy.h @@ -654,7 +654,7 @@ __attribute__((__no_sanitize__("undefined"))) inline void *memcpy_tiny(void * __ //--------------------------------------------------------------------- // main routine //--------------------------------------------------------------------- -inline void* memcpy_fast_sse(void * __restrict destination, const void * __restrict source, size_t size) +void* memcpy_fast_sse(void * __restrict destination, const void * __restrict source, size_t size) /// NOLINT(misc-definitions-in-headers) { unsigned char *dst = (unsigned char*)destination; const unsigned char *src = (const unsigned char*)source; diff --git a/utils/memcpy-bench/FastMemcpy_Avx.h b/utils/memcpy-bench/FastMemcpy_Avx.h index 3271e10d237..aecf8abbe03 100644 --- a/utils/memcpy-bench/FastMemcpy_Avx.h +++ b/utils/memcpy-bench/FastMemcpy_Avx.h @@ -371,7 +371,7 @@ static INLINE void *memcpy_tiny_avx(void * __restrict dst, const void * __restri //--------------------------------------------------------------------- // main routine //--------------------------------------------------------------------- -inline void* memcpy_fast_avx(void * __restrict destination, const void * __restrict source, size_t size) +void* memcpy_fast_avx(void * __restrict destination, const void * __restrict source, size_t size) /// 
NOLINT(misc-definitions-in-headers) { unsigned char *dst = reinterpret_cast(destination); const unsigned char *src = reinterpret_cast(source); From 447f0817d5dfe70070d0e72a09188cd2306cb88e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 1 Mar 2024 12:08:51 +0000 Subject: [PATCH 125/356] Fixing build --- .../System/IStorageSystemOneBlock.cpp | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 src/Storages/System/IStorageSystemOneBlock.cpp diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp b/src/Storages/System/IStorageSystemOneBlock.cpp new file mode 100644 index 00000000000..20c8f3517ee --- /dev/null +++ b/src/Storages/System/IStorageSystemOneBlock.cpp @@ -0,0 +1,100 @@ +#include +// #include +// #include +// #include +// #include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class ReadFromSystemOneBlock : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemOneBlock"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemOneBlock( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::shared_ptr storage; + std::vector columns_mask; + const ActionsDAG::Node * predicate = nullptr; +}; + +void IStorageSystemOneBlock::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t /*max_block_size*/, + size_t /*num_streams*/) + +{ + storage_snapshot->check(column_names); + Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + std::vector columns_mask; + + if (supportsColumnsMask()) + { + auto [columns_mask_, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + columns_mask = std::move(columns_mask_); + sample_block = std::move(header); + } + + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(sample_block), std::move(this_ptr), std::move(columns_mask)); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + const auto & sample_block = getOutputStream().header; + MutableColumns res_columns = sample_block.cloneEmptyColumns(); + storage->fillData(res_columns, context, predicate, std::move(columns_mask)); + + UInt64 num_rows = res_columns.at(0)->size(); + Chunk chunk(std::move(res_columns), num_rows); + + pipeline.init(Pipe(std::make_shared(sample_block, std::move(chunk)))); +} + +void ReadFromSystemOneBlock::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +} From 
dc2d022cc49fd9bcf892a768c4063930f24e5104 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 1 Mar 2024 12:29:57 +0000 Subject: [PATCH 126/356] Docs: Follow-up to #58023 --- docs/en/sql-reference/statements/alter/column.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index f6d9668e628..902eae2d67d 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -272,10 +272,16 @@ ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING max_compress_bloc ## MATERIALIZE COLUMN -Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`). -It is used if it is necessary to add or update a column with a complicated expression, because evaluating such an expression directly on `SELECT` executing turns out to be expensive. +Materializes a column with a `DEFAULT` or `MATERIALIZED` value expression. +This statement can be used to rewrite existing column data after a `DEFAULT` or `MATERIALIZED` expression has been added or updated (which only updates the metadata but does not change existing data). Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). +For columns with a new or updated `MATERIALIZED` value expression, all existing rows are rewritten. + +For columns with a new or updated `DEFAULT` value expression, the behavior changed in ClickHouse v24.2: +- In ClickHouse < v24.2, all existing rows are changed to the new `DEFAULT` value expression. +- In ClickHouse >= v24.2, only rows containing the previous default value are changed to the new `DEFAULT` value expression. Rows with non-default values are kept as is. 
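As an illustration of the difference, consider a sequence like the following (table and column names here are hypothetical; which rows are rewritten follows the version-dependent rules above):

```sql
-- A column whose DEFAULT expression is changed after data has been written.
CREATE TABLE tab (id UInt64, v String DEFAULT 'old_default') ENGINE = MergeTree ORDER BY id;

INSERT INTO tab (id) VALUES (1);                -- v is filled in from the current DEFAULT
INSERT INTO tab (id, v) VALUES (2, 'explicit'); -- v is written explicitly

ALTER TABLE tab MODIFY COLUMN v String DEFAULT 'new_default'; -- metadata-only change
ALTER TABLE tab MATERIALIZE COLUMN v;                         -- rewrites existing data as described above
```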
+ Syntax: ```sql From b45ff56e4df3ed300a5d6c7e5b51979231f11fb7 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 27 Feb 2024 18:27:06 +0100 Subject: [PATCH 127/356] Do not check to and from files existence in metadata_storage because it does not see uncommitted changes --- src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index de7a71e8dc1..66e8beb0637 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -595,12 +595,6 @@ void DiskObjectStorageTransaction::moveFile(const String & from_path, const Stri operations_to_execute.emplace_back( std::make_unique(object_storage, metadata_storage, [from_path, to_path, this](MetadataTransactionPtr tx) { - if (metadata_storage.exists(to_path)) - throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "File already exists: {}", to_path); - - if (!metadata_storage.exists(from_path)) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist, cannot move", from_path); - tx->moveFile(from_path, to_path); })); } From 590c30a23a0289b2e5333b54ebec204f00163209 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 28 Feb 2024 16:25:38 +0100 Subject: [PATCH 128/356] Update DiskObjectStorageTransaction.cpp --- src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 66e8beb0637..e1ac9f6fba7 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -23,7 +23,6 @@ namespace ErrorCodes extern const int CANNOT_OPEN_FILE; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; - extern const int FILE_ALREADY_EXISTS; extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; extern const int LOGICAL_ERROR; } From 2dd83a09e3e150f041482953bb05a3e2c7ac6cec Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 28 Feb 2024 16:55:23 +0100 Subject: [PATCH 129/356] No need to capture this --- src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index e1ac9f6fba7..d25add625e8 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -592,7 +592,7 @@ void DiskObjectStorageTransaction::moveDirectory(const std::string & from_path, void DiskObjectStorageTransaction::moveFile(const String & from_path, const String & to_path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [from_path, to_path, this](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [from_path, to_path](MetadataTransactionPtr tx) { tx->moveFile(from_path, to_path); })); From 68181ab00f356a66705ed49cc862da95bea2db5c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 1 Mar 2024 13:14:44 +0000 Subject: [PATCH 130/356] Revert "Revert "Add `toMillisecond` function"" This reverts commit 5225fc6a14bd6c1de1b70be1efaa5e052e0a338c. 
--- .../functions/date-time-functions.md | 60 ++++++++++++---- src/Common/DateLUTImpl.h | 40 ++++++++--- src/Functions/DateTimeTransforms.cpp | 15 ++-- src/Functions/DateTimeTransforms.h | 71 +++++++++++++------ src/Functions/toMillisecond.cpp | 18 +++++ .../02998_to_milliseconds.reference | 8 +++ .../0_stateless/02998_to_milliseconds.sql | 17 +++++ .../aspell-ignore/en/aspell-dict.txt | 1 + 8 files changed, 179 insertions(+), 51 deletions(-) create mode 100644 src/Functions/toMillisecond.cpp create mode 100644 tests/queries/0_stateless/02998_to_milliseconds.reference create mode 100644 tests/queries/0_stateless/02998_to_milliseconds.sql diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index c5b3b4cc3ae..83a3bd77cdb 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -394,8 +394,7 @@ Result: ## toYear -Converts a date or date with time to the year number (AD) as `UInt16` value. - +Returns the year component (AD) of a date or date with time. **Syntax** @@ -431,7 +430,7 @@ Result: ## toQuarter -Converts a date or date with time to the quarter number (1-4) as `UInt8` value. +Returns the quarter (1-4) of a date or date with time. **Syntax** @@ -465,10 +464,9 @@ Result: └──────────────────────────────────────────────┘ ``` - ## toMonth -Converts a date or date with time to the month number (1-12) as `UInt8` value. +Returns the month component (1-12) of a date or date with time. **Syntax** @@ -504,7 +502,7 @@ Result: ## toDayOfYear -Converts a date or date with time to the number of the day of the year (1-366) as `UInt16` value. +Returns the number of the day within the year (1-366) of a date or date with time. **Syntax** @@ -540,7 +538,7 @@ Result: ## toDayOfMonth -Converts a date or date with time to the number of the day in the month (1-31) as `UInt8` value. +Returns the number of the day within the month (1-31) of a date or date with time. **Syntax** @@ -576,7 +574,7 @@ Result: ## toDayOfWeek -Converts a date or date with time to the number of the day in the week as `UInt8` value. +Returns the number of the day within the week of a date or date with time. The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument. @@ -627,7 +625,7 @@ Result: ## toHour -Converts a date with time to the number of the hour in 24-hour time (0-23) as `UInt8` value. +Returns the hour component (0-24) of a date with time. Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always exactly when it occurs - it depends on the timezone). @@ -641,7 +639,7 @@ Alias: `HOUR` **Arguments** -- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) **Returned value** @@ -665,7 +663,7 @@ Result: ## toMinute -Converts a date with time to the number of the minute of the hour (0-59) as `UInt8` value. +Returns the minute component (0-59) a date with time. 
 **Syntax**
 
@@ -677,7 +675,7 @@ Alias: `MINUTE`
 
 **Arguments**
 
-- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
+- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
 
 **Returned value**
 
@@ -701,7 +699,7 @@ Result:
 
 ## toSecond
 
-Converts a date with time to the second in the minute (0-59) as `UInt8` value. Leap seconds are not considered.
+Returns the second component (0-59) of a date with time. Leap seconds are not considered.
 
 **Syntax**
 
@@ -713,7 +711,7 @@ Alias: `SECOND`
 
 **Arguments**
 
-- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
+- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
 
 **Returned value**
 
@@ -735,6 +733,40 @@ Result:
 └─────────────────────────────────────────────┘
 ```
 
+## toMillisecond
+
+Returns the millisecond component (0-999) of a date with time.
+
+**Syntax**
+
+```sql
+toMillisecond(value)
+```
+
+**Arguments**
+
+- `value` - [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
+
+Alias: `MILLISECOND`
+
+```sql
+SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))
+```
+
+Result:
+
+```response
+┌──toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))─┐
+│                                                         456 │
+└────────────────────────────────────────────────────────────┘
+```
+
+**Returned value**
+
+- The millisecond in the second (0 - 999) of the given date/time
+
+Type: `UInt16`
+
 ## toUnixTimestamp
 
 Converts a string, a date or a date with time to the [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) in `UInt32` representation.
diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h
index 0e72b489ace..7bf66c0504a 100644
--- a/src/Common/DateLUTImpl.h
+++ b/src/Common/DateLUTImpl.h
@@ -3,13 +3,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 
-
 #define DATE_SECONDS_PER_DAY 86400 /// Number of seconds in a day, 60 * 60 * 24
 
 #define DATE_LUT_MIN_YEAR 1900 /// 1900 since majority of financial organizations consider 1900 as an initial year.
@@ -280,9 +280,9 @@ private:
         static_assert(std::is_integral_v && std::is_integral_v);
         assert(divisor > 0);
 
-        if (likely(offset_is_whole_number_of_hours_during_epoch))
+        if (offset_is_whole_number_of_hours_during_epoch) [[likely]]
         {
-            if (likely(x >= 0))
+            if (x >= 0) [[likely]]
                 return static_cast(x / divisor * divisor);
 
             /// Integer division for negative numbers rounds them towards zero (up).
@@ -576,10 +576,10 @@ public:
 
     unsigned toSecond(Time t) const
     {
-        if (likely(offset_is_whole_number_of_minutes_during_epoch))
+        if (offset_is_whole_number_of_minutes_during_epoch) [[likely]]
         {
             Time res = t % 60;
-            if (likely(res >= 0))
+            if (res >= 0) [[likely]]
                 return static_cast(res);
             return static_cast(res) + 60;
         }
@@ -593,6 +593,30 @@ public:
         return time % 60;
     }
 
+    template 
+    unsigned toMillisecond(const DateOrTime & datetime, Int64 scale_multiplier) const
+    {
+        constexpr Int64 millisecond_multiplier = 1'000;
+        constexpr Int64 microsecond_multiplier = 1'000 * millisecond_multiplier;
+        constexpr Int64 divider = microsecond_multiplier / millisecond_multiplier;
+
+        auto components = DB::DecimalUtils::splitWithScaleMultiplier(datetime, scale_multiplier);
+
+        if (datetime.value < 0 && components.fractional)
+        {
+            components.fractional = scale_multiplier + (components.whole ?
Int64(-1) : Int64(1)) * components.fractional; + --components.whole; + } + Int64 fractional = components.fractional; + if (scale_multiplier > microsecond_multiplier) + fractional = fractional / (scale_multiplier / microsecond_multiplier); + else if (scale_multiplier < microsecond_multiplier) + fractional = fractional * (microsecond_multiplier / scale_multiplier); + + UInt16 millisecond = static_cast(fractional / divider); + return millisecond; + } + unsigned toMinute(Time t) const { if (t >= 0 && offset_is_whole_number_of_hours_during_epoch) @@ -1122,9 +1146,9 @@ public: DateOrTime toStartOfMinuteInterval(DateOrTime t, UInt64 minutes) const { Int64 divisor = 60 * minutes; - if (likely(offset_is_whole_number_of_minutes_during_epoch)) + if (offset_is_whole_number_of_minutes_during_epoch) [[likely]] { - if (likely(t >= 0)) + if (t >= 0) [[likely]] return static_cast(t / divisor * divisor); return static_cast((t + 1 - divisor) / divisor * divisor); } @@ -1339,7 +1363,7 @@ public: UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const { - if (likely(day_of_month <= 28)) + if (day_of_month <= 28) [[likely]] return day_of_month; UInt8 days_in_month = daysInMonth(year, month); diff --git a/src/Functions/DateTimeTransforms.cpp b/src/Functions/DateTimeTransforms.cpp index 7ec13be9d6d..006d1e94ccd 100644 --- a/src/Functions/DateTimeTransforms.cpp +++ b/src/Functions/DateTimeTransforms.cpp @@ -10,16 +10,17 @@ namespace ErrorCodes void throwDateIsNotSupported(const char * name) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date of argument for function {}", name); -} - -void throwDateTimeIsNotSupported(const char * name) -{ - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type DateTime of argument for function {}", name); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument of type Date for function {}", name); } void throwDate32IsNotSupported(const char * name) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date32 of argument for function {}", name); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument of type Date32 for function {}", name); } + +void throwDateTimeIsNotSupported(const char * name) +{ + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument of type DateTime for function {}", name); +} + } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index cad40d0acb8..6a5603339fc 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -6,6 +6,7 @@ #include #include #include +#include "base/Decimal.h" #include #include #include @@ -54,8 +55,8 @@ constexpr time_t MAX_DATE_TIMESTAMP = 5662310399; // 2149-06-06 23:59:59 U constexpr time_t MAX_DATETIME_DAY_NUM = 49710; // 2106-02-07 [[noreturn]] void throwDateIsNotSupported(const char * name); -[[noreturn]] void throwDateTimeIsNotSupported(const char * name); [[noreturn]] void throwDate32IsNotSupported(const char * name); +[[noreturn]] void throwDateTimeIsNotSupported(const char * name); /// This factor transformation will say that the function is monotone everywhere. 
struct ZeroTransform @@ -481,7 +482,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { @@ -516,7 +517,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { @@ -559,7 +560,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { @@ -602,7 +603,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) { @@ -623,7 +624,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) { @@ -644,7 +645,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) { @@ -777,7 +778,7 @@ struct ToTimeImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -802,7 +803,7 @@ struct ToStartOfMinuteImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -849,7 +850,7 @@ struct ToStartOfSecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -897,7 +898,7 @@ struct ToStartOfMillisecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -941,7 +942,7 @@ struct ToStartOfMicrosecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -979,7 +980,7 @@ struct ToStartOfNanosecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1004,7 +1005,7 @@ struct ToStartOfFiveMinutesImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1036,7 +1037,7 @@ struct ToStartOfTenMinutesImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + 
throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1068,7 +1069,7 @@ struct ToStartOfFifteenMinutesImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1103,7 +1104,7 @@ struct TimeSlotImpl static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) @@ -1142,7 +1143,7 @@ struct ToStartOfHourImpl static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) @@ -1429,7 +1430,7 @@ struct ToHourImpl } static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt8 execute(UInt16, const DateLUTImpl &) { @@ -1456,7 +1457,7 @@ struct TimezoneOffsetImpl static time_t execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static time_t execute(UInt16, const DateLUTImpl &) @@ -1482,7 +1483,7 @@ struct ToMinuteImpl } static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt8 execute(UInt16, const DateLUTImpl &) { @@ -1507,7 +1508,7 @@ struct ToSecondImpl } static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt8 execute(UInt16, const DateLUTImpl &) { @@ -1518,6 +1519,32 @@ struct ToSecondImpl using FactorTransform = ToStartOfMinuteImpl; }; +struct ToMillisecondImpl +{ + static constexpr auto name = "toMillisecond"; + + static UInt16 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl & time_zone) + { + return time_zone.toMillisecond(datetime64, scale_multiplier); + } + + static UInt16 execute(UInt32, const DateLUTImpl &) + { + return 0; + } + static UInt16 execute(Int32, const DateLUTImpl &) + { + throwDate32IsNotSupported(name); + } + static UInt16 execute(UInt16, const DateLUTImpl &) + { + throwDateIsNotSupported(name); + } + static constexpr bool hasPreimage() { return false; } + + using FactorTransform = ZeroTransform; +}; + struct ToISOYearImpl { static constexpr auto name = "toISOYear"; diff --git a/src/Functions/toMillisecond.cpp b/src/Functions/toMillisecond.cpp new file mode 100644 index 00000000000..e15b56cc555 --- /dev/null +++ b/src/Functions/toMillisecond.cpp @@ -0,0 +1,18 @@ +#include +#include +#include + +namespace DB +{ + +using FunctionToMillisecond = FunctionDateOrDateTimeToSomething; + +REGISTER_FUNCTION(ToMillisecond) +{ + factory.registerFunction(); + + /// MySQL compatibility alias. 
+ factory.registerAlias("MILLISECOND", "toMillisecond", FunctionFactory::CaseInsensitive); +} + +} diff --git a/tests/queries/0_stateless/02998_to_milliseconds.reference b/tests/queries/0_stateless/02998_to_milliseconds.reference new file mode 100644 index 00000000000..05139c19d1d --- /dev/null +++ b/tests/queries/0_stateless/02998_to_milliseconds.reference @@ -0,0 +1,8 @@ +2023-04-21 10:20:30 0 0 +2023-04-21 10:20:30 0 0 +2023-04-21 10:20:30.123 123 123 +2023-04-21 10:20:30.123456 123 123 +2023-04-21 10:20:30.123456789 123 123 +120 +2023-04-21 10:20:30 0 +2023-04-21 10:20:30 0 diff --git a/tests/queries/0_stateless/02998_to_milliseconds.sql b/tests/queries/0_stateless/02998_to_milliseconds.sql new file mode 100644 index 00000000000..f159f6aab50 --- /dev/null +++ b/tests/queries/0_stateless/02998_to_milliseconds.sql @@ -0,0 +1,17 @@ +-- Negative tests +SELECT toMillisecond(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT toMillisecond('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toMillisecond(toDate('2024-02-28')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toMillisecond(toDate32('2024-02-28')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- Tests with constant and non-constant arguments +SELECT toDateTime('2023-04-21 10:20:30') AS dt, toMillisecond(dt), toMillisecond(materialize(dt)); +SELECT toDateTime64('2023-04-21 10:20:30', 0) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123', 3) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123456', 6) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123456789', 9) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); + +-- Special cases +SELECT MILLISECOND(toDateTime64('2023-04-21 10:20:30.123456', 2)); -- Alias +SELECT toNullable(toDateTime('2023-04-21 10:20:30')) AS dt, toMillisecond(dt); -- Nullable +SELECT toLowCardinality(toDateTime('2023-04-21 10:20:30')) AS dt, toMillisecond(dt); -- LowCardinality diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 6db3ac23b05..b09b41c5d70 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2542,6 +2542,7 @@ toRelativeSecondNum toRelativeWeekNum toRelativeYearNum toSecond +toMillisecond toStartOfDay toStartOfFifteenMinutes toStartOfFiveMinutes From 2811b24a4706eaa490ac6db26e0c59a26dd1f4d6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 1 Mar 2024 14:42:46 +0000 Subject: [PATCH 131/356] Fixing build --- src/Storages/System/StorageSystemDNSCache.cpp | 2 +- src/Storages/System/StorageSystemDNSCache.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/System/StorageSystemDNSCache.cpp b/src/Storages/System/StorageSystemDNSCache.cpp index 867d1fd55b9..6fb83597908 100644 --- a/src/Storages/System/StorageSystemDNSCache.cpp +++ b/src/Storages/System/StorageSystemDNSCache.cpp @@ -32,7 +32,7 @@ ColumnsDescription StorageSystemDNSCache::getColumnsDescription() }; } -void StorageSystemDNSCache::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemDNSCache::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { using HostIPPair = std::pair; std::unordered_set>> reported_elements; diff --git 
a/src/Storages/System/StorageSystemDNSCache.h b/src/Storages/System/StorageSystemDNSCache.h index 340060335b8..dd24d2f35f6 100644 --- a/src/Storages/System/StorageSystemDNSCache.h +++ b/src/Storages/System/StorageSystemDNSCache.h @@ -9,7 +9,7 @@ namespace DB class Context; /// system.dns_cache table. -class StorageSystemDNSCache final : public IStorageSystemOneBlock +class StorageSystemDNSCache final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemDNSCache"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } From 47aa33408bee0ffc7f5857bd4859bc8d0157f51e Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 1 Mar 2024 12:06:12 +0000 Subject: [PATCH 132/356] Resubmit "Analyzer: compute ALIAS columns right after reading" This reverts commit 8141e1c3d19072d3aefd5bfc162b97bd6655ce96, reversing changes made to fb099bbd62b012a3b0c22ab7bd94078ea4883182. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 5 +- .../getHeaderForProcessingStage.cpp | 7 +- src/Planner/CollectTableExpressionData.cpp | 166 +++++++++++------- src/Planner/CollectTableExpressionData.h | 2 +- src/Planner/PlannerActionsVisitor.cpp | 5 +- src/Planner/PlannerJoinTree.cpp | 96 +++------- src/Planner/TableExpressionData.h | 115 +++++------- src/Planner/Utils.cpp | 11 +- src/Planner/Utils.h | 5 + .../QueryPlan/ReadFromMergeTree.cpp | 5 + src/Storages/StorageDistributed.cpp | 28 +++ tests/analyzer_integration_broken_tests.txt | 1 - .../test_row_policy.py | 2 +- tests/integration/test_row_policy/test.py | 3 +- .../02514_analyzer_drop_join_on.reference | 12 +- ..._support_alias_column_in_indices.reference | 13 +- 16 files changed, 245 insertions(+), 231 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 907a732493d..1f81ac54078 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6651,7 +6651,6 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table if (column_default && column_default->kind == ColumnDefaultKind::Alias) { auto alias_expression = buildQueryTree(column_default->expression, scope.context); - alias_expression = buildCastFunction(alias_expression, column_name_and_type.type, scope.context, false /*resolve*/); auto column_node = std::make_shared(column_name_and_type, std::move(alias_expression), table_expression_node); column_name_to_column_node.emplace(column_name_and_type.name, column_node); alias_columns_to_resolve.emplace_back(column_name_and_type.name, column_node); @@ -6684,7 +6683,9 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table alias_column_resolve_scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - + auto & resolved_expression = alias_column_to_resolve->getExpression(); + if (!resolved_expression->getResultType()->equals(*alias_column_to_resolve->getResultType())) + resolved_expression = buildCastFunction(resolved_expression, alias_column_to_resolve->getResultType(), scope.context, true); column_name_to_column_node = std::move(alias_column_resolve_scope.column_name_to_column_node); column_name_to_column_node[alias_column_to_resolve_name] = alias_column_to_resolve; } diff --git 
a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 21739298036..75b0e710fbe 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -121,7 +121,12 @@ Block getHeaderForProcessingStage( auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(left_table_expression); const auto & query_context = query_info.planner_context->getQueryContext(); - auto columns = table_expression_data.getColumns(); + + NamesAndTypes columns; + const auto & column_name_to_column = table_expression_data.getColumnNameToColumn(); + for (const auto & column_name : table_expression_data.getSelectedColumnsNames()) + columns.push_back(column_name_to_column.at(column_name)); + auto new_query_node = buildSubqueryToReadColumnsFromTableExpression(columns, left_table_expression, query_context); query = new_query_node->toAST(); } diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 78a7c7074c3..385381f1355 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -29,34 +29,13 @@ namespace class CollectSourceColumnsVisitor : public InDepthQueryTreeVisitor { public: - explicit CollectSourceColumnsVisitor(PlannerContext & planner_context_) + explicit CollectSourceColumnsVisitor(PlannerContextPtr & planner_context_, bool keep_alias_columns_ = true) : planner_context(planner_context_) + , keep_alias_columns(keep_alias_columns_) {} void visitImpl(QueryTreeNodePtr & node) { - /// Special case for USING clause which contains references to ALIAS columns. - /// We can not modify such ColumnNode. - if (auto * join_node = node->as()) - { - if (!join_node->isUsingJoinExpression()) - return; - - auto & using_list = join_node->getJoinExpression()->as(); - for (auto & using_element : using_list) - { - auto & column_node = using_element->as(); - /// This list contains column nodes from left and right tables. - auto & columns_from_subtrees = column_node.getExpressionOrThrow()->as().getNodes(); - - /// Visit left table column node. - visitUsingColumn(columns_from_subtrees[0]); - /// Visit right table column node. 
- visitUsingColumn(columns_from_subtrees[1]); - } - return; - } - auto * column_node = node->as(); if (!column_node) return; @@ -72,22 +51,55 @@ public: /// JOIN using expression if (column_node->hasExpression() && column_source_node_type == QueryTreeNodeType::JOIN) - return; - - auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_source_node); - - if (column_node->hasExpression() && column_source_node_type != QueryTreeNodeType::ARRAY_JOIN) { - /// Replace ALIAS column with expression + auto & columns_from_subtrees = column_node->getExpression()->as().getNodes(); + if (columns_from_subtrees.size() != 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected two columns in JOIN using expression for column {}", column_node->dumpTree()); + + visit(columns_from_subtrees[0]); + visit(columns_from_subtrees[1]); + return; + } + + auto & table_expression_data = planner_context->getOrCreateTableExpressionData(column_source_node); + + if (isAliasColumn(node)) + { + /// Column is an ALIAS column with expression bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName()); if (!column_already_exists) { - auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); - table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier); + CollectSourceColumnsVisitor visitor_for_alias_column(planner_context); + /// While we are processing expression of ALIAS columns we should not add source columns to selected. + /// See also comment for `select_added_columns` + visitor_for_alias_column.select_added_columns = false; + visitor_for_alias_column.keep_alias_columns = keep_alias_columns; + visitor_for_alias_column.visit(column_node->getExpression()); + + if (!keep_alias_columns) + { + /// For PREWHERE we can just replace ALIAS column with it's expression, + /// because ActionsDAG for PREWHERE applied right on top of table expression + /// and cannot affect subqueries or other table expressions. + node = column_node->getExpression(); + return; + } + + auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node); + + ActionsDAGPtr alias_column_actions_dag = std::make_shared(); + PlannerActionsVisitor actions_visitor(planner_context, false); + auto outputs = actions_visitor.visit(alias_column_actions_dag, column_node->getExpression()); + if (outputs.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected single output in actions dag for alias column {}. 
Actual {}", column_node->dumpTree(), outputs.size()); + const auto & column_name = column_node->getColumnName(); + const auto & alias_node = alias_column_actions_dag->addAlias(*outputs[0], column_name); + alias_column_actions_dag->addOrReplaceInOutputs(alias_node); + table_expression_data.addAliasColumn(column_node->getColumn(), column_identifier, alias_column_actions_dag, select_added_columns); } - node = column_node->getExpression(); - visitImpl(node); return; } @@ -102,45 +114,58 @@ public: bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName()); if (column_already_exists) + { + /// Column may be added when we collected data for ALIAS column + /// But now we see it directly in the query, so make sure it's marked as selected + if (select_added_columns) + table_expression_data.markSelectedColumn(column_node->getColumnName()); return; + } - auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); - table_expression_data.addColumn(column_node->getColumn(), column_identifier); + auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node); + table_expression_data.addColumn(column_node->getColumn(), column_identifier, select_added_columns); } - static bool needChildVisit(const QueryTreeNodePtr & parent, const QueryTreeNodePtr & child_node) + static bool isAliasColumn(const QueryTreeNodePtr & node) + { + const auto * column_node = node->as(); + if (!column_node || !column_node->hasExpression()) + return false; + const auto & column_source = column_node->getColumnSourceOrNull(); + if (!column_source) + return false; + return column_source->getNodeType() != QueryTreeNodeType::JOIN && + column_source->getNodeType() != QueryTreeNodeType::ARRAY_JOIN; + } + + static bool needChildVisit(const QueryTreeNodePtr & parent_node, const QueryTreeNodePtr & child_node) { - if (auto * join_node = parent->as()) - { - if (join_node->getJoinExpression() == child_node && join_node->isUsingJoinExpression()) - return false; - } auto child_node_type = child_node->getNodeType(); - return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION); + return !(child_node_type == QueryTreeNodeType::QUERY || + child_node_type == QueryTreeNodeType::UNION || + isAliasColumn(parent_node)); + } + + void setKeepAliasColumns(bool keep_alias_columns_) + { + keep_alias_columns = keep_alias_columns_; } private: + PlannerContextPtr & planner_context; - void visitUsingColumn(QueryTreeNodePtr & node) - { - auto & column_node = node->as(); - if (column_node.hasExpression()) - { - auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_node.getColumnSource()); - bool column_already_exists = table_expression_data.hasColumn(column_node.getColumnName()); - if (column_already_exists) - return; + /// Replace ALIAS columns with their expressions or register them in table expression data. + /// Usually we can replace them when we build some "local" actions DAG + /// (for example Row Policy or PREWHERE) that is applied on top of the table expression. + /// In other cases, we keep ALIAS columns as ColumnNode with an expression child node, + /// and handle them in the Planner by inserting ActionsDAG to compute them after reading from storage. 
+ bool keep_alias_columns = true; - auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); - table_expression_data.addAliasColumnName(column_node.getColumnName(), column_identifier); - - visitImpl(column_node.getExpressionOrThrow()); - } - else - visitImpl(node); - } - - PlannerContext & planner_context; + /// Flag `select_added_columns` indicates if we should mark column as explicitly selected. + /// For example, for table with columns (a Int32, b ALIAS a+1) and query SELECT b FROM table + /// Column `b` is selected explicitly by user, but not `a` (that is also read though). + /// Distinguishing such columns is important for checking access rights for ALIAS columns. + bool select_added_columns = true; }; class CollectPrewhereTableExpressionVisitor : public ConstInDepthQueryTreeVisitor @@ -274,7 +299,7 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr } } - CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context); + CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context); for (auto & node : query_node_typed.getChildren()) { if (!node || node == query_node_typed.getPrewhere()) @@ -300,21 +325,26 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr } auto & table_expression_data = planner_context->getOrCreateTableExpressionData(prewhere_table_expression); - const auto & column_names = table_expression_data.getColumnNames(); - NameSet required_column_names_without_prewhere(column_names.begin(), column_names.end()); + const auto & read_column_names = table_expression_data.getColumnNames(); + NameSet required_column_names_without_prewhere(read_column_names.begin(), read_column_names.end()); + const auto & selected_column_names = table_expression_data.getSelectedColumnsNames(); + required_column_names_without_prewhere.insert(selected_column_names.begin(), selected_column_names.end()); + collect_source_columns_visitor.setKeepAliasColumns(false); collect_source_columns_visitor.visit(query_node_typed.getPrewhere()); auto prewhere_actions_dag = std::make_shared(); + QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere(); + PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = visitor.visit(prewhere_actions_dag, query_node_typed.getPrewhere()); + auto expression_nodes = visitor.visit(prewhere_actions_dag, query_tree_node); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Invalid PREWHERE. Expected single boolean expression. 
In query {}", query_node->formatASTForErrorMessage()); - prewhere_actions_dag->getOutputs().push_back(expression_nodes[0]); + prewhere_actions_dag->getOutputs().push_back(expression_nodes.back()); for (const auto & prewhere_input_node : prewhere_actions_dag->getInputs()) if (required_column_names_without_prewhere.contains(prewhere_input_node->result_name)) @@ -324,9 +354,9 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr } } -void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context) +void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context, bool keep_alias_columns) { - CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context); + CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context, keep_alias_columns); collect_source_columns_visitor.visit(expression_node); } diff --git a/src/Planner/CollectTableExpressionData.h b/src/Planner/CollectTableExpressionData.h index ed3f0ff7a47..b0cebc15682 100644 --- a/src/Planner/CollectTableExpressionData.h +++ b/src/Planner/CollectTableExpressionData.h @@ -19,6 +19,6 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr * * ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression. */ -void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context); +void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context, bool keep_alias_columns = true); } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 511e9396a35..c417d463c73 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -451,6 +451,7 @@ private: std::unordered_map node_to_node_name; const PlannerContextPtr planner_context; ActionNodeNameHelper action_node_name_helper; + bool use_column_identifier_as_action_node_name; }; PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, @@ -458,6 +459,7 @@ PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, bool use_column_identifier_as_action_node_name_) : planner_context(planner_context_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) + , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) { actions_stack.emplace_back(std::move(actions_dag), nullptr); } @@ -503,7 +505,8 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi { auto column_node_name = action_node_name_helper.calculateActionNodeName(node); const auto & column_node = node->as(); - + if (column_node.hasExpression() && !use_column_identifier_as_action_node_name) + return visitImpl(column_node.getExpression()); Int64 actions_stack_size = static_cast(actions_stack.size() - 1); for (Int64 i = actions_stack_size; i >= 0; --i) { diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 59da88f4e45..7b3fb0c5c91 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -86,7 +86,7 @@ namespace /// Check if current user has privileges to SELECT columns from table /// Throws an exception if access to any column from `column_names` is not granted /// If `column_names` is empty, check access to any columns and return names of accessible columns -NameSet checkAccessRights(const TableNode 
& table_node, Names & column_names, const ContextPtr & query_context) +NameSet checkAccessRights(const TableNode & table_node, const Names & column_names, const ContextPtr & query_context) { /// StorageDummy is created on preliminary stage, ignore access check for it. if (typeid_cast(table_node.getStorage().get())) @@ -353,9 +353,7 @@ void prepareBuildQueryPlanForTableExpression(const QueryTreeNodePtr & table_expr NameSet columns_names_allowed_to_select; if (table_node) { - auto column_names_with_aliases = columns_names; - const auto & alias_columns_names = table_expression_data.getAliasColumnsNames(); - column_names_with_aliases.insert(column_names_with_aliases.end(), alias_columns_names.begin(), alias_columns_names.end()); + const auto & column_names_with_aliases = table_expression_data.getSelectedColumnsNames(); columns_names_allowed_to_select = checkAccessRights(*table_node, column_names_with_aliases, query_context); } @@ -864,6 +862,28 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres max_block_size, max_streams); + const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns) + { + ActionsDAGPtr merged_alias_columns_actions_dag = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag->getInputs(); + + for (const auto & [column_name, alias_column_actions_dag] : alias_column_expressions) + { + const auto & current_outputs = alias_column_actions_dag->getOutputs(); + action_dag_outputs.insert(action_dag_outputs.end(), current_outputs.begin(), current_outputs.end()); + merged_alias_columns_actions_dag->mergeNodes(std::move(*alias_column_actions_dag)); + } + + for (const auto * output_node : action_dag_outputs) + merged_alias_columns_actions_dag->addOrReplaceInOutputs(*output_node); + merged_alias_columns_actions_dag->removeUnusedActions(false); + + auto alias_column_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(merged_alias_columns_actions_dag)); + alias_column_step->setStepDescription("Compute alias columns"); + query_plan.addStep(std::move(alias_column_step)); + } + for (const auto & filter_info_and_description : where_filters) { const auto & [filter_info, description] = filter_info_and_description; @@ -907,7 +927,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres else { /// Create step which reads from empty source if storage has no data. - auto source_header = storage_snapshot->getSampleBlockForColumns(table_expression_data.getColumnNames()); + const auto & column_names = table_expression_data.getSelectedColumnsNames(); + auto source_header = storage_snapshot->getSampleBlockForColumns(column_names); Pipe pipe(std::make_shared(source_header)); auto read_from_pipe = std::make_unique(std::move(pipe)); read_from_pipe->setStepDescription("Read from NullSource"); @@ -1024,57 +1045,6 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP plan_to_add_cast.addStep(std::move(cast_join_columns_step)); } -/// Actions to calculate table columns that have a functional representation (ALIASes and subcolumns) -/// and used in USING clause of JOIN expression. 
-struct UsingAliasKeyActions -{ - UsingAliasKeyActions( - const ColumnsWithTypeAndName & left_plan_output_columns, - const ColumnsWithTypeAndName & right_plan_output_columns - ) - : left_alias_columns_keys(std::make_shared(left_plan_output_columns)) - , right_alias_columns_keys(std::make_shared(right_plan_output_columns)) - {} - - void addLeftColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) - { - addColumnImpl(left_alias_columns_keys, node, plan_output_columns, planner_context); - } - - void addRightColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) - { - addColumnImpl(right_alias_columns_keys, node, plan_output_columns, planner_context); - } - - ActionsDAGPtr getLeftActions() - { - left_alias_columns_keys->projectInput(); - return std::move(left_alias_columns_keys); - } - - ActionsDAGPtr getRightActions() - { - right_alias_columns_keys->projectInput(); - return std::move(right_alias_columns_keys); - } - -private: - void addColumnImpl(ActionsDAGPtr & alias_columns_keys, QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) - { - auto & column_node = node->as(); - if (column_node.hasExpression()) - { - auto dag = buildActionsDAGFromExpressionNode(column_node.getExpressionOrThrow(), plan_output_columns, planner_context); - const auto & left_inner_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(node); - dag->addOrReplaceInOutputs(dag->addAlias(*dag->getOutputs().front(), left_inner_column_identifier)); - alias_columns_keys->mergeInplace(std::move(*dag)); - } - } - - ActionsDAGPtr left_alias_columns_keys; - ActionsDAGPtr right_alias_columns_keys; -}; - JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_expression, JoinTreeQueryPlan left_join_tree_query_plan, JoinTreeQueryPlan right_join_tree_query_plan, @@ -1143,8 +1113,6 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ if (join_node.isUsingJoinExpression()) { - UsingAliasKeyActions using_alias_key_actions{left_plan_output_columns, right_plan_output_columns}; - auto & join_node_using_columns_list = join_node.getJoinExpression()->as(); for (auto & join_node_using_node : join_node_using_columns_list.getNodes()) { @@ -1154,13 +1122,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ auto & left_inner_column_node = inner_columns_list.getNodes().at(0); auto & left_inner_column = left_inner_column_node->as(); - using_alias_key_actions.addLeftColumn(left_inner_column_node, left_plan_output_columns, planner_context); - auto & right_inner_column_node = inner_columns_list.getNodes().at(1); auto & right_inner_column = right_inner_column_node->as(); - using_alias_key_actions.addRightColumn(right_inner_column_node, right_plan_output_columns, planner_context); - const auto & join_node_using_column_node_type = join_node_using_column_node.getColumnType(); if (!left_inner_column.getColumnType()->equals(*join_node_using_column_node_type)) { @@ -1174,14 +1138,6 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ right_plan_column_name_to_cast_type.emplace(right_inner_column_identifier, join_node_using_column_node_type); } } - - auto left_alias_columns_keys_step = std::make_unique(left_plan.getCurrentDataStream(), using_alias_key_actions.getLeftActions()); - 
left_alias_columns_keys_step->setStepDescription("Actions for left table alias column keys"); - left_plan.addStep(std::move(left_alias_columns_keys_step)); - - auto right_alias_columns_keys_step = std::make_unique(right_plan.getCurrentDataStream(), using_alias_key_actions.getRightActions()); - right_alias_columns_keys_step->setStepDescription("Actions for right table alias column keys"); - right_plan.addStep(std::move(right_alias_columns_keys_step)); } auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map & plan_column_name_to_cast_type) diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index 20c4f05ea7e..9ab7a8e64fe 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -55,7 +55,7 @@ public: /// Return true if column with name exists, false otherwise bool hasColumn(const std::string & column_name) const { - return alias_columns_names.contains(column_name) || column_name_to_column.contains(column_name); + return column_name_to_column.contains(column_name); } /** Add column in table expression data. @@ -63,37 +63,40 @@ public: * * Logical error exception is thrown if column already exists. */ - void addColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + void addColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, bool is_selected_column = true) { if (hasColumn(column.name)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Column with name {} already exists", column.name); - addColumnImpl(column, column_identifier); + column_names.push_back(column.name); + addColumnImpl(column, column_identifier, is_selected_column); } - /** Add column if it does not exists in table expression data. - * Column identifier must be created using global planner context. 
- */ - void addColumnIfNotExists(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + /// Add alias column + void addAliasColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, ActionsDAGPtr actions_dag, bool is_selected_column = true) { - if (hasColumn(column.name)) - return; - - addColumnImpl(column, column_identifier); + alias_column_expressions.emplace(column.name, std::move(actions_dag)); + addColumnImpl(column, column_identifier, is_selected_column); } - /// Add alias column name - void addAliasColumnName(const std::string & column_name, const ColumnIdentifier & column_identifier) + /// Mark existing column as selected + void markSelectedColumn(const std::string & column_name) { - alias_columns_names.insert(column_name); - - column_name_to_column_identifier.emplace(column_name, column_identifier); + auto [_, inserted] = selected_column_names_set.emplace(column_name); + if (inserted) + selected_column_names.push_back(column_name); } - /// Get alias columns names - const NameSet & getAliasColumnsNames() const + /// Get columns that are requested from table expression, including ALIAS columns + const Names & getSelectedColumnsNames() const { - return alias_columns_names; + return selected_column_names; + } + + /// Get ALIAS columns names mapped to expressions + const std::unordered_map & getAliasColumnExpressions() const + { + return alias_column_expressions; } /// Get column name to column map @@ -102,7 +105,7 @@ public: return column_name_to_column; } - /// Get column names + /// Get column names that are read from table expression const Names & getColumnNames() const { return column_names; @@ -119,23 +122,6 @@ public: return result; } - ColumnIdentifiers getColumnIdentifiers() const - { - ColumnIdentifiers result; - result.reserve(column_identifier_to_column_name.size()); - - for (const auto & [column_identifier, _] : column_identifier_to_column_name) - result.push_back(column_identifier); - - return result; - } - - /// Get column name to column identifier map - const ColumnNameToColumnIdentifier & getColumnNameToIdentifier() const - { - return column_name_to_column_identifier; - } - /// Get column identifier to column name map const ColumnNameToColumnIdentifier & getColumnIdentifierToColumnName() const { @@ -159,18 +145,6 @@ public: return it->second; } - /** Get column for column name. - * Null is returned if there are no column for column name. - */ - const NameAndTypePair * getColumnOrNull(const std::string & column_name) const - { - auto it = column_name_to_column.find(column_name); - if (it == column_name_to_column.end()) - return nullptr; - - return &it->second; - } - /** Get column identifier for column name. * Exception is thrown if there are no column identifier for column name. */ @@ -200,24 +174,6 @@ public: return &it->second; } - /** Get column name for column identifier. - * Exception is thrown if there are no column name for column identifier. - */ - const std::string & getColumnNameOrThrow(const ColumnIdentifier & column_identifier) const - { - auto it = column_identifier_to_column_name.find(column_identifier); - if (it == column_identifier_to_column_name.end()) - { - auto column_identifiers = getColumnIdentifiers(); - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column name for column identifier {} does not exists. 
There are only column identifiers: {}", - column_identifier, - fmt::join(column_identifiers.begin(), column_identifiers.end(), ", ")); - } - - return it->second; - } - /** Get column name for column identifier. * Null is returned if there are no column name for column identifier. */ @@ -296,23 +252,36 @@ public: } private: - void addColumnImpl(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + void addColumnImpl(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, bool add_to_selected_columns) { - column_names.push_back(column.name); + if (add_to_selected_columns) + markSelectedColumn(column.name); + column_name_to_column.emplace(column.name, column); column_name_to_column_identifier.emplace(column.name, column_identifier); column_identifier_to_column_name.emplace(column_identifier, column.name); } - /// Valid for table, table function, array join, query, union nodes + /// Set of columns that are physically read from table expression + /// In case of ALIAS columns it contains source column names that are used to calculate alias + /// This source column may be not used by user Names column_names; + /// Set of columns that are SELECTed from table expression + /// It may contain ALIAS columns. + /// Mainly it's used to determine access to which columns to check + /// For example user may have an access to column `a ALIAS x + y` but not to `x` and `y` + /// In that case we can read `x` and `y` and calculate `a`, but not return `x` and `y` to user + Names selected_column_names; + /// To deduplicate columns in `selected_column_names` + NameSet selected_column_names_set; + + /// Expression to calculate ALIAS columns + std::unordered_map alias_column_expressions; + /// Valid for table, table function, array join, query, union nodes ColumnNameToColumn column_name_to_column; - /// Valid only for table node - NameSet alias_columns_names; - /// Valid for table, table function, array join, query, union nodes ColumnNameToColumnIdentifier column_name_to_column_identifier; diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 5f5875b8019..bd0b831ee58 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -469,12 +469,19 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, NameSet table_expression_required_names_without_filter) { const auto & query_context = planner_context->getQueryContext(); - auto filter_query_tree = buildQueryTree(filter_expression, query_context); QueryAnalysisPass query_analysis_pass(table_expression); query_analysis_pass.run(filter_query_tree, query_context); + return buildFilterInfo(std::move(filter_query_tree), table_expression, planner_context, std::move(table_expression_required_names_without_filter)); +} + +FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, + const QueryTreeNodePtr & table_expression, + PlannerContextPtr & planner_context, + NameSet table_expression_required_names_without_filter) +{ if (table_expression_required_names_without_filter.empty()) { auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); @@ -482,7 +489,7 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, table_expression_required_names_without_filter.insert(table_expression_names.begin(), table_expression_names.end()); } - collectSourceColumns(filter_query_tree, planner_context); + collectSourceColumns(filter_query_tree, planner_context, false /*keep_alias_columns*/); collectSets(filter_query_tree, *planner_context); auto filter_actions_dag = 
std::make_shared();
diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h
index 3060b1c2711..bf45770552b 100644
--- a/src/Planner/Utils.h
+++ b/src/Planner/Utils.h
@@ -89,6 +89,11 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression,
                               PlannerContextPtr & planner_context,
                               NameSet table_expression_required_names_without_filter = {});
 
+FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree,
+                              const QueryTreeNodePtr & table_expression,
+                              PlannerContextPtr & planner_context,
+                              NameSet table_expression_required_names_without_filter = {});
+
 ASTPtr parseAdditionalResultFilter(const Settings & settings);
 
 }
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index c7b9eb72d4d..331bd46f909 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -1434,8 +1434,13 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes)
     if (query_info.planner_context)
     {
         const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression);
+        const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions();
         for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName())
         {
+            /// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG,
+            /// so they should not be added to the input nodes.
+            if (alias_column_expressions.contains(column_name))
+                continue;
             const auto & column = table_expression_data.getColumnOrThrow(column_name);
             node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name));
         }
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index 92e7dcdf4f2..7370bd3ab8f 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -744,6 +744,32 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery(
 namespace
 {
 
+class ReplaceAliasColumnsVisitor : public InDepthQueryTreeVisitor
+{
+    static QueryTreeNodePtr getColumnNodeAliasExpression(const QueryTreeNodePtr & node)
+    {
+        const auto * column_node = node->as();
+        if (!column_node || !column_node->hasExpression())
+            return nullptr;
+
+        const auto & column_source = column_node->getColumnSourceOrNull();
+        if (!column_source || column_source->getNodeType() == QueryTreeNodeType::JOIN
+            || column_source->getNodeType() == QueryTreeNodeType::ARRAY_JOIN)
+            return nullptr;
+
+        auto column_expression = column_node->getExpression();
+        column_expression->setAlias(column_node->getColumnName());
+        return column_expression;
+    }
+
+public:
+    void visitImpl(QueryTreeNodePtr & node)
+    {
+        if (auto column_expression = getColumnNodeAliasExpression(node))
+            node = column_expression;
+    }
+};
+
 QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info,
     const StorageSnapshotPtr & distributed_storage_snapshot,
     const StorageID & remote_storage_id,
@@ -796,6 +822,8 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info,
     replacement_table_expression->setAlias(query_info.table_expression->getAlias());
 
     auto query_tree_to_modify = query_info.query_tree->cloneAndReplace(query_info.table_expression, std::move(replacement_table_expression));
+    ReplaceAliasColumnsVisitor replace_alias_columns_visitor;
+    replace_alias_columns_visitor.visit(query_tree_to_modify);
 
     return buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify);
 }
diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 796ca6bca22..8f72fcd4050 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -4,4 +4,3 @@ test_distributed_type_object/test.py::test_distributed_type_object test_merge_table_over_distributed/test.py::test_global_in test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster -test_select_access_rights/test_main.py::test_alias_columns diff --git a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py index b620e88e7eb..c09a80cea06 100644 --- a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py +++ b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py @@ -41,7 +41,7 @@ def started_cluster(): CREATE TABLE mydb.filtered_table2 (a UInt8, b UInt8, c UInt8, d UInt8) ENGINE MergeTree ORDER BY a; INSERT INTO mydb.filtered_table2 values (0, 0, 0, 0), (1, 2, 3, 4), (4, 3, 2, 1), (0, 0, 6, 0); - CREATE TABLE mydb.filtered_table3 (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; + CREATE TABLE mydb.filtered_table3 (a UInt8, b UInt8, bb ALIAS b + 1, c UInt16 ALIAS a + bb - 1) ENGINE MergeTree ORDER BY a; INSERT INTO mydb.filtered_table3 values (0, 0), (0, 1), (1, 0), (1, 1); CREATE TABLE mydb.`.filtered_table4` (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 98653bf6106..8260be78e82 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -60,7 +60,7 @@ def started_cluster(): CREATE TABLE mydb.filtered_table2 (a UInt8, b UInt8, c UInt8, d UInt8) ENGINE MergeTree ORDER BY a; INSERT INTO mydb.filtered_table2 values (0, 0, 0, 0), (1, 2, 3, 4), (4, 3, 2, 1), (0, 0, 6, 0); - CREATE TABLE mydb.filtered_table3 (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; + CREATE TABLE mydb.filtered_table3 (a UInt8, b UInt8, bb ALIAS b + 1, c UInt16 ALIAS a + bb - 1) ENGINE MergeTree ORDER BY a; INSERT INTO mydb.filtered_table3 values (0, 0), (0, 1), (1, 0), (1, 1); CREATE TABLE mydb.`.filtered_table4` (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; @@ -113,6 +113,7 @@ def test_smoke(): assert node.query("SELECT a FROM mydb.filtered_table3") == TSV([[0], [1]]) assert node.query("SELECT b FROM mydb.filtered_table3") == TSV([[1], [0]]) + assert node.query("SELECT bb FROM mydb.filtered_table3") == TSV([[2], [1]]) assert node.query("SELECT c FROM mydb.filtered_table3") == TSV([[1], [1]]) assert node.query("SELECT a + b FROM mydb.filtered_table3") == TSV([[1], [1]]) assert node.query("SELECT a FROM mydb.filtered_table3 WHERE c = 1") == TSV( diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference index a5a71560d00..2c62e278050 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -55,33 +55,33 @@ Header: a2 String Header: __table1.a2 String __table1.k UInt64 __table4.d2 String - Expression ((Actions for left table alias column keys + DROP unused columns after JOIN)) + 
Expression (DROP unused columns after JOIN) Header: __table1.a2 String __table1.k UInt64 Join (JOIN FillRightFirst) Header: __table1.a2 String __table1.k UInt64 - Expression ((Actions for left table alias column keys + DROP unused columns after JOIN)) + Expression (DROP unused columns after JOIN) Header: __table1.a2 String __table1.k UInt64 Join (JOIN FillRightFirst) Header: __table1.a2 String __table1.k UInt64 - Expression ((Actions for left table alias column keys + Change column names to column identifiers)) + Expression (Change column names to column identifiers) Header: __table1.a2 String __table1.k UInt64 ReadFromMemoryStorage Header: a2 String k UInt64 - Expression ((Actions for right table alias column keys + Change column names to column identifiers)) + Expression (Change column names to column identifiers) Header: __table2.k UInt64 ReadFromMemoryStorage Header: k UInt64 - Expression ((Actions for right table alias column keys + Change column names to column identifiers)) + Expression (Change column names to column identifiers) Header: __table3.k UInt64 ReadFromMemoryStorage Header: k UInt64 - Expression ((Actions for right table alias column keys + Change column names to column identifiers)) + Expression (Change column names to column identifiers) Header: __table4.d2 String __table4.k UInt64 ReadFromMemoryStorage diff --git a/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference b/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference index 883966ce6b5..b867a31dcc3 100644 --- a/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference +++ b/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference @@ -14,13 +14,13 @@ Expression ((Projection + Before ORDER BY)) Parts: 1/1 Granules: 1/1 Expression ((Project names + Projection)) - Filter ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + (Change column names to column identifiers + Compute alias columns))) ReadFromMergeTree (02911_support_alias_column_in_indices.test1) Indexes: PrimaryKey Keys: c - Condition: (_CAST(plus(c, \'UInt64\'), 1) in [11, +Inf)) + Condition: (plus(c, 1) in [11, +Inf)) Parts: 1/2 Granules: 1/2 Skip @@ -44,12 +44,17 @@ Expression ((Projection + Before ORDER BY)) Parts: 1/1 Granules: 1/1 Expression ((Project names + Projection)) - Filter ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + (Change column names to column identifiers + Compute alias columns))) ReadFromMergeTree (02911_support_alias_column_in_indices.test2) Indexes: PrimaryKey Keys: c - Condition: (_CAST(plus(_CAST(plus(c, \'UInt64\'), 1), \'UInt64\'), 1) in [16, +Inf)) + Condition: (plus(plus(c, 1), 1) in [16, +Inf)) Parts: 1/2 Granules: 1/2 + Skip + Name: i + Description: minmax GRANULARITY 1 + Parts: 1/1 + Granules: 1/1 From 361ec17c871d69cccb0d2994ea312533b7d0272a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 1 Mar 2024 15:31:25 +0000 Subject: [PATCH 133/356] gdb... --- docker/test/fuzzer/run-fuzzer.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index b4376fe2409..1a1888861af 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -247,6 +247,9 @@ quit fuzzer_pid=$! echo "Fuzzer pid is $fuzzer_pid" + echo "Attaching gdb to the fuzzer itself" + gdb -batch -command script.gdb -p $fuzzer_pid & + # Wait for the fuzzer to complete. # Note that the 'wait || ...' 
thing is required so that the script doesn't # exit because of 'set -e' when 'wait' returns nonzero code. From 0b10612c863bf5b62bcf90028daa57275a966b6a Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 1 Mar 2024 17:42:57 +0000 Subject: [PATCH 134/356] fix --- src/Functions/FunctionBinaryArithmetic.h | 23 ++++++++++--------- src/Functions/IsOperation.h | 6 ++--- ...02_int_div_decimal_with_date_bug.reference | 0 .../03002_int_div_decimal_with_date_bug.sql | 5 ++++ 4 files changed, 20 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.reference create mode 100644 tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.sql diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index d253095ca01..9ad74f6332f 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -170,7 +170,8 @@ public: /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid). using ResultDataType = Switch< /// Result must be Integer - Case::div_int || IsOperation::div_int_or_zero, DataTypeFromFieldType>, + Case::int_div || IsOperation::int_div_or_zero, + std::conditional_t && IsDataTypeNumber, DataTypeFromFieldType, InvalidType>>, /// Decimal cases Case || IsDataTypeDecimal, DecimalResultDataType>, Case< @@ -672,8 +673,8 @@ private: IsOperation::minus; static constexpr bool is_multiply = IsOperation::multiply; static constexpr bool is_float_division = IsOperation::div_floating; - static constexpr bool is_int_division = IsOperation::div_int || - IsOperation::div_int_or_zero; + static constexpr bool is_int_division = IsOperation::int_div || + IsOperation::int_div_or_zero; static constexpr bool is_division = is_float_division || is_int_division; static constexpr bool is_compare = IsOperation::least || IsOperation::greatest; @@ -781,8 +782,8 @@ class FunctionBinaryArithmetic : public IFunction static constexpr bool is_division = IsOperation::division; static constexpr bool is_bit_hamming_distance = IsOperation::bit_hamming_distance; static constexpr bool is_modulo = IsOperation::modulo; - static constexpr bool is_div_int = IsOperation::div_int; - static constexpr bool is_div_int_or_zero = IsOperation::div_int_or_zero; + static constexpr bool is_int_div = IsOperation::int_div; + static constexpr bool is_int_div_or_zero = IsOperation::int_div_or_zero; ContextPtr context; bool check_decimal_overflow = true; @@ -1007,11 +1008,11 @@ class FunctionBinaryArithmetic : public IFunction { function_name = "tupleModuloByNumber"; } - else if constexpr (is_div_int) + else if constexpr (is_int_div) { function_name = "tupleIntDivByNumber"; } - else if constexpr (is_div_int_or_zero) + else if constexpr (is_int_div_or_zero) { function_name = "tupleIntDivOrZeroByNumber"; } @@ -1466,7 +1467,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override { - return ((IsOperation::div_int || IsOperation::modulo || IsOperation::positive_modulo) && !arguments[1].is_const) + return ((IsOperation::int_div || IsOperation::modulo || IsOperation::positive_modulo) && !arguments[1].is_const) || (IsOperation::div_floating && (isDecimalOrNullableDecimal(arguments[0].type) || isDecimalOrNullableDecimal(arguments[1].type))); } @@ -1690,7 +1691,7 @@ public: if constexpr (!std::is_same_v) { - if constexpr (is_div_int || is_div_int_or_zero) + if constexpr (is_int_div || is_int_div_or_zero) type_res = 
std::make_shared(); else if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) { @@ -2086,7 +2087,7 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A right_nullmap); } /// Here we check if we have `intDiv` or `intDivOrZero` and at least one of the arguments is decimal, because in this case originally we had result as decimal, so we need to convert result into integer after calculations - else if constexpr (!decimal_with_float && (is_div_int || is_div_int_or_zero) && (IsDataTypeDecimal || IsDataTypeDecimal)) + else if constexpr (!decimal_with_float && (is_int_div || is_int_div_or_zero) && (IsDataTypeDecimal || IsDataTypeDecimal)) { if constexpr (!std::is_same_v) @@ -2624,7 +2625,7 @@ public: /// Check the case when operation is divide, intDiv or modulo and denominator is Nullable(Something). /// For divide operation we should check only Nullable(Decimal), because only this case can throw division by zero error. bool division_by_nullable = !arguments[0].type->onlyNull() && !arguments[1].type->onlyNull() && arguments[1].type->isNullable() - && (IsOperation::div_int || IsOperation::modulo || IsOperation::positive_modulo + && (IsOperation::int_div || IsOperation::modulo || IsOperation::positive_modulo || (IsOperation::div_floating && (isDecimalOrNullableDecimal(arguments[0].type) || isDecimalOrNullableDecimal(arguments[1].type)))); diff --git a/src/Functions/IsOperation.h b/src/Functions/IsOperation.h index b2c7a27d375..a74df8f4dd9 100644 --- a/src/Functions/IsOperation.h +++ b/src/Functions/IsOperation.h @@ -51,8 +51,8 @@ struct IsOperation static constexpr bool minus = IsSameOperation::value; static constexpr bool multiply = IsSameOperation::value; static constexpr bool div_floating = IsSameOperation::value; - static constexpr bool div_int = IsSameOperation::value; - static constexpr bool div_int_or_zero = IsSameOperation::value; + static constexpr bool int_div = IsSameOperation::value; + static constexpr bool int_div_or_zero = IsSameOperation::value; static constexpr bool modulo = IsSameOperation::value; static constexpr bool positive_modulo = IsSameOperation::value; static constexpr bool least = IsSameOperation::value; @@ -60,7 +60,7 @@ struct IsOperation static constexpr bool bit_hamming_distance = IsSameOperation::value; - static constexpr bool division = div_floating || div_int || div_int_or_zero || modulo; + static constexpr bool division = div_floating || int_div || int_div_or_zero || modulo; // NOTE: allow_decimal should not fully contain `division` because of divInt static constexpr bool allow_decimal = plus || minus || multiply || division || least || greatest; }; diff --git a/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.reference b/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.sql b/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.sql new file mode 100644 index 00000000000..1668821200c --- /dev/null +++ b/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.sql @@ -0,0 +1,5 @@ +SELECT intDiv(CAST('1.0', 'Decimal256(3)'), today()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT intDiv(CAST('1.0', 'Decimal256(3)'), toDate('2023-01-02')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT intDiv(CAST('1.0', 'Decimal256(2)'), toDate32('2023-01-02 12:12:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT intDiv(CAST('1.0', 'Decimal256(2)'), 
toDateTime('2023-01-02 12:12:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT intDiv(CAST('1.0', 'Decimal256(2)'), toDateTime64('2023-01-02 12:12:12.002', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From 95611889dc0f4b877c6c9bb914831041cd26f2c0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 1 Mar 2024 18:44:33 +0000 Subject: [PATCH 135/356] fix tests --- src/Interpreters/TreeRewriter.cpp | 23 ++++++++++++------- src/Interpreters/inplaceBlockConversions.cpp | 1 - src/Storages/AlterCommands.cpp | 1 - src/Storages/FileLog/StorageFileLog.cpp | 1 - src/Storages/FileLog/StorageFileLog.h | 1 - src/Storages/HDFS/StorageHDFS.cpp | 5 ---- src/Storages/HDFS/StorageHDFS.h | 2 -- src/Storages/IStorage.cpp | 1 - src/Storages/Kafka/StorageKafka.cpp | 1 - src/Storages/Kafka/StorageKafka.h | 1 - src/Storages/LiveView/StorageLiveView.cpp | 1 - src/Storages/MergeTree/IMergeTreeReader.cpp | 4 ---- .../MergeTree/MergeTreeBlockReadUtils.cpp | 3 +-- src/Storages/MergeTree/MergeTreeData.cpp | 2 -- src/Storages/MergeTree/MergeTreeData.h | 2 -- .../MergeTree/MergeTreeDataPartCompact.cpp | 1 - .../MergeTree/MergeTreeDataPartWriterWide.cpp | 1 - .../MergeTree/MergeTreeDataSelectExecutor.cpp | 1 - .../MergeTree/MergeTreeSelectProcessor.cpp | 1 - .../MergeTree/MergeTreeSelectProcessor.h | 1 - src/Storages/MergeTreeVirtualColumns.cpp | 2 +- .../StorageMaterializedPostgreSQL.cpp | 1 - .../StorageMaterializedPostgreSQL.h | 1 - src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 1 - src/Storages/RabbitMQ/StorageRabbitMQ.h | 1 - src/Storages/StorageAzureBlob.cpp | 5 ---- src/Storages/StorageAzureBlob.h | 2 -- src/Storages/StorageDistributed.cpp | 3 --- src/Storages/StorageDistributed.h | 1 - src/Storages/StorageFile.cpp | 5 ---- src/Storages/StorageFile.h | 2 -- src/Storages/StorageKeeperMap.cpp | 1 - src/Storages/StorageLog.cpp | 1 - src/Storages/StorageMaterializedView.cpp | 2 -- src/Storages/StorageMerge.cpp | 2 -- src/Storages/StorageMerge.h | 1 - src/Storages/StorageS3.cpp | 5 ---- src/Storages/StorageS3.h | 2 -- src/Storages/StorageSnapshot.cpp | 1 - src/Storages/StorageURL.cpp | 5 ---- src/Storages/StorageURL.h | 2 -- .../System/StorageSystemDictionaries.cpp | 3 --- .../System/StorageSystemDictionaries.h | 1 - .../System/StorageSystemPartsBase.cpp | 1 - src/Storages/VirtualColumnUtils.cpp | 2 +- src/Storages/VirtualColumnUtils.h | 3 +-- src/Storages/VirtualColumnsDescription.cpp | 3 --- src/TableFunctions/ITableFunctionFileLike.cpp | 6 +++++ src/TableFunctions/ITableFunctionFileLike.h | 3 +++ .../TableFunctionAzureBlobStorage.cpp | 4 ++-- src/TableFunctions/TableFunctionFile.cpp | 6 ----- src/TableFunctions/TableFunctionFile.h | 2 -- src/TableFunctions/TableFunctionHDFS.cpp | 6 ----- src/TableFunctions/TableFunctionHDFS.h | 2 -- src/TableFunctions/TableFunctionS3.cpp | 4 ++-- src/TableFunctions/TableFunctionURL.cpp | 6 ----- src/TableFunctions/TableFunctionURL.h | 2 -- 57 files changed, 32 insertions(+), 122 deletions(-) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 896182c1f0f..5588fc55a64 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -992,7 +992,6 @@ void TreeRewriterResult::collectSourceColumns(bool add_special) auto options = GetColumnsOptions(add_special ? 
GetColumnsOptions::All : GetColumnsOptions::AllPhysical); options.withExtendedObjects(); options.withSubcolumns(storage->supportsSubcolumns()); - options.withVirtuals(); auto columns_from_storage = storage_snapshot->getColumns(options); @@ -1002,8 +1001,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special) source_columns.insert(source_columns.end(), columns_from_storage.begin(), columns_from_storage.end()); auto metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto metadata_column_descriptions = metadata_snapshot->getColumns(); - source_columns_ordinary = metadata_column_descriptions.getOrdinary(); + source_columns_ordinary = metadata_snapshot->getColumns().getOrdinary(); } source_columns_set = removeDuplicateColumns(source_columns); @@ -1142,16 +1140,25 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } has_virtual_shard_num = false; - if (is_remote_storage) + /// If there are virtual columns among the unknown columns. Remove them from the list of unknown and add + /// in columns list, so that when further processing they are also considered. + if (storage_snapshot) { - for (const auto & column : *storage_snapshot->virtual_columns) + const auto & virtuals = storage_snapshot->virtual_columns; + for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) { - if (column.name == "_shard_num" && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery())) + if (auto column = virtuals->tryGet(*it)) { - has_virtual_shard_num = true; - break; + source_columns.push_back(*column); + it = unknown_required_source_columns.erase(it); + } + else + { + ++it; } } + + has_virtual_shard_num = is_remote_storage && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery()) && virtuals->has("_shard_num"); } /// Collect missed object subcolumns diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index 061156c56db..239cce5b427 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -20,7 +20,6 @@ #include #include #include -#include namespace DB diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index acd95a2b8d7..e1c80112c72 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 2911f9b268b..a5f2331a068 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -27,7 +27,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index ede36600f92..91d58540c94 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -9,7 +9,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include #include diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 4a481a6a7d2..19b2817510d 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -1178,11 +1178,6 @@ void registerStorageHDFS(StorageFactory & factory) }); } -Names StorageHDFS::getVirtualColumnNames() -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); -} - SchemaCache & 
StorageHDFS::getSchemaCache(const ContextPtr & ctx) { static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index e62fc180a6b..47e5addccb4 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -69,8 +69,6 @@ public: ContextPtr local_context, TableExclusiveLockHolder &) override; - static Names getVirtualColumnNames(); - bool supportsPartitionBy() const override { return true; } /// Check if the format is column-oriented. diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 18a934af767..9852220241f 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -1,7 +1,6 @@ #include #include -#include "Storages/VirtualColumnsDescription.h" #include #include #include diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 4b87b1f7b74..c63ebdfa28f 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -52,7 +52,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include #if USE_KRB5 diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 6a14bd64cc6..605291fdd8c 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -7,7 +7,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 631c88a4dad..cef385c6f98 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -27,7 +27,6 @@ limitations under the License. 
*/ #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include #include diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index dd55a6acb2c..8bef26ca240 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -2,11 +2,7 @@ #include #include #include -#include #include -#include -#include -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index f97e07751e0..7d54d3867ac 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -6,8 +6,7 @@ #include #include #include -#include "Storages/ColumnsDescription.h" -#include "Storages/MergeTreeVirtualColumns.h" +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e52362ff76a..0b7ac39aa1b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8,7 +8,6 @@ #include #include #include -#include "Common/logger_useful.h" #include #include #include @@ -23,7 +22,6 @@ #include #include #include -#include "Storages/ProjectionsDescription.h" #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index f7bde252fb9..3a685a4c397 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -37,8 +37,6 @@ #include #include #include -#include "Storages/ProjectionsDescription.h" -#include "Storages/VirtualColumnsDescription.h" #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index ee34a02b0b3..9f201ab3b81 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 3a646e0b85d..d79590ded21 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include namespace DB diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ac0b104687d..428c8f92931 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -46,7 +46,6 @@ #include #include -#include #include namespace CurrentMetrics diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index e3900ccdd73..8a24e150bae 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -6,7 +6,6 @@ #include #include #include -#include "Storages/StorageSnapshot.h" #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index e25dfba4327..01bb3851e04 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -5,7 +5,6 @@ #include #include #include -#include "Storages/StorageSnapshot.h" namespace DB diff --git 
a/src/Storages/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTreeVirtualColumns.cpp index 1a576bef017..885e46c6828 100644 --- a/src/Storages/MergeTreeVirtualColumns.cpp +++ b/src/Storages/MergeTreeVirtualColumns.cpp @@ -1,8 +1,8 @@ #include +#include #include #include #include -#include namespace DB { diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 2914c17b117..c9866146a8b 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -1,5 +1,4 @@ #include "StorageMaterializedPostgreSQL.h" -#include "Storages/VirtualColumnsDescription.h" #if USE_LIBPQXX #include diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index 41f72ea79e1..af2f13bb880 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -1,6 +1,5 @@ #pragma once -#include "Storages/VirtualColumnsDescription.h" #include "config.h" #if USE_LIBPQXX diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index c5df752fb7f..980fccd307e 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -30,7 +30,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" namespace DB { diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 603a1f1d68f..e14741d9636 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -9,7 +9,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include #include #include diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index a74e274e5d9..92dbc1025db 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -886,11 +886,6 @@ SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMeta } } -Names StorageAzureBlob::getVirtualColumnNames() -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); -} - bool StorageAzureBlob::supportsPartitionBy() const { return true; diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 5571ec6d902..ae70b111fb0 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -94,8 +94,6 @@ public: void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - static Names getVirtualColumnNames(); - bool supportsPartitionBy() const override; bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 541b2934892..ce695d5f009 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -31,7 +31,6 @@ #include #include #include -#include "Storages/StorageInMemoryMetadata.h" #include #include @@ -106,8 +105,6 @@ #include #include -#include - #include #include #include diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 51e4ccd4da3..cf5b78305a5 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -9,7 +9,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include #include diff --git 
a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 8eaf794f445..a734d3967eb 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -2255,9 +2255,4 @@ StorageFile::ArchiveInfo StorageFile::getArchiveInfo( return archive_info; } -Names StorageFile::getVirtualColumnNames() -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); -} - } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 5535dba36e5..93c263008a6 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -80,8 +80,6 @@ public: bool storesDataOnDisk() const override; Strings getDataPaths() const override; - static Names getVirtualColumnNames(); - static Strings getPathsList(const String & table_path, const String & user_files_path, const ContextPtr & context, size_t & total_bytes_to_read); /// Check if the format supports reading only some subset of columns. diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 6c7a8e06cc3..69e6085bab9 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -41,7 +41,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include #include diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 25cc38f7834..7d959b05c94 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -22,7 +22,6 @@ #include #include "StorageLogSettings.h" -#include "Storages/StorageSnapshot.h" #include #include #include diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 7e953889b34..aeba01fdf8a 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -1,4 +1,3 @@ -#include #include #include @@ -22,7 +21,6 @@ #include #include -#include "Storages/StorageSnapshot.h" #include #include #include diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 0ddfc2a6bb4..00513d1ec46 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include @@ -61,7 +60,6 @@ #include #include #include -#include "Storages/StorageSnapshot.h" #include #include diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 661750fb6dd..556649f622d 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -5,7 +5,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" namespace DB diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 691d197aee6..afd34792335 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1975,11 +1975,6 @@ void registerStorageOSS(StorageFactory & factory) return registerStorageS3Impl("OSS", factory); } -Names StorageS3::getVirtualColumnNames() -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); -} - bool StorageS3::supportsPartitionBy() const { return true; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 29a4e46eb9a..7c5db76aadf 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -336,8 +336,6 @@ public: void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - static Names getVirtualColumnNames(); - bool supportsPartitionBy() const override; static void processNamedCollectionResult(StorageS3::Configuration & configuration, const 
NamedCollection & collection); diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 71d2809e18a..1893d65a64c 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -5,7 +5,6 @@ #include #include #include -#include "Storages/ColumnsDescription.h" namespace DB { diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index e0c5160a43b..ec5b6008c17 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1302,11 +1302,6 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad } } -Names IStorageURLBase::getVirtualColumnNames() -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); -} - SchemaCache & IStorageURLBase::getSchemaCache(const ContextPtr & context) { static SchemaCache schema_cache(context->getConfigRef().getUInt("schema_inference_cache_max_elements_for_url", DEFAULT_SCHEMA_CACHE_ELEMENTS)); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 36cfa97463c..294b1f828bb 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -48,8 +48,6 @@ public: bool supportsPartitionBy() const override { return true; } - static Names getVirtualColumnNames(); - static ColumnsDescription getTableStructureFromData( const String & format, const String & uri, diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index 353c61e6347..d60cfcafc13 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -16,9 +16,6 @@ #include #include -#include "Storages/System/IStorageSystemOneBlock.h" -#include "Storages/VirtualColumnsDescription.h" -#include namespace DB { diff --git a/src/Storages/System/StorageSystemDictionaries.h b/src/Storages/System/StorageSystemDictionaries.h index e4f07e3c4bf..60b17af4a45 100644 --- a/src/Storages/System/StorageSystemDictionaries.h +++ b/src/Storages/System/StorageSystemDictionaries.h @@ -1,7 +1,6 @@ #pragma once #include -#include "Interpreters/StorageID.h" namespace DB diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index b4900d26470..075e1c62323 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -2,7 +2,6 @@ #include #include #include -#include "Storages/VirtualColumnsDescription.h" #include #include #include diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index c2b61c49e4a..21f05953714 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -343,7 +343,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr contex } } -Names getVirtualNamesForFileLikeStorage() +NameSet getVirtualNamesForFileLikeStorage() { return {"_path", "_file", "_size"}; } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index a3aa3b6adc6..3a83874c13e 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -5,7 +5,6 @@ #include #include #include -#include "Storages/ColumnsDescription.h" #include @@ -58,7 +57,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) return res; } -Names getVirtualNamesForFileLikeStorage(); +NameSet getVirtualNamesForFileLikeStorage(); VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns); ActionsDAGPtr 
createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); diff --git a/src/Storages/VirtualColumnsDescription.cpp b/src/Storages/VirtualColumnsDescription.cpp index d9f9cbe047e..5dd249c5190 100644 --- a/src/Storages/VirtualColumnsDescription.cpp +++ b/src/Storages/VirtualColumnsDescription.cpp @@ -1,6 +1,3 @@ -#include "Core/NamesAndTypes.h" -#include "DataTypes/Serializations/ISerialization.h" -#include "base/types.h" #include namespace DB diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index b697f3df925..1a58be4f75b 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -7,6 +7,7 @@ #include #include +#include #include @@ -37,6 +38,11 @@ bool ITableFunctionFileLike::supportsReadingSubsetOfColumns(const ContextPtr & c return format != "auto" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format, context); } +NameSet ITableFunctionFileLike::getVirtualsToCheckBeforeUsingStructureHint() const +{ + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); +} + void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context) { /// Parse args diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index c8412905e44..ba1b7d2bb3f 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -1,6 +1,7 @@ #pragma once #include +#include "Core/Names.h" #include "Parsers/IAST_fwd.h" namespace DB @@ -29,6 +30,8 @@ public: bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; + NameSet getVirtualsToCheckBeforeUsingStructureHint() const override; + static size_t getMaxNumberOfArguments() { return 4; } static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr &); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index 066d6338b6a..f9645b8323d 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" #include @@ -348,8 +349,7 @@ bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns(const Context std::unordered_set TableFunctionAzureBlobStorage::getVirtualsToCheckBeforeUsingStructureHint() const { - auto virtual_column_names = StorageAzureBlob::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); } StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index b481076e9b6..28bf72e07fb 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -112,12 +112,6 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context return parseColumnsListFromString(structure, context); } -std::unordered_set TableFunctionFile::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageFile::getVirtualColumnNames(); - return 
{virtual_column_names.begin(), virtual_column_names.end()}; -} - void registerTableFunctionFile(TableFunctionFactory & factory) { factory.registerFunction(); diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index c1924028b49..aaf5ba8873a 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -22,8 +22,6 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - protected: int fd = -1; String path_to_archive; diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp index 2dac4398144..45829245551 100644 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ b/src/TableFunctions/TableFunctionHDFS.cpp @@ -41,12 +41,6 @@ ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context return parseColumnsListFromString(structure, context); } -std::unordered_set TableFunctionHDFS::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageHDFS::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - void registerTableFunctionHDFS(TableFunctionFactory & factory) { factory.registerFunction(); diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index 3a719496b26..f1c0b8a7eae 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -36,8 +36,6 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - private: StoragePtr getStorage( const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 3fedd38277c..7af1675e19e 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" #include @@ -401,8 +402,7 @@ bool TableFunctionS3::supportsReadingSubsetOfColumns(const ContextPtr & context) std::unordered_set TableFunctionS3::getVirtualsToCheckBeforeUsingStructureHint() const { - auto virtual_column_names = StorageS3::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); } StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool /*is_insert_query*/) const diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index a78b2affa9a..2bdc0b449e0 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -159,12 +159,6 @@ ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context, return parseColumnsListFromString(structure, context); } -std::unordered_set TableFunctionURL::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageURL::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - std::optional TableFunctionURL::tryGetFormatFromFirstArgument() { return 
FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(filename).getPath()); diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index 54e223283ba..a1efddb84c6 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -36,8 +36,6 @@ public: static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context); - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - protected: void parseArguments(const ASTPtr & ast, ContextPtr context) override; void parseArgumentsImpl(ASTs & args, const ContextPtr & context) override; From 1a1e78cf63b994dfb837a00602378c78c3e4c011 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 1 Mar 2024 18:52:14 +0000 Subject: [PATCH 136/356] temporary remove virtual column --- src/Storages/MergeTree/IMergeTreeReader.cpp | 2 +- src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp | 2 +- src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 10 ---------- src/Storages/MergeTreeVirtualColumns.cpp | 4 ---- src/Storages/MergeTreeVirtualColumns.h | 7 ------- 5 files changed, 2 insertions(+), 23 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 8bef26ca240..30be1aa1c56 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -93,7 +93,7 @@ void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const it->name, it->type->getName(), virtual_column->type->getName()); } - if (it->name == "_part_offset" || it->name == BlockOffsetColumn::name) + if (it->name == "_part_offset") throw Exception(ErrorCodes::LOGICAL_ERROR, "Virtual column {} must be filled by range reader", it->name); Field field; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 7d54d3867ac..e84ed0a8068 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -278,7 +278,7 @@ MergeTreeReadTaskColumns getReadTaskColumns( .withVirtuals() .withSubcolumns(with_subcolumns); - static const NameSet columns_to_read_at_first_step = {"_part_offset", BlockOffsetColumn::name}; + static const NameSet columns_to_read_at_first_step = {"_part_offset"}; NameSet columns_from_previous_steps; auto add_step = [&](const PrewhereExprStep & step) diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index fb0bc617aa4..2eb010c54ec 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -210,16 +210,6 @@ static void fillBlockNumberColumns( { res_columns[i] = BlockNumberColumn::type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst(); } - else if (it->name == BlockOffsetColumn::name) - { - auto column = BlockOffsetColumn::type->createColumn(); - auto & block_offset_data = assert_cast(*column).getData(); - - block_offset_data.resize(num_rows); - std::iota(block_offset_data.begin(), block_offset_data.end(), block_offset); - - res_columns[i] = std::move(column); - } } } diff --git a/src/Storages/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTreeVirtualColumns.cpp index 885e46c6828..8250ceda7fa 100644 --- a/src/Storages/MergeTreeVirtualColumns.cpp +++ b/src/Storages/MergeTreeVirtualColumns.cpp @@ -26,10 +26,6 @@ const String 
BlockNumberColumn::name = "_block_number"; const DataTypePtr BlockNumberColumn::type = std::make_shared(); const ASTPtr BlockNumberColumn::codec = getCompressionCodecDeltaLZ4(); -const String BlockOffsetColumn::name = "_block_offset"; -const DataTypePtr BlockOffsetColumn::type = std::make_shared(); -const ASTPtr BlockOffsetColumn::codec = getCompressionCodecDeltaLZ4(); - Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part) { if (column_name == RowExistsColumn::name) diff --git a/src/Storages/MergeTreeVirtualColumns.h b/src/Storages/MergeTreeVirtualColumns.h index cd9fe544ed8..24721bf1ad1 100644 --- a/src/Storages/MergeTreeVirtualColumns.h +++ b/src/Storages/MergeTreeVirtualColumns.h @@ -21,13 +21,6 @@ struct BlockNumberColumn static const ASTPtr codec; }; -struct BlockOffsetColumn -{ - static const String name; - static const DataTypePtr type; - static const ASTPtr codec; -}; - Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part); } From 754ae8792dcfd48219e479a71afc6f23a260d3ec Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 1 Mar 2024 19:29:42 +0000 Subject: [PATCH 137/356] Always apply first minmax index among available skip indices --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 13 +++++++++++++ src/Storages/IndicesDescription.h | 1 - src/Storages/MergeTree/MergeTreeIndexHypothesis.h | 1 - src/Storages/MergeTree/MergeTreeIndexMinMax.h | 2 -- src/Storages/MergeTree/MergeTreeIndexSet.h | 3 --- src/Storages/MergeTree/MergeTreeIndices.h | 3 --- src/Storages/MergeTree/MergeTreeReaderStream.h | 1 - .../03000_minmax_index_first.reference | 2 ++ .../0_stateless/03000_minmax_index_first.sql | 15 +++++++++++++++ 9 files changed, 30 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/03000_minmax_index_first.reference create mode 100644 tests/queries/0_stateless/03000_minmax_index_first.sql diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index c7b9eb72d4d..8f0db044d8f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -1421,6 +1422,18 @@ static void buildIndexes( } } + // move minmax indices to first positions, so they will be applied first as cheapest ones + std::sort(begin(skip_indexes.useful_indices), end(skip_indexes.useful_indices), [](const auto & l, const auto & r) + { + if (typeid_cast(l.index.get())) + return true; // left is min max + + if (typeid_cast(r.index.get())) + return false; // right is min max but left is not + + return true; + }); + indexes->skip_indexes = std::move(skip_indexes); } diff --git a/src/Storages/IndicesDescription.h b/src/Storages/IndicesDescription.h index e56642b8c76..21ba5fb632e 100644 --- a/src/Storages/IndicesDescription.h +++ b/src/Storages/IndicesDescription.h @@ -2,7 +2,6 @@ #include -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h index 2296e1b717d..130e708d76f 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 1e2abe6983f..dca26fb7b28 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ 
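// A sketch for illustration only, not part of the patch above: the new sort in
// buildIndexes() returns `true` when neither side is a minmax index, so the lambda
// is not a strict weak ordering (comp(x, x) must be false) and std::sort gives no
// guarantees with it. Assuming the `useful_indices` entries expose an `index`
// smart pointer and that MergeTreeIndexMinMax is the minmax skip-index class (as
// the surrounding hunks suggest), a well-formed variant that still moves minmax
// indexes to the front as the cheapest ones could look roughly like this:
std::stable_sort(skip_indexes.useful_indices.begin(), skip_indexes.useful_indices.end(),
    [](const auto & l, const auto & r)
    {
        const bool l_is_minmax = typeid_cast<const MergeTreeIndexMinMax *>(l.index.get()) != nullptr;
        const bool r_is_minmax = typeid_cast<const MergeTreeIndexMinMax *>(r.index.get()) != nullptr;
        /// Minmax indexes order before all other skip indexes; ties keep their relative order.
        return l_is_minmax && !r_is_minmax;
    });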
b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index ea9f7ddef3d..7c66ba1a867 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -5,9 +5,6 @@ #include -#include -#include - namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 4749470bedd..8fdadb4e5eb 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -1,12 +1,9 @@ #pragma once #include -#include #include #include #include -#include -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index baf8ec713f9..49ce3103434 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -1,5 +1,4 @@ #pragma once -#include #include #include #include diff --git a/tests/queries/0_stateless/03000_minmax_index_first.reference b/tests/queries/0_stateless/03000_minmax_index_first.reference new file mode 100644 index 00000000000..7cf792d8ed4 --- /dev/null +++ b/tests/queries/0_stateless/03000_minmax_index_first.reference @@ -0,0 +1,2 @@ +Name: v_mm +Name: v_set diff --git a/tests/queries/0_stateless/03000_minmax_index_first.sql b/tests/queries/0_stateless/03000_minmax_index_first.sql new file mode 100644 index 00000000000..ee2cb2f4dfa --- /dev/null +++ b/tests/queries/0_stateless/03000_minmax_index_first.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS skip_table; + +CREATE TABLE skip_table +( + k UInt64, + v UInt64, + INDEX v_set v TYPE set(100) GRANULARITY 2, + INDEX v_mm v TYPE minmax GRANULARITY 2 +) +ENGINE = MergeTree +PRIMARY KEY k; + +INSERT INTO skip_table SELECT number, intDiv(number, 4096) FROM numbers(1000000); + +SELECT trim(explain) FROM ( EXPLAIN indexes = 1 SELECT * FROM skip_table WHERE v = 125) WHERE explain ilike '%Name%'; From 4baa60d5ffd94dffba89394d38d7514782a9cb07 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 1 Mar 2024 19:36:05 +0000 Subject: [PATCH 138/356] Comment in test --- tests/queries/0_stateless/03000_minmax_index_first.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03000_minmax_index_first.sql b/tests/queries/0_stateless/03000_minmax_index_first.sql index ee2cb2f4dfa..4db232880de 100644 --- a/tests/queries/0_stateless/03000_minmax_index_first.sql +++ b/tests/queries/0_stateless/03000_minmax_index_first.sql @@ -4,7 +4,7 @@ CREATE TABLE skip_table ( k UInt64, v UInt64, - INDEX v_set v TYPE set(100) GRANULARITY 2, + INDEX v_set v TYPE set(100) GRANULARITY 2, -- set index is declared before minmax intentionally INDEX v_mm v TYPE minmax GRANULARITY 2 ) ENGINE = MergeTree From aa42d5a88da0c16706b95631285db77d2dca9bdf Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 1 Mar 2024 20:58:22 +0000 Subject: [PATCH 139/356] separate FunctionSecretArgumentsFinder for AST and TreeNode to avoid binary bloat due to linkage dependencies --- .../FunctionSecretArgumentsFinderTreeNode.h | 1 + src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ++-- src/Parsers/ASTFunction.cpp | 6 +++--- src/Parsers/FunctionSecretArgumentsFinder.cpp | 19 ------------------- src/Parsers/FunctionSecretArgumentsFinder.h | 6 +----- 5 files changed, 7 insertions(+), 29 deletions(-) delete mode 100644 
src/Parsers/FunctionSecretArgumentsFinder.cpp diff --git a/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h b/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h index 7e9a31868b2..439ddffe5e5 100644 --- a/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h +++ b/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h @@ -7,6 +7,7 @@ #include #include #include +#include #include diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 819a3758e26..10866e5eed4 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -5127,7 +5127,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi /// Mask arguments if needed if (!scope.context->getSettingsRef().format_display_secrets_in_show_and_select) { - if (FunctionSecretArgumentsFinder::Result secret_arguments = FunctionSecretArgumentsFinder::find(*function_node_ptr); secret_arguments.count) + if (FunctionSecretArgumentsFinder::Result secret_arguments = FunctionSecretArgumentsFinderTreeNode(*function_node_ptr).getResult(); secret_arguments.count) { auto & argument_nodes = function_node_ptr->getArgumentsNode()->as().getNodes(); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 4dac9c090f6..4537ced14cf 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -693,7 +693,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format FunctionSecretArgumentsFinder::Result secret_arguments; if (!settings.show_secrets) - secret_arguments = FunctionSecretArgumentsFinder::find(*this); + secret_arguments = FunctionSecretArgumentsFinderAST(*this).getResult(); for (size_t i = 0, size = arguments->children.size(); i < size; ++i) { @@ -757,7 +757,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format bool ASTFunction::hasSecretParts() const { - return (FunctionSecretArgumentsFinder::find(*this).hasSecrets()) || childrenHaveSecretParts(); + return (FunctionSecretArgumentsFinderAST(*this).getResult().hasSecrets()) || childrenHaveSecretParts(); } String getFunctionName(const IAST * ast) diff --git a/src/Parsers/FunctionSecretArgumentsFinder.cpp b/src/Parsers/FunctionSecretArgumentsFinder.cpp deleted file mode 100644 index bdeb29a37ba..00000000000 --- a/src/Parsers/FunctionSecretArgumentsFinder.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include -#include - - -namespace DB -{ - -FunctionSecretArgumentsFinder::Result FunctionSecretArgumentsFinder::find(const ASTFunction & function) -{ - return FunctionSecretArgumentsFinderAST(function).getResult(); -} - -FunctionSecretArgumentsFinder::Result FunctionSecretArgumentsFinder::find(const FunctionNode & function) -{ - return FunctionSecretArgumentsFinderTreeNode(function).getResult(); -} - -} diff --git a/src/Parsers/FunctionSecretArgumentsFinder.h b/src/Parsers/FunctionSecretArgumentsFinder.h index 950d913fe20..002ad94f6ea 100644 --- a/src/Parsers/FunctionSecretArgumentsFinder.h +++ b/src/Parsers/FunctionSecretArgumentsFinder.h @@ -1,7 +1,6 @@ #pragma once -#include -#include +#include namespace DB { @@ -24,9 +23,6 @@ public: return count != 0 || !nested_maps.empty(); } }; - - static Result find(const ASTFunction & function); - static Result find(const FunctionNode & function); }; } From 1b9e6c936e7e8123fd1d0c712c3a051a5bbbe89a 
Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 1 Mar 2024 22:29:56 +0000 Subject: [PATCH 140/356] better interfaces for virtual columns --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Interpreters/InterpreterDescribeQuery.cpp | 6 ++- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- .../InterpreterShowColumnsQuery.cpp | 2 +- src/Interpreters/JoinedTables.cpp | 2 +- src/Interpreters/MutationsInterpreter.cpp | 3 +- src/Interpreters/getTableExpressions.cpp | 4 +- .../processColumnTransformers.cpp | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 4 +- src/Storages/AlterCommands.cpp | 6 +-- src/Storages/HDFS/StorageHDFS.cpp | 6 +-- src/Storages/HDFS/StorageHDFSCluster.cpp | 2 +- src/Storages/IStorage.h | 4 +- src/Storages/MergeTree/MergeTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 6 +-- .../MergeTree/MergeTreeSequentialSource.cpp | 3 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- .../MergeTree/StorageFromMergeTreeDataPart.h | 2 +- src/Storages/NATS/NATSSource.cpp | 6 +-- .../StorageMaterializedPostgreSQL.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQSource.cpp | 6 +-- src/Storages/S3Queue/StorageS3Queue.cpp | 6 +-- src/Storages/StorageAzureBlob.cpp | 6 +-- src/Storages/StorageAzureBlobCluster.cpp | 2 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageFileCluster.cpp | 2 +- src/Storages/StorageMaterializedMySQL.cpp | 2 +- src/Storages/StorageMaterializedView.cpp | 2 +- src/Storages/StorageMerge.cpp | 2 +- src/Storages/StorageS3.cpp | 4 +- src/Storages/StorageS3Cluster.cpp | 2 +- src/Storages/StorageSnapshot.cpp | 12 ++--- src/Storages/StorageURL.cpp | 6 +-- src/Storages/StorageURLCluster.cpp | 2 +- src/Storages/System/IStorageSystemOneBlock.h | 2 +- src/Storages/System/StorageSystemJemalloc.cpp | 2 +- .../System/StorageSystemZooKeeper.cpp | 2 +- src/Storages/VirtualColumnsDescription.cpp | 44 +++++++++---------- src/Storages/VirtualColumnsDescription.h | 14 +++--- src/Storages/prepareReadingFromFormat.cpp | 6 +-- src/Storages/prepareReadingFromFormat.h | 2 +- 41 files changed, 99 insertions(+), 99 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a1b63960d40..3ae7a9d9f11 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -904,7 +904,7 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat void validateVirtualColumns(const IStorage & storage) { - auto virtual_columns = storage.getVirtualsDescription(); + auto virtual_columns = storage.getVirtualsPtr(); for (const auto & storage_column : storage.getInMemoryMetadataPtr()->getColumns()) { if (virtual_columns->tryGet(storage_column.name, VirtualsKind::Persistent)) diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 11542931775..04d44e34fff 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -155,7 +155,8 @@ void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpres auto table = table_function_ptr->execute(table_expression.table_function, getContext(), table_function_ptr->getName()); if (table) { - for (const auto & column : *table->getVirtualsDescription()) + auto virtuals = table->getVirtualsPtr(); + for (const auto & column : *virtuals) { if (!column_descriptions.has(column.name)) virtual_columns.push_back(column); @@ -178,7 +179,8 @@ void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression 
& t if (settings.describe_include_virtual_columns) { - for (const auto & column : *table->getVirtualsDescription()) + auto virtuals = table->getVirtualsPtr(); + for (const auto & column : *virtuals) { if (!column_descriptions.has(column.name)) virtual_columns.push_back(column); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 724cfca6a80..87013151a1a 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -133,7 +133,7 @@ Block InterpreterInsertQuery::getSampleBlock( if (auto * window_view = dynamic_cast(table.get())) return window_view->getInputHeader(); else if (no_destination) - return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals()); + return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtualsList()); else return metadata_snapshot->getSampleBlockNonMaterialized(); } diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index 149ba6d7575..f32ebceaa63 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -107,7 +107,7 @@ SELECT '' AS extra )"; // TODO Interpret query.extended. It is supposed to show internal/virtual columns. Need to fetch virtual column names, see - // IStorage::getVirtuals(). We can't easily do that via SQL. + // IStorage::getVirtualsList(). We can't easily do that via SQL. if (query.full) { diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 9be8bf178a1..49693332280 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -272,7 +272,7 @@ void JoinedTables::makeFakeTable(StoragePtr storage, const StorageMetadataPtr & auto & table = tables_with_columns.back(); table.addHiddenColumns(storage_columns.getMaterialized()); table.addHiddenColumns(storage_columns.getAliases()); - table.addHiddenColumns(storage->getVirtuals()); + table.addHiddenColumns(storage->getVirtualsList()); } else tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_header.getNamesAndTypesList()); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 7a0d68f7c87..6641c6b740c 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -349,7 +349,8 @@ bool MutationsInterpreter::Source::isCompactPart() const static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) { auto all_columns = metadata_snapshot->getColumns().getNamesOfPhysical(); - for (const auto & column : storage.getVirtuals()) + auto virtuals = storage.getVirtualsPtr(); + for (const auto & column : *virtuals) all_columns.push_back(column.name); return all_columns; } diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index 70e38526648..2853be4c05e 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -99,7 +99,7 @@ static NamesAndTypesList getColumnsFromTableExpression( names_and_type_list = columns.getOrdinary(); materialized = columns.getMaterialized(); aliases = columns.getAliases(); - virtuals = function_storage->getVirtuals(); + virtuals = function_storage->getVirtualsList(); } else if (table_expression.database_and_table_name) { @@ -110,7 +110,7 @@ static NamesAndTypesList getColumnsFromTableExpression( names_and_type_list = columns.getOrdinary(); 
materialized = columns.getMaterialized(); aliases = columns.getAliases(); - virtuals = table->getVirtuals(); + virtuals = table->getVirtualsList(); } return names_and_type_list; diff --git a/src/Interpreters/processColumnTransformers.cpp b/src/Interpreters/processColumnTransformers.cpp index 2a704d4a937..5ef331eb119 100644 --- a/src/Interpreters/processColumnTransformers.cpp +++ b/src/Interpreters/processColumnTransformers.cpp @@ -32,7 +32,7 @@ ASTPtr processColumnTransformers( tables_with_columns[0].addHiddenColumns(columns.getMaterialized()); tables_with_columns[0].addHiddenColumns(columns.getAliases()); - tables_with_columns[0].addHiddenColumns(table->getVirtuals()); + tables_with_columns[0].addHiddenColumns(table->getVirtualsList()); NameSet source_columns_set; for (const auto & identifier : query_columns->children) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index c4b707a0ce0..f051029c3dc 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -214,7 +214,7 @@ Chain buildPushingToViewsChain( /// If we don't write directly to the destination /// then expect that we're inserting with precalculated virtual columns - auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals()) + auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtualsList()) : metadata_snapshot->getSampleBlock(); /** TODO This is a very important line. At any insertion into the table one of chains should own lock. @@ -574,7 +574,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat views_data.source_storage_id, views_data.source_metadata_snapshot->getColumns(), std::move(block), - *views_data.source_storage->getVirtualsDescription())); + *views_data.source_storage->getVirtualsPtr())); QueryPipelineBuilder pipeline; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index e1c80112c72..086e7152fda 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1244,7 +1244,7 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata) void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { const auto & metadata = table->getInMemoryMetadata(); - const auto & virtuals = *table->getVirtualsDescription(); + auto virtuals = table->getVirtualsPtr(); auto all_columns = metadata.columns; /// Default expression for all added/modified columns @@ -1281,7 +1281,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const if (command.data_type->hasDynamicSubcolumns()) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. 
It has known bugs"); - if (virtuals.tryGet(column_name, VirtualsKind::Persistent)) + if (virtuals->tryGet(column_name, VirtualsKind::Persistent)) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: this column name is reserved for persistent virtual column", backQuote(column_name)); @@ -1495,7 +1495,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Cannot rename to {}: column with this name already exists", backQuote(command.rename_to)); - if (virtuals.tryGet(command.rename_to, VirtualsKind::Persistent)) + if (virtuals->tryGet(command.rename_to, VirtualsKind::Persistent)) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: this column name is reserved for persistent virtual column", backQuote(command.rename_to)); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 19b2817510d..c574f57fc6a 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -974,7 +974,7 @@ void StorageHDFS::read( size_t max_block_size, size_t num_streams) { - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && context_->getSettingsRef().optimize_count_from_files; @@ -1010,7 +1010,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) else if (storage->is_path_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->getVirtuals(), context); + auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->getVirtualsList(), context); iterator_wrapper = std::make_shared([glob_iterator]() { return glob_iterator->next(); @@ -1018,7 +1018,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) } else { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->getVirtuals(), context); + auto uris_iterator = std::make_shared(storage->uris, predicate, storage->getVirtualsList(), context); iterator_wrapper = std::make_shared([uris_iterator]() { return uris_iterator->next(); diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 8c6d7ce5670..bde8b84e349 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -88,7 +88,7 @@ void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB: RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { - auto iterator = std::make_shared(uri, predicate, getVirtuals(), context); + auto iterator = std::make_shared(uri, predicate, getVirtualsList(), context); auto callback = std::make_shared>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index e7ebb45cb46..f8d73038e09 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -229,8 +229,8 @@ public: /// virtual column will be overridden and inaccessible. 
/// /// By default return empty list of columns. - VirtualsDescriptionPtr getVirtualsDescription() const { return virtuals.get(); } - NamesAndTypesList getVirtuals() const { return virtuals.get()->getNamesAndTypesList(); } + VirtualsDescriptionPtr getVirtualsPtr() const { return virtuals.get(); } + NamesAndTypesList getVirtualsList() const { return virtuals.get()->getNamesAndTypesList(); } Block getVirtualsHeader() const { return virtuals.get()->getSampleBlock(); } Names getAllRegisteredNames() const override; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 4621314cb98..aa38198334e 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -1074,7 +1074,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() if (global_ctx->deduplicate) { - const auto & virtuals = *global_ctx->data->getVirtualsDescription(); + const auto & virtuals = *global_ctx->data->getVirtualsPtr(); /// We don't want to deduplicate by virtual persistent column. /// If deduplicate_by_columns is empty, add all columns except virtuals. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0b7ac39aa1b..7b1d08642e2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1031,7 +1031,7 @@ const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id Block MergeTreeData::getHeaderWithVirtualsForFilter() const { Block header; - auto virtuals_desc = getVirtualsDescription(); + auto virtuals_desc = getVirtualsPtr(); for (const auto & name : virtuals_useful_for_filter) if (auto column = virtuals_desc->tryGet(name)) header.insert({column->type->createColumn(), column->type, name}); @@ -3648,7 +3648,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts { auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & columns = metadata_snapshot->getColumns(); - const auto & virtuals = *getVirtualsDescription(); + auto virtuals = getVirtualsPtr(); if (!hasDynamicSubcolumns(columns)) return; @@ -3656,7 +3656,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts const auto & part_columns = part->getColumns(); for (const auto & part_column : part_columns) { - if (virtuals.has(part_column.name)) + if (virtuals->has(part_column.name)) continue; auto storage_column = columns.getPhysical(part_column.name); diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 2eb010c54ec..0d6b1d88075 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -195,7 +195,6 @@ static void fillBlockNumberColumns( Columns & res_columns, const NamesAndTypesList & columns_list, UInt64 block_number, - UInt64 block_offset, UInt64 num_rows) { chassert(res_columns.size() == columns_list.size()); @@ -231,7 +230,7 @@ try if (rows_read) { - fillBlockNumberColumns(columns, sample, data_part->info.min_block, current_row, rows_read); + fillBlockNumberColumns(columns, sample, data_part->info.min_block, rows_read); reader->fillVirtualColumns(columns, rows_read); current_row += rows_read; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index b3c36f7180b..56f832630b7 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -168,7 +168,7 @@ static void 
splitAndModifyMutationCommands( { if (!mutated_columns.contains(column.name)) { - if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtuals().contains(column.name)) + if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtualsPtr()->has(column.name)) { /// We cannot add the column because there's no such column in table. /// It's okay if the column was dropped. It may also absent in dropped_columns diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index da4405dfd40..ca8ed9abdb5 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -34,7 +34,7 @@ public: , partition_id(part_->info.partition_id) { setInMemoryMetadata(storage.getInMemoryMetadata()); - setVirtuals(*storage.getVirtualsDescription()); + setVirtuals(*storage.getVirtualsPtr()); } /// Used in queries with projection. diff --git a/src/Storages/NATS/NATSSource.cpp b/src/Storages/NATS/NATSSource.cpp index 3fc01eacb22..54f479faacc 100644 --- a/src/Storages/NATS/NATSSource.cpp +++ b/src/Storages/NATS/NATSSource.cpp @@ -9,10 +9,10 @@ namespace DB { -static std::pair getHeaders(StorageNATS & storage, const StorageSnapshotPtr & storage_snapshot) +static std::pair getHeaders(const StorageSnapshotPtr & storage_snapshot) { auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized(); - auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames()); + auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock(); return {non_virtual_header, virtual_header}; } @@ -33,7 +33,7 @@ NATSSource::NATSSource( const Names & columns, size_t max_block_size_, StreamingHandleErrorMode handle_error_mode_) - : NATSSource(storage_, storage_snapshot_, getHeaders(storage_, storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_) + : NATSSource(storage_, storage_snapshot_, getHeaders(storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_) { } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index c9866146a8b..64d329f74b2 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -128,7 +128,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( , nested_table_id(nested_storage_->getStorageID()) { setInMemoryMetadata(nested_storage_->getInMemoryMetadata()); - setVirtuals(*nested_storage_->getVirtualsDescription()); + setVirtuals(*nested_storage_->getVirtualsPtr()); } VirtualColumnsDescription StorageMaterializedPostgreSQL::createVirtuals() diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 72196e7dd3c..4dc257074f3 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -11,10 +11,10 @@ namespace DB { -static std::pair getHeaders(StorageRabbitMQ & storage_, const StorageSnapshotPtr & storage_snapshot) +static std::pair getHeaders(const StorageSnapshotPtr & storage_snapshot) { auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized(); - auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage_.getVirtuals().getNames()); + auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock(); return {non_virtual_header, virtual_header}; } @@ -40,7 
+40,7 @@ RabbitMQSource::RabbitMQSource( : RabbitMQSource( storage_, storage_snapshot_, - getHeaders(storage_, storage_snapshot_), + getHeaders(storage_snapshot_), context_, columns, max_block_size_, diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 765fcbd9684..6e7ac2b47b8 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -314,7 +314,7 @@ void StorageS3Queue::read( } auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); auto reading = std::make_unique( column_names, @@ -492,7 +492,7 @@ bool StorageS3Queue::streamToViews() auto block_io = interpreter.execute(); auto file_iterator = createFileIterator(s3queue_context, nullptr); - auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context)); Pipes pipes; pipes.reserve(s3queue_settings->s3queue_processing_threads_num); @@ -601,7 +601,7 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { auto glob_iterator = std::make_unique( - *configuration.client, configuration.url, predicate, getVirtuals(), local_context, + *configuration.client, configuration.url, predicate, getVirtualsList(), local_context, /* read_keys */nullptr, configuration.request_settings); return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 92dbc1025db..bd88620c55e 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -736,7 +736,7 @@ void StorageAzureBlob::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -772,13 +772,13 @@ void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->getVirtuals(), context, nullptr, context->getFileProgressCallback()); + predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } else { iterator_wrapper = std::make_shared( storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->getVirtuals(), context, nullptr, context->getFileProgressCallback()); + predicate, storage->getVirtualsList(), context, nullptr, 
context->getFileProgressCallback()); } } diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index 155a7220885..a80d121567a 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -80,7 +80,7 @@ RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension { auto iterator = std::make_shared( object_storage.get(), configuration.container, configuration.blob_path, - predicate, getVirtuals(), context, nullptr); + predicate, getVirtualsList(), context, nullptr); auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index a734d3967eb..0d220f2fd5d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1580,7 +1580,7 @@ void StorageFile::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && context->getSettingsRef().optimize_count_from_files; @@ -1608,7 +1608,7 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate) storage->paths, storage->archive_info, predicate, - storage->getVirtuals(), + storage->getVirtualsList(), context, storage->distributed_processing); } diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index 84691472809..d43e242f70c 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -76,7 +76,7 @@ void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const Sto RemoteQueryExecutor::Extension StorageFileCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { - auto iterator = std::make_shared(paths, std::nullopt, predicate, getVirtuals(), context); + auto iterator = std::make_shared(paths, std::nullopt, predicate, getVirtualsList(), context); auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } diff --git a/src/Storages/StorageMaterializedMySQL.cpp b/src/Storages/StorageMaterializedMySQL.cpp index 1651b0499ad..887c58ff816 100644 --- a/src/Storages/StorageMaterializedMySQL.cpp +++ b/src/Storages/StorageMaterializedMySQL.cpp @@ -23,7 +23,7 @@ StorageMaterializedMySQL::StorageMaterializedMySQL(const StoragePtr & nested_sto : StorageProxy(nested_storage_->getStorageID()), nested_storage(nested_storage_), database(database_) { setInMemoryMetadata(nested_storage->getInMemoryMetadata()); - setVirtuals(*nested_storage->getVirtualsDescription()); + setVirtuals(*nested_storage->getVirtualsPtr()); } bool StorageMaterializedMySQL::needRewriteQueryWithFinal(const Names & column_names) const diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index aeba01fdf8a..668dc84481d 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -168,7 +168,7 @@ QueryProcessingStage::Enum 
StorageMaterializedView::getQueryProcessingStage( StorageSnapshotPtr StorageMaterializedView::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const { /// We cannot set virtuals at table creation because target table may not exist at that time. - return std::make_shared(*this, metadata_snapshot, getTargetTable()->getVirtualsDescription()); + return std::make_shared(*this, metadata_snapshot, getTargetTable()->getVirtualsPtr()); } void StorageMaterializedView::read( diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 00513d1ec46..3947ae6c1f3 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -325,7 +325,7 @@ StorageSnapshotPtr StorageMerge::getStorageSnapshot(const StorageMetadataPtr & m auto virtuals = common_virtuals; if (auto first_table = getFirstTable([](auto && table) { return table; })) { - auto table_virtuals = first_table->getVirtualsDescription(); + auto table_virtuals = first_table->getVirtualsPtr(); for (const auto & column : *table_virtuals) { if (virtuals.has(column.name)) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index afd34792335..c7cb0163a0a 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -165,7 +165,7 @@ public: , num_streams(num_streams_) { query_configuration = storage.updateConfigurationAndGetCopy(context); - virtual_columns = storage.getVirtuals(); + virtual_columns = storage.getVirtualsList(); } private: @@ -1142,7 +1142,7 @@ void StorageS3::read( size_t max_block_size, size_t num_streams) { - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 7641c66eefd..8a13c2c6ca5 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -93,7 +93,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, predicate, getVirtuals(), context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); + *s3_configuration.client, s3_configuration.url, predicate, getVirtualsList(), context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 1893d65a64c..e5c1d3d1dea 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -22,7 +22,7 @@ StorageSnapshot::StorageSnapshot( StorageMetadataPtr metadata_) : storage(storage_) , metadata(std::move(metadata_)) - , virtual_columns(storage_.getVirtualsDescription()) + , virtual_columns(storage_.getVirtualsPtr()) { } @@ -42,7 +42,7 @@ StorageSnapshot::StorageSnapshot( ColumnsDescription object_columns_) : storage(storage_) , metadata(std::move(metadata_)) - , virtual_columns(storage_.getVirtualsDescription()) + , 
virtual_columns(storage_.getVirtualsPtr()) , object_columns(std::move(object_columns_)) { } @@ -54,7 +54,7 @@ StorageSnapshot::StorageSnapshot( DataPtr data_) : storage(storage_) , metadata(std::move(metadata_)) - , virtual_columns(storage_.getVirtualsDescription()) + , virtual_columns(storage_.getVirtualsPtr()) , object_columns(std::move(object_columns_)) , data(std::move(data_)) { @@ -83,7 +83,7 @@ NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) for (const auto & column : all_columns) column_names.insert(column.name); - auto virtuals_list = virtual_columns->get(options.virtuals_kind); + auto virtuals_list = virtual_columns->getNamesAndTypesList(options.virtuals_kind); for (const auto & column : virtuals_list) { if (column_names.contains(column.name)) @@ -150,7 +150,7 @@ CompressionCodecPtr StorageSnapshot::getCodecOrDefault(const String & column_nam if (const auto * column_desc = columns.tryGet(column_name)) return get_codec_or_default(*column_desc); - if (const auto virtual_desc = virtual_columns->tryGetDescription(column_name)) + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) return get_codec_or_default(*virtual_desc); return default_codec; @@ -172,7 +172,7 @@ ASTPtr StorageSnapshot::getCodecDescOrDefault(const String & column_name, Compre if (const auto * column_desc = columns.tryGet(column_name)) return get_codec_or_default(*column_desc); - if (const auto virtual_desc = virtual_columns->tryGetDescription(column_name)) + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) return get_codec_or_default(*virtual_desc); return default_codec->getFullCodecDesc(); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index ec5b6008c17..039be222e7e 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1058,7 +1058,7 @@ void IStorageURLBase::read( size_t num_streams) { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -1126,7 +1126,7 @@ void ReadFromURL::createIterator(const ActionsDAG::Node * predicate) else if (is_url_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uri, max_addresses, predicate, storage->getVirtuals(), context); + auto glob_iterator = std::make_shared(storage->uri, max_addresses, predicate, storage->getVirtualsList(), context); /// check if we filtered out all the paths if (glob_iterator->size() == 0) @@ -1229,7 +1229,7 @@ void StorageURLWithFailover::read( size_t num_streams) { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || 
read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 334c2ca249b..2e7c63d0097 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -90,7 +90,7 @@ void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageS RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { - auto iterator = std::make_shared(uri, context->getSettingsRef().glob_expansion_max_elements, predicate, getVirtuals(), context); + auto iterator = std::make_shared(uri, context->getSettingsRef().glob_expansion_max_elements, predicate, getVirtualsList(), context); auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 3b2807965a4..910bbaa13c2 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -52,7 +52,7 @@ public: size_t /*num_streams*/) override { storage_snapshot->check(column_names); - Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtualsList()); if (supportsColumnsMask()) { diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp index 15543208dd9..b55e32c479c 100644 --- a/src/Storages/System/StorageSystemJemalloc.cpp +++ b/src/Storages/System/StorageSystemJemalloc.cpp @@ -115,7 +115,7 @@ Pipe StorageSystemJemallocBins::read( { storage_snapshot->check(column_names); - auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtualsList()); MutableColumns res_columns = header.cloneEmptyColumns(); fillJemallocBins(res_columns); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index f2a4de87efd..d1bf86ba8ef 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -248,7 +248,7 @@ void StorageSystemZooKeeper::read( size_t max_block_size, size_t /*num_streams*/) { - auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtualsList()); auto read_step = std::make_unique( column_names, query_info, diff --git a/src/Storages/VirtualColumnsDescription.cpp b/src/Storages/VirtualColumnsDescription.cpp index 5dd249c5190..64097224ed9 100644 --- a/src/Storages/VirtualColumnsDescription.cpp +++ b/src/Storages/VirtualColumnsDescription.cpp @@ -34,15 +34,6 @@ void VirtualColumnsDescription::addPersistent(String name, DataTypePtr type, AST add({std::move(name), std::move(type), std::move(codec), std::move(comment), VirtualsKind::Persistent}); } -NamesAndTypesList VirtualColumnsDescription::get(VirtualsKind kind) const -{ - NamesAndTypesList result; - for (const auto & column : container) - if (static_cast(column.kind) & static_cast(kind)) - result.emplace_back(column.name, column.type); - return result; -} - std::optional 
VirtualColumnsDescription::tryGet(const String & name, VirtualsKind kind) const { auto it = container.get<1>().find(name); @@ -59,30 +50,22 @@ NameAndTypePair VirtualColumnsDescription::get(const String & name, VirtualsKind return *column; } -std::optional VirtualColumnsDescription::tryGetDescription(const String & name, VirtualsKind kind) const +const VirtualColumnDescription * VirtualColumnsDescription::tryGetDescription(const String & name, VirtualsKind kind) const { auto it = container.get<1>().find(name); if (it != container.get<1>().end() && (static_cast(it->kind) & static_cast(kind))) - return *it; - return {}; + return &(*it); + return nullptr; } -VirtualColumnDescription VirtualColumnsDescription::getDescription(const String & name, VirtualsKind kind) const +const VirtualColumnDescription & VirtualColumnsDescription::getDescription(const String & name, VirtualsKind kind) const { - auto column = tryGetDescription(name, kind); + const auto * column = tryGetDescription(name, kind); if (!column) throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no virtual column {}", name); return *column; } -NamesAndTypesList VirtualColumnsDescription::getNamesAndTypesList() const -{ - NamesAndTypesList result; - for (const auto & desc : container) - result.emplace_back(desc.name, desc.type); - return result; -} - Block VirtualColumnsDescription::getSampleBlock() const { Block result; @@ -91,4 +74,21 @@ Block VirtualColumnsDescription::getSampleBlock() const return result; } +NamesAndTypesList VirtualColumnsDescription::getNamesAndTypesList() const +{ + NamesAndTypesList result; + for (const auto & desc : container) + result.emplace_back(desc.name, desc.type); + return result; +} + +NamesAndTypesList VirtualColumnsDescription::getNamesAndTypesList(VirtualsKind kind) const +{ + NamesAndTypesList result; + for (const auto & column : container) + if (static_cast(column.kind) & static_cast(kind)) + result.emplace_back(column.name, column.type); + return result; +} + } diff --git a/src/Storages/VirtualColumnsDescription.h b/src/Storages/VirtualColumnsDescription.h index 3d79167c623..2f46bbcab82 100644 --- a/src/Storages/VirtualColumnsDescription.h +++ b/src/Storages/VirtualColumnsDescription.h @@ -49,17 +49,15 @@ public: NameAndTypePair get(const String & name) const { return get(name, VirtualsKind::All); } std::optional tryGet(const String & name) const { return tryGet(name, VirtualsKind::All); } - std::optional tryGetDescription(const String & name, VirtualsKind kind) const; - VirtualColumnDescription getDescription(const String & name, VirtualsKind kind) const; + const VirtualColumnDescription * tryGetDescription(const String & name, VirtualsKind kind) const; + const VirtualColumnDescription & getDescription(const String & name, VirtualsKind kind) const; - std::optional tryGetDescription(const String & name) const { return tryGetDescription(name, VirtualsKind::All); } - VirtualColumnDescription getDescription(const String & name) const { return getDescription(name, VirtualsKind::All); } - - NamesAndTypesList get(VirtualsKind kind) const; - NamesAndTypesList getNamesAndTypesList() const; + const VirtualColumnDescription * tryGetDescription(const String & name) const { return tryGetDescription(name, VirtualsKind::All); } + const VirtualColumnDescription & getDescription(const String & name) const { return getDescription(name, VirtualsKind::All); } Block getSampleBlock() const; - Block getSampleBlock(const Names & names) const; + NamesAndTypesList getNamesAndTypesList() const; + 
NamesAndTypesList getNamesAndTypesList(VirtualsKind kind) const; private: Container container; diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp index 6be4213ec6b..406b7f379f9 100644 --- a/src/Storages/prepareReadingFromFormat.cpp +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -4,7 +4,7 @@ namespace DB { -ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals) +ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns) { ReadFromFormatInfo info; /// Collect requested virtual columns and remove them from requested columns. @@ -12,11 +12,11 @@ ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, c for (const auto & column_name : requested_columns) { bool is_virtual = false; - for (const auto & virtual_column : virtuals) + for (const auto & virtual_column : *storage_snapshot->virtual_columns) { if (column_name == virtual_column.name) { - info.requested_virtual_columns.push_back(virtual_column); + info.requested_virtual_columns.emplace_back(virtual_column.name, virtual_column.type); is_virtual = true; break; } diff --git a/src/Storages/prepareReadingFromFormat.h b/src/Storages/prepareReadingFromFormat.h index c5f3959a550..e4d62c29ec6 100644 --- a/src/Storages/prepareReadingFromFormat.h +++ b/src/Storages/prepareReadingFromFormat.h @@ -22,5 +22,5 @@ namespace DB }; /// Get all needed information for reading from data in some input format. - ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals); + ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns); } From 4735eb3ab419e9a9c21bdc58ef2bcc4ee2823353 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 1 Mar 2024 23:29:17 +0100 Subject: [PATCH 141/356] wait for pool jobs in case of exception on scheduling --- src/Interpreters/AsynchronousInsertQueue.cpp | 51 +++++++++++--------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index c7a39ad610b..e25cedb916c 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -214,34 +214,41 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo AsynchronousInsertQueue::~AsynchronousInsertQueue() { - LOG_TRACE(log, "Shutting down the asynchronous insertion queue"); - shutdown = true; - - for (size_t i = 0; i < pool_size; ++i) + try { - auto & shard = queue_shards[i]; + LOG_TRACE(log, "Shutting down the asynchronous insertion queue"); + shutdown = true; - shard.are_tasks_available.notify_one(); - assert(dump_by_first_update_threads[i].joinable()); - dump_by_first_update_threads[i].join(); - - if (flush_on_shutdown) - { - for (auto & [_, elem] : shard.queue) - scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext(), i); - } - else + for (size_t i = 0; i < pool_size; ++i) { + auto & shard = queue_shards[i]; - for (auto & [_, elem] : shard.queue) - for (const auto & entry : elem.data->entries) - entry->finish(std::make_exception_ptr(Exception( - 
ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded)"))); + shard.are_tasks_available.notify_one(); + assert(dump_by_first_update_threads[i].joinable()); + dump_by_first_update_threads[i].join(); + + if (flush_on_shutdown) + { + for (auto & [_, elem] : shard.queue) + scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext(), i); + } + else + { + for (auto & [_, elem] : shard.queue) + for (const auto & entry : elem.data->entries) + entry->finish( + std::make_exception_ptr(Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded)"))); + } } + + pool.wait(); + LOG_TRACE(log, "Asynchronous insertion queue finished"); + } + catch (...) + { + tryLogCurrentException(log); + pool.wait(); } - - pool.wait(); - LOG_TRACE(log, "Asynchronous insertion queue finished"); } void AsynchronousInsertQueue::scheduleDataProcessingJob( From 420d98295daa84ef86f5a63b73a3c6dc9ff7ed7f Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 1 Mar 2024 23:24:35 +0000 Subject: [PATCH 142/356] Fix: reorder indices only if necessary --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 8f0db044d8f..d3b1a324de9 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1423,15 +1423,17 @@ static void buildIndexes( } // move minmax indices to first positions, so they will be applied first as cheapest ones - std::sort(begin(skip_indexes.useful_indices), end(skip_indexes.useful_indices), [](const auto & l, const auto & r) + std::stable_sort(begin(skip_indexes.useful_indices), end(skip_indexes.useful_indices), [](const auto & l, const auto & r) { - if (typeid_cast(l.index.get())) - return true; // left is min max + const bool l_min_max = (typeid_cast(l.index.get())); + const bool r_min_max = (typeid_cast(r.index.get())); + if (l_min_max == r_min_max) + return false; - if (typeid_cast(r.index.get())) - return false; // right is min max but left is not + if (l_min_max) + return true; // left is min max but right is not - return true; + return false; // right is min max but left is not }); indexes->skip_indexes = std::move(skip_indexes); From 1a5afa90a16efb51064c8cff4944fb4c017da42a Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Thu, 29 Feb 2024 17:43:06 -0800 Subject: [PATCH 143/356] fix(prql): Robust panic handler --- .gitignore | 2 +- rust/prql/src/lib.rs | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 5341f23a94f..1ea8f83dcc2 100644 --- a/.gitignore +++ b/.gitignore @@ -165,7 +165,7 @@ tests/queries/0_stateless/*.expect.history tests/integration/**/_gen # rust -/rust/**/target +/rust/**/target* # It is autogenerated from *.in /rust/**/.cargo/config.toml /rust/**/vendor diff --git a/rust/prql/src/lib.rs b/rust/prql/src/lib.rs index 621f6aae5a2..e17229e0f38 100644 --- a/rust/prql/src/lib.rs +++ b/rust/prql/src/lib.rs @@ -57,14 +57,8 @@ pub unsafe extern "C" fn prql_to_sql( out: *mut *mut u8, out_size: *mut u64, ) -> i64 { - let ret = panic::catch_unwind(|| { - return prql_to_sql_impl(query, size, out, out_size); - }); - return match ret { - // NOTE: using cxxbridge we can return proper Result<> type. - Err(_err) => 1, - Ok(res) => res, - } + // NOTE: using cxxbridge we can return proper Result<> type. 
+ panic::catch_unwind(|| prql_to_sql_impl(query, size, out, out_size)).unwrap_or(1) } #[no_mangle] From 57482de0f0858f5a03e60a7310227106487ce438 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Sat, 2 Mar 2024 01:16:47 +0100 Subject: [PATCH 144/356] Update FunctionBinaryArithmetic.h --- src/Functions/FunctionBinaryArithmetic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 9ad74f6332f..79e5ee442c2 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -171,7 +171,7 @@ public: using ResultDataType = Switch< /// Result must be Integer Case::int_div || IsOperation::int_div_or_zero, - std::conditional_t && IsDataTypeNumber, DataTypeFromFieldType, InvalidType>>, + std::conditional_t && IsDataTypeDecimalOrNumber, DataTypeFromFieldType, InvalidType>>, /// Decimal cases Case || IsDataTypeDecimal, DecimalResultDataType>, Case< From d15f7b21588ff3e8b14a71eb3e0d206a745464df Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 01:42:59 +0100 Subject: [PATCH 145/356] Fix questionable behavior in the `parseDateTimeBestEffort` function --- src/IO/parseDateTimeBestEffort.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 9734ba1c84f..caf51d94bb3 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -582,11 +582,18 @@ ReturnType parseDateTimeBestEffortImpl( day_of_month = 1; if (!month) month = 1; + if (!year) { + /// If year is not specified, it will be the current year if the date is unknown or not greater than today, + /// otherwise it will be the previous year. + /// This convoluted logic is needed to parse the syslog format, which looks as follows: "Mar 3 01:33:48". + /// If you have questions, ask Victor Krasnov, https://www.linkedin.com/in/vickr/ + time_t now = time(nullptr); - UInt16 curr_year = local_time_zone.toYear(now); - year = now < local_time_zone.makeDateTime(curr_year, month, day_of_month, hour, minute, second) ? curr_year - 1 : curr_year; + auto today = local_time_zone.toDayNum(now); + UInt16 curr_year = local_time_zone.toYear(today); + year = local_time_zone.makeDayNum(curr_year, month, day_of_month) <= today ? curr_year : curr_year - 1; } auto is_leap_year = (year % 400 == 0) || (year % 100 != 0 && year % 4 == 0); From d1728f0b4391e306aea2e2be449332d1907681f5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 01:43:03 +0100 Subject: [PATCH 146/356] Revert "Merge pull request #60547 from Algunenano/syslog" This reverts commit 0ad0344dc78d761a20acad2a0b285c79dc4287e5, reversing changes made to ea8a271409163ee94565524696064d05a397fd9a. 
--- ...3_parsedatetimebesteffort_syslog.reference | 37 +++++++++++++ .../02783_parsedatetimebesteffort_syslog.sql | 54 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference create mode 100644 tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql diff --git a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference new file mode 100644 index 00000000000..1340b3affe3 --- /dev/null +++ b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference @@ -0,0 +1,37 @@ +The reference time point is 2023-06-30 23:59:30 +─────────────────────────────────────────────── +The argument is before the reference time point +─────────────────────────────────────────────── +Row 1: +────── +syslog_arg: Jun 30 23:58:30 +res: 2023-06-30 23:58:30 +res_null: 2023-06-30 23:58:30 +res_zero: 2023-06-30 23:58:30 +res_us: 2023-06-30 23:58:30 +res_us_null: 2023-06-30 23:58:30 +res_us_zero: 2023-06-30 23:58:30 +res64: 2023-06-30 23:58:30.000 +res64_null: 2023-06-30 23:58:30.000 +res64_zero: 2023-06-30 23:58:30.000 +res64_us: 2023-06-30 23:58:30.000 +res64_us_null: 2023-06-30 23:58:30.000 +res64_us_zero: 2023-06-30 23:58:30.000 +────────────────────────────────────────────── +The argument is after the reference time point +────────────────────────────────────────────── +Row 1: +────── +syslog_arg: Jul 1 00:00:30 +res: 2022-07-01 00:00:30 +res_null: 2022-07-01 00:00:30 +res_zero: 2022-07-01 00:00:30 +res_us: 2022-07-01 00:00:30 +res_us_null: 2022-07-01 00:00:30 +res_us_zero: 2022-07-01 00:00:30 +res64: 2022-07-01 00:00:30.000 +res64_null: 2022-07-01 00:00:30.000 +res64_zero: 2022-07-01 00:00:30.000 +res64_us: 2022-07-01 00:00:30.000 +res64_us_null: 2022-07-01 00:00:30.000 +res64_us_zero: 2022-07-01 00:00:30.000 diff --git a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql new file mode 100644 index 00000000000..c67722393ab --- /dev/null +++ b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql @@ -0,0 +1,54 @@ +SET session_timezone = 'UTC'; + +SELECT 'The reference time point is 2023-06-30 23:59:30'; +SELECT '───────────────────────────────────────────────'; +SELECT 'The argument is before the reference time point'; +SELECT '───────────────────────────────────────────────'; + +WITH + toDateTime('2023-06-30 23:59:30') AS dt_ref, + now() AS dt_now, + date_sub(MINUTE, 1, dt_now) as dt_before, + dateDiff(SECOND, dt_ref, dt_now) AS time_shift, + formatDateTime(dt_before, '%b %e %T') AS syslog_before +SELECT + formatDateTime(dt_before - time_shift, '%b %e %T') AS syslog_arg, + parseDateTimeBestEffort(syslog_before) - time_shift AS res, + parseDateTimeBestEffortOrNull(syslog_before) - time_shift AS res_null, + parseDateTimeBestEffortOrZero(syslog_before) - time_shift AS res_zero, + parseDateTimeBestEffortUS(syslog_before) - time_shift AS res_us, + parseDateTimeBestEffortUSOrNull(syslog_before) - time_shift AS res_us_null, + parseDateTimeBestEffortUSOrZero(syslog_before) - time_shift AS res_us_zero, + parseDateTime64BestEffort(syslog_before) - time_shift AS res64, + parseDateTime64BestEffortOrNull(syslog_before) - time_shift AS res64_null, + parseDateTime64BestEffortOrZero(syslog_before) - time_shift AS res64_zero, + parseDateTime64BestEffortUS(syslog_before) - time_shift AS res64_us, + 
parseDateTime64BestEffortUSOrNull(syslog_before) - time_shift AS res64_us_null, + parseDateTime64BestEffortUSOrZero(syslog_before) - time_shift AS res64_us_zero +FORMAT Vertical; + +SELECT '──────────────────────────────────────────────'; +SELECT 'The argument is after the reference time point'; +SELECT '──────────────────────────────────────────────'; + +WITH + toDateTime('2023-06-30 23:59:30') AS dt_ref, + now() AS dt_now, + date_add(MINUTE, 1, dt_now) as dt_after, + dateDiff(SECOND, dt_ref, dt_now) AS time_shift, + formatDateTime(dt_after, '%b %e %T') AS syslog_after +SELECT + formatDateTime(dt_after - time_shift, '%b %e %T') AS syslog_arg, + parseDateTimeBestEffort(syslog_after) - time_shift AS res, + parseDateTimeBestEffortOrNull(syslog_after) - time_shift AS res_null, + parseDateTimeBestEffortOrZero(syslog_after) - time_shift AS res_zero, + parseDateTimeBestEffortUS(syslog_after) - time_shift AS res_us, + parseDateTimeBestEffortUSOrNull(syslog_after) - time_shift AS res_us_null, + parseDateTimeBestEffortUSOrZero(syslog_after) - time_shift AS res_us_zero, + parseDateTime64BestEffort(syslog_after) - time_shift AS res64, + parseDateTime64BestEffortOrNull(syslog_after) - time_shift AS res64_null, + parseDateTime64BestEffortOrZero(syslog_after) - time_shift AS res64_zero, + parseDateTime64BestEffortUS(syslog_after) - time_shift AS res64_us, + parseDateTime64BestEffortUSOrNull(syslog_after) - time_shift AS res64_us_null, + parseDateTime64BestEffortUSOrZero(syslog_after) - time_shift AS res64_us_zero +FORMAT Vertical; From fc7e66accbebc1e6576528d7412279dd6e3f03ad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 04:41:44 +0100 Subject: [PATCH 147/356] Fix build --- src/Interpreters/InterpreterSystemQuery.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index a078d99facf..292c538c8c7 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -1209,7 +1209,12 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() break; } case Type::DROP_DISK_METADATA_CACHE: - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); + case Type::DROP_DISTRIBUTED_CACHE: + case Type::STOP_VIRTUAL_PARTS_UPDATE: + case Type::START_VIRTUAL_PARTS_UPDATE: + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only available in ClickHouse Cloud, https://clickhouse.cloud/"); + } case Type::RELOAD_DICTIONARY: case Type::RELOAD_DICTIONARIES: case Type::RELOAD_EMBEDDED_DICTIONARIES: From adeccecba93fcdfb81d0761c0f15b39d9bf2b471 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 05:40:19 +0100 Subject: [PATCH 148/356] Fix build --- src/Common/SystemLogBase.cpp | 6 ++++++ src/Common/SystemLogBase.h | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index aef4e19a70c..4dee6d905d9 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -260,4 +260,10 @@ void SystemLogBase::add(LogElement element) template void SystemLogBase::notifyFlush(bool force) { queue->notifyFlush(force); } +#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase; +SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) + +#define INSTANTIATE_SYSTEM_LOG_QUEUE(ELEMENT) template class SystemLogQueue; +SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_QUEUE) + } diff --git a/src/Common/SystemLogBase.h 
b/src/Common/SystemLogBase.h
index b4d6f2e98bb..95906c63349 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -29,7 +29,6 @@
 M(TextLogElement) \
 M(S3QueueLogElement) \
 M(FilesystemCacheLogElement) \
- M(DistributedCacheLogElement) \
 M(FilesystemReadPrefetchesLogElement) \
 M(AsynchronousInsertLogElement) \
 M(BackupLogElement) \

From 8785b0700c519ef831bc42036dbc6fb6aee44fe1 Mon Sep 17 00:00:00 2001
From: Max Kainov 
Date: Fri, 1 Mar 2024 00:48:31 +0000
Subject: [PATCH 149/356] CI: build_report job remove dependencies on yml
 #no_merge_comit #job_ClickHouse_build_check #job_ClickHouse_special_build_check #job_style_check

---
 .github/workflows/backport_branches.yml | 16 +----
 .github/workflows/master.yml | 20 ++----
 .github/workflows/pull_request.yml | 16 +----
 .github/workflows/release_branches.yml | 18 +----
 tests/ci/build_report_check.py | 93 ++++++++-----------------
 tests/ci/ci_config.py | 27 +++++--
 tests/ci/pr_info.py | 6 ++
 7 files changed, 69 insertions(+), 127 deletions(-)

diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index 92c07ed333a..2a98722414b 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -166,14 +166,8 @@ jobs:
 uses: ./.github/workflows/reusable_test.yml
 with:
 test_name: ClickHouse build check
- runner_type: style-checker
+ runner_type: style-checker-aarch64
 data: ${{ needs.RunConfig.outputs.data }}
- additional_envs: |
- NEEDS_DATA< 0
- ), "BUG. if not a PR, report must be created on the same branch"
- build_results.append(build_result)
-
- # The code to collect missing reports for failed jobs
- missing_job_names = [
- name
- for name, data in needs_data.items()
- if not any(1 for br in build_results if br.job_name.startswith(name))
- and data["result"] != "skipped"
- ]
- missing_builds = len(missing_job_names)
- for job_name in reversed(missing_job_names):
- build_result = BuildResult.missing_result("missing")
- build_result.job_name = job_name
- build_result.status = PENDING
- logging.info(
- "There is missing report for %s, created a dummy result %s",
- job_name,
- build_result,
- )
- build_results.insert(0, build_result)
+ if build_name == Build.FUZZERS:
+ logging.info("Build [%s] is missing - skip", Build.FUZZERS)
+ continue
+ logging.warning("Build results for %s is missing", build_name)
+ build_result = BuildResult.missing_result("missing")
+ build_result.job_name = build_name
+ build_result.status = PENDING
+ logging.info(
+ "There is missing report for %s, created a dummy result %s",
+ build_name,
+ build_result,
+ )
+ missing_builds += 1
+ build_results.insert(0, build_result)
+ else:
+ assert (
+ pr_info.head_ref == build_result.head_ref or pr_info.number > 0
+ ), "BUG. 
if not a PR, report must be created on the same branch" + build_results.append(build_result) # Calculate artifact groups like packages and binaries total_groups = sum(len(br.grouped_urls) for br in build_results) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 7ad418eba3c..dd175177858 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -276,6 +276,7 @@ class BuildReportConfig: builds: List[str] job_config: JobConfig = field( default_factory=lambda: JobConfig( + run_command='build_report_check.py "$CHECK_NAME"', digest=DigestConfig( include_paths=[ "./tests/ci/build_report_check.py", @@ -570,7 +571,25 @@ class CIConfig: for check_name in config: # type: ignore yield check_name - def get_builds_for_report(self, report_name: str) -> List[str]: + def get_builds_for_report( + self, report_name: str, release: bool = False, backport: bool = False + ) -> List[str]: + # hack to modify build list for release and bp wf + assert not (release and backport), "Invalid input" + if backport and report_name == JobNames.BUILD_CHECK: + return [ + Build.PACKAGE_RELEASE, + Build.PACKAGE_AARCH64, + Build.PACKAGE_ASAN, + Build.PACKAGE_TSAN, + Build.PACKAGE_DEBUG, + ] + if release and report_name == JobNames.BUILD_CHECK_SPECIAL: + return [ + Build.BINARY_DARWIN, + Build.BINARY_DARWIN_AARCH64, + ] + return self.builds_report_config[report_name].builds @classmethod @@ -837,9 +856,6 @@ CI_CONFIG = CIConfig( Build.PACKAGE_TSAN, Build.PACKAGE_MSAN, Build.PACKAGE_DEBUG, - Build.PACKAGE_RELEASE_COVERAGE, - Build.BINARY_RELEASE, - Build.FUZZERS, ] ), JobNames.BUILD_CHECK_SPECIAL: BuildReportConfig( @@ -855,6 +871,9 @@ CI_CONFIG = CIConfig( Build.BINARY_S390X, Build.BINARY_AMD64_COMPAT, Build.BINARY_AMD64_MUSL, + Build.PACKAGE_RELEASE_COVERAGE, + Build.BINARY_RELEASE, + Build.FUZZERS, ] ), }, diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 4aac7ad73df..aba32d88c0a 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -2,6 +2,7 @@ import json import logging import os +import re from typing import Dict, List, Set, Union from urllib.parse import quote @@ -288,6 +289,11 @@ class PRInfo: def is_master(self) -> bool: return self.number == 0 and self.head_ref == "master" + def is_release(self) -> bool: + return self.number == 0 and bool( + re.match(r"^2[1-9]\.[1-9][0-9]*$", self.head_ref) + ) + def is_release_branch(self) -> bool: return self.number == 0 From dff1bf335867c0e49d4e681ca80eb319a93f7760 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 17:17:49 +0100 Subject: [PATCH 150/356] Remove unit test for ColumnObject --- src/Columns/tests/gtest_column_object.cpp | 157 ---------------------- 1 file changed, 157 deletions(-) delete mode 100644 src/Columns/tests/gtest_column_object.cpp diff --git a/src/Columns/tests/gtest_column_object.cpp b/src/Columns/tests/gtest_column_object.cpp deleted file mode 100644 index bef16e4fb56..00000000000 --- a/src/Columns/tests/gtest_column_object.cpp +++ /dev/null @@ -1,157 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -using namespace DB; - -static pcg64 rng(randomSeed()); - -Field getRandomField(size_t type) -{ - switch (type) - { - case 0: - return rng(); - case 1: - return std::uniform_real_distribution<>(0.0, 1.0)(rng); - case 2: - return std::string(rng() % 10, 'a' + rng() % 26); - default: - return Field(); - } -} - -std::pair> generate(size_t size) -{ - bool has_defaults = rng() % 3 == 0; - size_t 
num_defaults = has_defaults ? rng() % size : 0; - - ColumnObject::Subcolumn subcolumn(num_defaults, false); - std::vector fields; - - while (subcolumn.size() < size) - { - size_t part_size = rng() % (size - subcolumn.size()) + 1; - size_t field_type = rng() % 3; - - for (size_t i = 0; i < part_size; ++i) - { - fields.push_back(getRandomField(field_type)); - subcolumn.insert(fields.back()); - } - } - - std::vector result_fields; - for (size_t i = 0; i < num_defaults; ++i) - result_fields.emplace_back(); - - result_fields.insert(result_fields.end(), fields.begin(), fields.end()); - return {std::move(subcolumn), std::move(result_fields)}; -} - -void checkFieldsAreEqual(ColumnObject::Subcolumn subcolumn, const std::vector & fields) -{ - ASSERT_EQ(subcolumn.size(), fields.size()); - for (size_t i = 0; i < subcolumn.size(); ++i) - { - Field field; - subcolumn.get(i, field); // Also check 'get' method. - if (!applyVisitor(FieldVisitorAccurateEquals(), field, fields[i])) - { - std::cerr << fmt::format("Wrong value at position {}, expected {}, got {}", - i, applyVisitor(FieldVisitorToString(), fields[i]), applyVisitor(FieldVisitorToString(), field)); - ASSERT_TRUE(false); - } - } -} - -constexpr size_t T = 1000; -constexpr size_t N = 1000; - -TEST(ColumnObject, InsertRangeFrom) -{ - for (size_t t = 0; t < T; ++t) - { - auto [subcolumn_dst, fields_dst] = generate(N); - auto [subcolumn_src, fields_src] = generate(N); - - ASSERT_EQ(subcolumn_dst.size(), fields_dst.size()); - ASSERT_EQ(subcolumn_src.size(), fields_src.size()); - - const auto & type_dst = subcolumn_dst.getLeastCommonType(); - const auto & type_src = subcolumn_src.getLeastCommonType(); - auto type_res = getLeastSupertypeOrString(DataTypes{type_dst, type_src}); - - size_t from = rng() % subcolumn_src.size(); - size_t to = rng() % subcolumn_src.size(); - if (from > to) - std::swap(from, to); - ++to; - - for (auto & field : fields_dst) - { - if (field.isNull()) - field = type_res->getDefault(); - else - field = convertFieldToTypeOrThrow(field, *type_res); - } - - for (size_t i = from; i < to; ++i) - { - if (fields_src[i].isNull()) - fields_dst.push_back(type_res->getDefault()); - else - fields_dst.push_back(convertFieldToTypeOrThrow(fields_src[i], *type_res)); - - } - - subcolumn_dst.insertRangeFrom(subcolumn_src, from, to - from); - checkFieldsAreEqual(subcolumn_dst, fields_dst); - } -} - -TEST(ColumnObject, Unflatten) -{ - auto check_empty_tuple = [](const auto & type, const auto & column) - { - const auto & type_tuple = assert_cast(*type); - const auto & column_tuple = assert_cast(*column); - - ASSERT_EQ(type_tuple.getElements().size(), 1); - ASSERT_EQ(type_tuple.getElements()[0]->getName(), "UInt8"); - ASSERT_EQ(type_tuple.getElementNames()[0], ColumnObject::COLUMN_NAME_DUMMY); - - ASSERT_EQ(column_tuple.getColumns().size(), 1); - ASSERT_EQ(column_tuple.getColumns()[0]->getName(), "UInt8"); - }; - - { - auto column_object = ColumnObject::create(false); - auto [column, type] = unflattenObjectToTuple(*column_object); - - check_empty_tuple(type, column); - ASSERT_EQ(column->size(), 0); - } - - { - auto column_object = ColumnObject::create(false); - column_object->insertManyDefaults(5); - auto [column, type] = unflattenObjectToTuple(*column_object); - - check_empty_tuple(type, column); - ASSERT_EQ(column->size(), 5); - } -} From 0e62ed1e54ae65d16ad165ea8ed016c4eedd29a2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 17:25:25 +0100 Subject: [PATCH 151/356] Improve unit tests --- src/Columns/tests/gtest_weak_hash_32.cpp 
| 3 --- src/Common/tests/gtest_DateLUTImpl.cpp | 8 ++++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Columns/tests/gtest_weak_hash_32.cpp b/src/Columns/tests/gtest_weak_hash_32.cpp index 44337e5d3f0..2c95998761b 100644 --- a/src/Columns/tests/gtest_weak_hash_32.cpp +++ b/src/Columns/tests/gtest_weak_hash_32.cpp @@ -1,12 +1,10 @@ #include -#include #include #include #include #include #include -#include #include #include @@ -14,7 +12,6 @@ #include #include -#include #include #include diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 3d3a3f04941..3f9b75e264d 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -1,3 +1,9 @@ +#if !defined(SANITIZER) + +/// This test is slow due to exhaustive checking of time zones. +/// Better to replace with randomization. +/// Also, recommended to replace with a functional test for better maintainability. + #include #include @@ -548,3 +554,5 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970, // {0, 0 + 11 * 3600 * 24 + 12, 11}, })) ); + +#endif From df498107c91cabb20e0e66db1b8b8fa122b0a842 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 17:29:09 +0100 Subject: [PATCH 152/356] Fix test --- tests/integration/test_grant_and_revoke/test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 75a59ceac39..46d8d254a0a 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -186,10 +186,7 @@ def test_grant_all_on_table(): instance.query("GRANT ALL ON test.table TO B", user="A") assert ( instance.query("SHOW GRANTS FOR B") - == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, " - "DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, " - "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " - "SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.`table` TO B\n" + == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM VIRTUAL PARTS UPDATE, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.`table` TO B\n" ) instance.query("REVOKE ALL ON test.table FROM B", user="A") assert instance.query("SHOW GRANTS FOR B") == "" From 56a9c160bc2c08071d4e936f89a5505bc7f210c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 17:29:09 +0100 Subject: [PATCH 153/356] Fix test --- tests/integration/test_grant_and_revoke/test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 75a59ceac39..46d8d254a0a 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -186,10 +186,7 @@ def test_grant_all_on_table(): instance.query("GRANT ALL ON test.table TO B", user="A") assert ( instance.query("SHOW GRANTS FOR B") - == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, " - "DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, " - "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " - "SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.`table` TO B\n" + == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM VIRTUAL PARTS UPDATE, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.`table` TO B\n" ) instance.query("REVOKE ALL ON test.table FROM B", user="A") assert instance.query("SHOW GRANTS FOR B") == "" From 6094dca61c992a05a33aa9412182ffb7f9928b64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 17:35:53 +0100 Subject: [PATCH 154/356] Update tests --- ...3_parsedatetimebesteffort_syslog.reference | 52 +++++++++---------- .../02783_parsedatetimebesteffort_syslog.sql | 4 +- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference index 1340b3affe3..ef9d076449a 100644 --- a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference +++ b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference @@ -4,34 +4,34 @@ The argument is before the reference time point ─────────────────────────────────────────────── Row 1: ────── -syslog_arg: Jun 30 23:58:30 -res: 2023-06-30 23:58:30 -res_null: 2023-06-30 23:58:30 -res_zero: 2023-06-30 23:58:30 -res_us: 2023-06-30 23:58:30 -res_us_null: 2023-06-30 23:58:30 -res_us_zero: 2023-06-30 23:58:30 -res64: 2023-06-30 23:58:30.000 -res64_null: 2023-06-30 23:58:30.000 -res64_zero: 2023-06-30 23:58:30.000 -res64_us: 2023-06-30 23:58:30.000 -res64_us_null: 2023-06-30 23:58:30.000 -res64_us_zero: 2023-06-30 23:58:30.000 +syslog_arg: Jun 29 23:59:30 +res: 2023-06-29 23:59:30 +res_null: 2023-06-29 23:59:30 +res_zero: 2023-06-29 23:59:30 +res_us: 2023-06-29 23:59:30 +res_us_null: 2023-06-29 23:59:30 +res_us_zero: 2023-06-29 23:59:30 +res64: 2023-06-29 23:59:30.000 +res64_null: 2023-06-29 23:59:30.000 +res64_zero: 2023-06-29 23:59:30.000 +res64_us: 2023-06-29 23:59:30.000 +res64_us_null: 2023-06-29 23:59:30.000 +res64_us_zero: 2023-06-29 23:59:30.000 
────────────────────────────────────────────── The argument is after the reference time point ────────────────────────────────────────────── Row 1: ────── -syslog_arg: Jul 1 00:00:30 -res: 2022-07-01 00:00:30 -res_null: 2022-07-01 00:00:30 -res_zero: 2022-07-01 00:00:30 -res_us: 2022-07-01 00:00:30 -res_us_null: 2022-07-01 00:00:30 -res_us_zero: 2022-07-01 00:00:30 -res64: 2022-07-01 00:00:30.000 -res64_null: 2022-07-01 00:00:30.000 -res64_zero: 2022-07-01 00:00:30.000 -res64_us: 2022-07-01 00:00:30.000 -res64_us_null: 2022-07-01 00:00:30.000 -res64_us_zero: 2022-07-01 00:00:30.000 +syslog_arg: Jul 1 23:59:30 +res: 2022-06-30 23:59:30 +res_null: 2022-06-30 23:59:30 +res_zero: 2022-06-30 23:59:30 +res_us: 2022-06-30 23:59:30 +res_us_null: 2022-06-30 23:59:30 +res_us_zero: 2022-06-30 23:59:30 +res64: 2022-06-30 23:59:30.000 +res64_null: 2022-06-30 23:59:30.000 +res64_zero: 2022-06-30 23:59:30.000 +res64_us: 2022-06-30 23:59:30.000 +res64_us_null: 2022-06-30 23:59:30.000 +res64_us_zero: 2022-06-30 23:59:30.000 diff --git a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql index c67722393ab..ecaec9f99bf 100644 --- a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql +++ b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql @@ -8,7 +8,7 @@ SELECT '──────────────────────── WITH toDateTime('2023-06-30 23:59:30') AS dt_ref, now() AS dt_now, - date_sub(MINUTE, 1, dt_now) as dt_before, + date_sub(DAY, 1, dt_now) as dt_before, dateDiff(SECOND, dt_ref, dt_now) AS time_shift, formatDateTime(dt_before, '%b %e %T') AS syslog_before SELECT @@ -34,7 +34,7 @@ SELECT '──────────────────────── WITH toDateTime('2023-06-30 23:59:30') AS dt_ref, now() AS dt_now, - date_add(MINUTE, 1, dt_now) as dt_after, + date_add(DAY, 1, dt_now) as dt_after, dateDiff(SECOND, dt_ref, dt_now) AS time_shift, formatDateTime(dt_after, '%b %e %T') AS syslog_after SELECT From 34cf2a167199f49370d509c3fb76e6c7dbead76e Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 3 Mar 2024 17:27:37 +0000 Subject: [PATCH 155/356] Fix scheduler fairness test --- src/Common/Scheduler/Nodes/tests/ResourceTest.h | 2 +- .../Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/Scheduler/Nodes/tests/ResourceTest.h b/src/Common/Scheduler/Nodes/tests/ResourceTest.h index ee168a76102..ea3f9edf765 100644 --- a/src/Common/Scheduler/Nodes/tests/ResourceTest.h +++ b/src/Common/Scheduler/Nodes/tests/ResourceTest.h @@ -282,7 +282,7 @@ struct ResourceTestManager : public ResourceTestBase return link_data[link]; } - // Use at least two threads for each queue to avoid queue being deactivated: + // Use exactly two threads for each queue to avoid queue being deactivated (happens with 1 thread) and reordering (happens with >2 threads): // while the first request is executing, the second request is in queue - holding it active. // use onEnqueue() and onExecute() functions for this purpose. 
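    // Editor's sketch (illustrative, not part of this patch; the exact ResourceTestManager
    // plumbing is assumed): with exactly two worker threads per queue, each thread is expected
    // to do roughly
    //     onEnqueue(link);   // announce the request - it waits in the queue while the peer executes
    //     onExecute(link);   // then run the request itself
    // so there is always one request executing and one queued, which keeps the queue active
    // without ever having a third request that could be reordered ahead of the others.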
void onEnqueue(ResourceLink link) diff --git a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp index 961a3b6f713..dbf94df8c18 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp @@ -56,7 +56,7 @@ TEST(SchedulerDynamicResourceManager, Fairness) EXPECT_NEAR(cur_unfairness, 0, 1); }; - constexpr size_t threads_per_queue = 3; + constexpr size_t threads_per_queue = 2; int requests_per_thread = 100; ResourceTest t(2 * threads_per_queue + 1); From 7dfe45e8d5cc38f086a88158d3e3c0b2412d8f99 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 3 Mar 2024 20:38:34 +0100 Subject: [PATCH 156/356] Do not retry queries if container is down in integration tests (resubmit) Signed-off-by: Azat Khuzhin --- tests/integration/helpers/cluster.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index d6292c51bbe..52c0d8a8ee5 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3489,6 +3489,11 @@ class ClickHouseInstance: if check_callback(result): return result time.sleep(sleep_time) + except QueryRuntimeException as ex: + # Container is down, this is likely due to server crash. + if "No route to host" in str(ex): + raise + time.sleep(sleep_time) except Exception as ex: # logging.debug("Retry {} got exception {}".format(i + 1, ex)) time.sleep(sleep_time) From 6f3bad904b0d2f900bc8cb08a23e6a00027968fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 20:42:44 +0100 Subject: [PATCH 157/356] Mark one setting as obsolete --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ae6ea165cc9..348e38cf269 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -875,7 +875,6 @@ class IColumn; M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ M(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, "Allows to set a default value for SQL SECURITY option when creating a materialized view.", 0) \ M(String, default_view_definer, "CURRENT_USER", "Allows to set a default value for DEFINER option when creating view.", 0) \ - M(Bool, allow_experimental_shared_merge_tree, false, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cache_warmer_threads, 4, "Only available in ClickHouse Cloud", 0) \ M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ @@ -902,6 +901,7 @@ class IColumn; MAKE_OBSOLETE(M, Bool, allow_experimental_geo_types, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_query_cache, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_alter_materialized_view_structure, true) \ + MAKE_OBSOLETE(M, Bool, allow_experimental_shared_merge_tree, true) \ \ MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \ MAKE_OBSOLETE(M, StreamingHandleErrorMode, handle_kafka_error_mode, StreamingHandleErrorMode::DEFAULT) \ From e6fd4658f47a4b4edf07a200d502a0acbd608821 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 20:44:23 +0100 Subject: [PATCH 158/356] Sync documentation --- src/Core/Settings.h | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 348e38cf269..2e5e1db78ed 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -875,9 +875,9 @@ class IColumn; M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ M(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, "Allows to set a default value for SQL SECURITY option when creating a materialized view.", 0) \ M(String, default_view_definer, "CURRENT_USER", "Allows to set a default value for DEFINER option when creating view.", 0) \ - M(UInt64, cache_warmer_threads, 4, "Only available in ClickHouse Cloud", 0) \ - M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ - M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ + M(UInt64, cache_warmer_threads, 4, "Only available in ClickHouse Cloud. Number of background threads for speculatively downloading new data parts into file cache, when cache_populated_by_fetch is enabled. Zero to disable.", 0) \ + M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.", 0) \ + M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ // End of COMMON_SETTINGS From 5c3262832be297a15c53506ef088f9c80b7bf0d9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 21:38:49 +0100 Subject: [PATCH 159/356] Fix a test with Analyzer --- tests/analyzer_tech_debt.txt | 1 - .../02493_inconsistent_hex_and_binary_number.expect | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 0672d3085fe..f1093fa07db 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -13,7 +13,6 @@ 01952_optimize_distributed_group_by_sharding_key 02174_cte_scalar_cache_mv 02354_annoy -02493_inconsistent_hex_and_binary_number # Check after constants refactoring 02901_parallel_replicas_rollup # Flaky. 
Please don't delete them without fixing them: diff --git a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect index 2d595b0f492..1cc11f9bf9f 100755 --- a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect +++ b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect @@ -18,23 +18,23 @@ spawn bash send "source $basedir/../shell_config.sh\r" send "\$CLICKHOUSE_CLIENT --query 'select 0b'\r" -expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0b;'\r" -expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0b ;'\r" -expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0x'\r" -expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0x;'\r" -expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0x ;'\r" -expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "exit\r" expect eof From 772cf60de18319cad57992e1ace391f706b5ddb9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 21:42:04 +0100 Subject: [PATCH 160/356] Edit SettingsChangesHistory --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index face1def4b4..f195ef487ab 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.3", {{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}}}, {"24.2", { {"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, From b41b935a6a9d386c259e71d5ae5be530076fb89d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 22:05:48 +0100 Subject: [PATCH 161/356] Two tests are fixed --- tests/analyzer_integration_broken_tests.txt | 1 - tests/analyzer_tech_debt.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 796ca6bca22..e819e134706 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -3,5 +3,4 @@ test_concurrent_backups_s3/test.py::test_concurrent_backups test_distributed_type_object/test.py::test_distributed_type_object 
test_merge_table_over_distributed/test.py::test_global_in test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed -test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster test_select_access_rights/test_main.py::test_alias_columns diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 0672d3085fe..9328504cd7c 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -20,4 +20,3 @@ 01287_max_execution_speed 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET 02404_memory_bound_merging -02479_race_condition_between_insert_and_droppin_mv From fe50f5ddf64f4c9902183854365f247a3b72edd0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 22:49:54 +0100 Subject: [PATCH 162/356] Make String a supertype for strings and enums --- src/DataTypes/getLeastSupertype.cpp | 10 ++++++---- .../03003_enum_and_string_compatible.reference | 1 + .../0_stateless/03003_enum_and_string_compatible.sql | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03003_enum_and_string_compatible.reference create mode 100644 tests/queries/0_stateless/03003_enum_and_string_compatible.sql diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index d67d5eb24e0..dec77119eed 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -474,16 +474,18 @@ DataTypePtr getLeastSupertype(const DataTypes & types) type_ids.insert(type->getTypeId()); /// For String and FixedString, or for different FixedStrings, the common type is String. - /// No other types are compatible with Strings. TODO Enums? + /// If there are Enums and any type of Strings, the common type is String. + /// No other types are compatible with Strings. 
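    /// Editor's illustration (not part of the patch; it mirrors the 03003_enum_and_string_compatible
    /// test added below): after this change a mix of Enum and String no longer throws
    /// NO_COMMON_TYPE, e.g.
    ///     DataTypes types{std::make_shared<DataTypeString>(), std::make_shared<DataTypeEnum8>(values)};
    ///     auto supertype = getLeastSupertype(types);  /// resolves to String
    /// where `values` stands for some Enum8 name/value pairs.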
{ size_t have_string = type_ids.count(TypeIndex::String); size_t have_fixed_string = type_ids.count(TypeIndex::FixedString); + size_t have_enums = type_ids.count(TypeIndex::Enum8) + type_ids.count(TypeIndex::Enum16); if (have_string || have_fixed_string) { - bool all_strings = type_ids.size() == (have_string + have_fixed_string); - if (!all_strings) - return throwOrReturn(types, "because some of them are String/FixedString and some of them are not", ErrorCodes::NO_COMMON_TYPE); + bool all_compatible_with_string = type_ids.size() == (have_string + have_fixed_string + have_enums); + if (!all_compatible_with_string) + return throwOrReturn(types, "because some of them are String/FixedString/Enum and some of them are not", ErrorCodes::NO_COMMON_TYPE); return std::make_shared(); } diff --git a/tests/queries/0_stateless/03003_enum_and_string_compatible.reference b/tests/queries/0_stateless/03003_enum_and_string_compatible.reference new file mode 100644 index 00000000000..acf5fe0d423 --- /dev/null +++ b/tests/queries/0_stateless/03003_enum_and_string_compatible.reference @@ -0,0 +1 @@ +['Hello','Goodbye','test'] diff --git a/tests/queries/0_stateless/03003_enum_and_string_compatible.sql b/tests/queries/0_stateless/03003_enum_and_string_compatible.sql new file mode 100644 index 00000000000..0abba6741ac --- /dev/null +++ b/tests/queries/0_stateless/03003_enum_and_string_compatible.sql @@ -0,0 +1 @@ +WITH 'Hello'::Enum8('Hello', 'World') AS enum1, 'test'::Enum8('test', 'best') AS enum2 SELECT [enum1, 'Goodbye', enum2]; From cbf5443585b82519310a45a7b4ad2f03873e796f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 00:11:55 +0100 Subject: [PATCH 163/356] Remove old code --- base/base/Decimal.h | 9 +-- base/base/JSON.cpp | 10 +-- base/base/JSON.h | 10 +-- base/base/coverage.cpp | 9 --- base/base/defines.h | 80 ++++++------------- base/base/iostream_debug_helpers.h | 2 - base/base/phdr_cache.cpp | 10 +-- base/glibc-compatibility/musl/getauxval.c | 6 +- base/harmful/harmful.c | 6 +- programs/benchmark/Benchmark.cpp | 4 - programs/client/Client.cpp | 4 - src/Columns/Collator.cpp | 6 +- src/Common/FailPoint.h | 4 - src/Common/MatchGenerator.cpp | 20 ++--- src/Common/MemorySanitizer.h | 4 - src/Common/NetlinkMetricsProvider.cpp | 6 +- src/Common/StackTrace.h | 4 +- src/Common/TargetSpecific.h | 40 ---------- .../integer_hash_tables_and_hashes.cpp | 10 +-- src/Common/intExp.h | 10 +-- src/Common/re2.h | 11 +-- src/Common/tests/gtest_DateLUTImpl.cpp | 4 +- src/Compression/LZ4_decompress_faster.cpp | 8 -- src/Formats/MarkInCompressedFile.h | 10 +-- src/Functions/FunctionsHashing.h | 10 +-- src/Functions/idna.cpp | 17 ++-- src/Functions/punycode.cpp | 4 - src/Functions/s2_fwd.h | 4 - src/Functions/seriesDecomposeSTL.cpp | 6 -- src/Functions/seriesPeriodDetectFFT.cpp | 14 ++-- src/IO/Archives/ArchiveUtils.h | 2 - src/IO/DoubleConverter.h | 4 - src/IO/WriteHelpers.h | 4 - src/IO/readFloatText.h | 4 - src/Interpreters/examples/hash_map_string.cpp | 4 +- src/Parsers/ExpressionListParsers.h | 4 - .../MergeTree/MergeTreeDataPartType.h | 10 +-- src/Storages/StorageS3.cpp | 10 +-- 38 files changed, 80 insertions(+), 304 deletions(-) diff --git a/base/base/Decimal.h b/base/base/Decimal.h index afa186faf5b..66ff623217c 100644 --- a/base/base/Decimal.h +++ b/base/base/Decimal.h @@ -1,14 +1,9 @@ #pragma once + #include #include +#include -#if !defined(NO_SANITIZE_UNDEFINED) -#if defined(__clang__) - #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) -#else - #define 
NO_SANITIZE_UNDEFINED -#endif -#endif namespace DB { diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 0b43be38149..9da059c98b6 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -10,14 +10,10 @@ #define JSON_MAX_DEPTH 100 -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOLINT(cert-err60-cpp, modernize-use-noexcept, hicpp-use-noexcept) -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop /// Read unsigned integer in a simple form from a non-0-terminated string. diff --git a/base/base/JSON.h b/base/base/JSON.h index 850b74715c6..bc053670a96 100644 --- a/base/base/JSON.h +++ b/base/base/JSON.h @@ -39,14 +39,10 @@ // NOLINTBEGIN(google-explicit-constructor) -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" POCO_DECLARE_EXCEPTION(Foundation_API, JSONException, Poco::Exception) -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop // NOLINTEND(google-explicit-constructor) class JSON diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index b85f1a16d32..99b897c4571 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -13,11 +13,7 @@ #include -# if defined(__clang__) extern "C" void __llvm_profile_dump(); // NOLINT -# elif defined(__GNUC__) || defined(__GNUG__) -extern "C" void __gcov_exit(); -# endif #endif @@ -28,12 +24,7 @@ void dumpCoverageReportIfPossible() static std::mutex mutex; std::lock_guard lock(mutex); -# if defined(__clang__) __llvm_profile_dump(); // NOLINT -# elif defined(__GNUC__) || defined(__GNUG__) - __gcov_exit(); -# endif - #endif } diff --git a/base/base/defines.h b/base/base/defines.h index 02058a29096..1f02748633d 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -11,7 +11,7 @@ /// including /// - it should not have fallback to 0, /// since this may create false-positive detection (common problem) -#if defined(__clang__) && defined(__has_feature) +#if defined(__has_feature) # define ch_has_feature __has_feature #endif @@ -76,24 +76,11 @@ /// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute. /// It is useful in case when compiler cannot see (and exploit) it, but UBSan can. /// Example: multiplication of signed integers with possibility of overflow when both sides are from user input. -#if defined(__clang__) -# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) -# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) -# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) -# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined"))) -#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. 
-# define NO_SANITIZE_UNDEFINED -# define NO_SANITIZE_ADDRESS -# define NO_SANITIZE_THREAD -# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE -#endif - -#if defined(__clang__) && defined(__clang_major__) && __clang_major__ >= 14 -# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation)) -#else -# define DISABLE_SANITIZER_INSTRUMENTATION -#endif - +#define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) +#define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) +#define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) +#define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined"))) +#define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation)) #if !__has_include() || !defined(ADDRESS_SANITIZER) # define ASAN_UNPOISON_MEMORY_REGION(a, b) @@ -135,54 +122,33 @@ /// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers. /// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader -#if defined(__clang__) -# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability -# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability -# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability -# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability -# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock -# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function -# define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability -# define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it -# define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure -# define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability -# define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it -# define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure -# define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability -# define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability +#define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability +#define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability +#define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability +#define TSA_REQUIRES_SHARED(...) 
__attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability +#define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock +#define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function +#define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability +#define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it +#define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure +#define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability +#define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it +#define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure +#define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability +#define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function) /// They use a lambda function to apply function attribute to a single statement. This enable us to suppress warnings locally instead of /// suppressing them in the whole function /// Consider adding a comment when using these macros. -# define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()) -# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()) +#define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()) +#define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()) /// This macro is useful when only one thread writes to a member /// and you want to read this member from the same thread without locking a mutex. /// It's safe (because no concurrent writes are possible), but TSA generates a warning. /// (Seems like there's no way to verify it, but it makes sense to distinguish it from TSA_SUPPRESS_WARNING_FOR_READ for readability) -# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) - -#else -# define TSA_GUARDED_BY(...) -# define TSA_PT_GUARDED_BY(...) -# define TSA_REQUIRES(...) -# define TSA_REQUIRES_SHARED(...) -# define TSA_NO_THREAD_SAFETY_ANALYSIS -# define TSA_CAPABILITY(...) -# define TSA_ACQUIRE(...) -# define TSA_TRY_ACQUIRE(...) -# define TSA_RELEASE(...) -# define TSA_ACQUIRE_SHARED(...) -# define TSA_TRY_ACQUIRE_SHARED(...) -# define TSA_RELEASE_SHARED(...) -# define TSA_SCOPED_LOCKABLE - -# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x) -# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x) -# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) -#endif +#define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) /// A template function for suppressing warnings about unused variables or function results. 
template diff --git a/base/base/iostream_debug_helpers.h b/base/base/iostream_debug_helpers.h index f531a56031b..5c601251272 100644 --- a/base/base/iostream_debug_helpers.h +++ b/base/base/iostream_debug_helpers.h @@ -155,9 +155,7 @@ Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-mis return dumpValue(out, x) << "; "; } -#ifdef __clang__ #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" -#endif #define DUMPVAR(VAR) ::dump(std::cerr, #VAR, (VAR)); #define DUMPHEAD std::cerr << __FILE__ << ':' << __LINE__ << " [ " << getThreadId() << " ] "; diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index 7d37f01b560..802d1bf35f5 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -11,10 +11,8 @@ /// Thread Sanitizer uses dl_iterate_phdr function on initialization and fails if we provide our own. #ifdef USE_PHDR_CACHE -#if defined(__clang__) -# pragma clang diagnostic ignored "-Wreserved-id-macro" -# pragma clang diagnostic ignored "-Wunused-macros" -#endif +#pragma clang diagnostic ignored "-Wreserved-id-macro" +#pragma clang diagnostic ignored "-Wunused-macros" #define __msan_unpoison(X, Y) // NOLINT #if defined(ch_has_feature) @@ -57,10 +55,6 @@ std::atomic phdr_cache {}; extern "C" -#ifndef __clang__ -[[gnu::visibility("default")]] -[[gnu::externally_visible]] -#endif int dl_iterate_phdr(int (*callback) (dl_phdr_info * info, size_t size, void * data), void * data) { auto * current_phdr_cache = phdr_cache.load(); diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 44a9f979f99..ea5cff9fc11 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -20,11 +20,7 @@ /// Suppress TSan since it is possible for this code to be called from multiple threads, /// and initialization is safe to be done multiple times from multiple threads. -#if defined(__clang__) -# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) -#else -# define NO_SANITIZE_THREAD -#endif +#define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) // We don't have libc struct available here. // Compute aux vector manually (from /proc/self/auxv). diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index 78796ca0c05..54b552a84ea 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -6,11 +6,7 @@ /// It is only enabled in debug build (its intended use is for CI checks). #if !defined(NDEBUG) -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wincompatible-library-redeclaration" -#else - #pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch" -#endif +#pragma clang diagnostic ignored "-Wincompatible-library-redeclaration" /// We cannot use libc headers here. 
long write(int, const void *, unsigned long); diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index fac88c0621f..45dadfef774 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -567,10 +567,6 @@ public: } -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif - int mainEntryClickHouseBenchmark(int argc, char ** argv) { using namespace DB; diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 649a64b9de4..a2bd6b6016a 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -51,10 +51,6 @@ #include #include -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif - namespace fs = std::filesystem; using namespace std::literals; diff --git a/src/Columns/Collator.cpp b/src/Columns/Collator.cpp index 434a30c0450..f6a3bb40d25 100644 --- a/src/Columns/Collator.cpp +++ b/src/Columns/Collator.cpp @@ -8,10 +8,8 @@ # include # include #else -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wunused-private-field" -# pragma clang diagnostic ignored "-Wmissing-noreturn" -# endif +# pragma clang diagnostic ignored "-Wunused-private-field" +# pragma clang diagnostic ignored "-Wmissing-noreturn" #endif #include diff --git a/src/Common/FailPoint.h b/src/Common/FailPoint.h index 613cfb15322..b3e1214d597 100644 --- a/src/Common/FailPoint.h +++ b/src/Common/FailPoint.h @@ -5,18 +5,14 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" #pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#endif #include #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif #include diff --git a/src/Common/MatchGenerator.cpp b/src/Common/MatchGenerator.cpp index f047c21b470..9078a5d181f 100644 --- a/src/Common/MatchGenerator.cpp +++ b/src/Common/MatchGenerator.cpp @@ -1,18 +1,14 @@ -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -# pragma clang diagnostic ignored "-Wgnu-anonymous-struct" -# pragma clang diagnostic ignored "-Wnested-anon-types" -# pragma clang diagnostic ignored "-Wunused-parameter" -# pragma clang diagnostic ignored "-Wshadow-field-in-constructor" -# pragma clang diagnostic ignored "-Wdtor-name" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +#pragma clang diagnostic ignored "-Wnested-anon-types" +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wshadow-field-in-constructor" +#pragma clang diagnostic ignored "-Wdtor-name" #include #include #include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop #ifdef LOG_INFO #undef LOG_INFO diff --git a/src/Common/MemorySanitizer.h b/src/Common/MemorySanitizer.h index bd44ff62acb..5d72e0b8f73 100644 --- a/src/Common/MemorySanitizer.h +++ b/src/Common/MemorySanitizer.h @@ -2,10 +2,8 @@ #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreserved-id-macro" -#endif #undef __msan_unpoison #undef __msan_test_shadow @@ -32,6 +30,4 @@ # endif #endif -#ifdef __clang__ #pragma clang diagnostic pop -#endif diff --git a/src/Common/NetlinkMetricsProvider.cpp b/src/Common/NetlinkMetricsProvider.cpp index 6969b5b7542..172fede525a 100644 --- a/src/Common/NetlinkMetricsProvider.cpp +++ b/src/Common/NetlinkMetricsProvider.cpp @@ 
-22,10 +22,8 @@ #include #include -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wgnu-anonymous-struct" - #pragma clang diagnostic ignored "-Wnested-anon-types" -#endif +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +#pragma clang diagnostic ignored "-Wnested-anon-types" /// Basic idea is motivated by "iotop" tool. /// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index e5654162ecb..a16d889a67a 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -11,9 +11,7 @@ #ifdef OS_DARWIN // ucontext is not available without _XOPEN_SOURCE -# ifdef __clang__ -# pragma clang diagnostic ignored "-Wreserved-id-macro" -# endif +# pragma clang diagnostic ignored "-Wreserved-id-macro" # define _XOPEN_SOURCE 700 #endif #include diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index 229150ecccb..f9523f667b2 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -102,8 +102,6 @@ String toString(TargetArch arch); /// NOLINTNEXTLINE #define USE_MULTITARGET_CODE 1 -#if defined(__clang__) - #define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2"))) #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw"))) @@ -134,45 +132,7 @@ String toString(TargetArch arch); * To prevent this warning we define this function inside every macros with pragmas. */ # define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition(); -#else -#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native"))) -#define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native"))) -#define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native"))) -#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native"))) -#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,bmi2,tune=native"))) -#define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native"))) -#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native))) -#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE - -# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native\")") -# define BEGIN_AVX512VBMI_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native\")") -# define BEGIN_AVX512BW_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native\")") -# define BEGIN_AVX512F_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC 
target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native\")") -# define BEGIN_AVX2_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,bmi2,tune=native\")") -# define BEGIN_AVX_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native\")") -# define BEGIN_SSE42_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,tune=native\")") -# define END_TARGET_SPECIFIC_CODE \ - _Pragma("GCC pop_options") - -/* GCC doesn't show such warning, we don't need to define anything. - */ -# define DUMMY_FUNCTION_DEFINITION -#endif #define DECLARE_SSE42_SPECIFIC_CODE(...) \ BEGIN_SSE42_SPECIFIC_CODE \ diff --git a/src/Common/benchmarks/integer_hash_tables_and_hashes.cpp b/src/Common/benchmarks/integer_hash_tables_and_hashes.cpp index 0bf13ef91ed..e6c09905ab8 100644 --- a/src/Common/benchmarks/integer_hash_tables_and_hashes.cpp +++ b/src/Common/benchmarks/integer_hash_tables_and_hashes.cpp @@ -26,10 +26,8 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wused-but-marked-unused" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wused-but-marked-unused" #include using Key = UInt64; @@ -385,6 +383,4 @@ OK_GOOGLE(TestRndInput, HashMap, TwoRoundsTwoVarsHash, elements_to_insert) OK_GOOGLE(TestRndInput, HashMap, WyHash, elements_to_insert) OK_GOOGLE(TestRndInput, HashMap, XXH3Hash, elements_to_insert) -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop diff --git a/src/Common/intExp.h b/src/Common/intExp.h index 69b0f09975a..25ae2a8a4b6 100644 --- a/src/Common/intExp.h +++ b/src/Common/intExp.h @@ -4,15 +4,7 @@ #include #include - -// Also defined in Core/Defines.h -#if !defined(NO_SANITIZE_UNDEFINED) -#if defined(__clang__) - #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) -#else - #define NO_SANITIZE_UNDEFINED -#endif -#endif +#include /// On overflow, the function returns unspecified value. diff --git a/src/Common/re2.h b/src/Common/re2.h index c81b7157e91..ef1d2ba2a16 100644 --- a/src/Common/re2.h +++ b/src/Common/re2.h @@ -1,11 +1,6 @@ #pragma once - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" #include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 3f9b75e264d..d1d10dafb63 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -16,9 +16,7 @@ /// For the expansion of gtest macros. -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wused-but-marked-unused" -#endif +#pragma clang diagnostic ignored "-Wused-but-marked-unused" // All timezones present at build time and embedded into ClickHouse binary. 
extern const char * auto_time_zones[]; diff --git a/src/Compression/LZ4_decompress_faster.cpp b/src/Compression/LZ4_decompress_faster.cpp index c7f6571cb46..b548feed848 100644 --- a/src/Compression/LZ4_decompress_faster.cpp +++ b/src/Compression/LZ4_decompress_faster.cpp @@ -49,9 +49,7 @@ inline void copy8(UInt8 * dst, const UInt8 * src) inline void wildCopy8(UInt8 * dst, const UInt8 * src, const UInt8 * dst_end) { /// Unrolling with clang is doing >10% performance degrade. -#if defined(__clang__) #pragma nounroll -#endif do { copy8(dst, src); @@ -234,9 +232,7 @@ inline void copy16(UInt8 * dst, const UInt8 * src) inline void wildCopy16(UInt8 * dst, const UInt8 * src, const UInt8 * dst_end) { /// Unrolling with clang is doing >10% performance degrade. -#if defined(__clang__) #pragma nounroll -#endif do { copy16(dst, src); @@ -371,9 +367,7 @@ inline void copy32(UInt8 * dst, const UInt8 * src) inline void wildCopy32(UInt8 * dst, const UInt8 * src, const UInt8 * dst_end) { /// Unrolling with clang is doing >10% performance degrade. -#if defined(__clang__) #pragma nounroll -#endif do { copy32(dst, src); @@ -487,9 +481,7 @@ bool NO_INLINE decompressImpl(const char * const source, char * const dest, size UInt8 * const output_end = op + dest_size; /// Unrolling with clang is doing >10% performance degrade. -#if defined(__clang__) #pragma nounroll -#endif while (true) { size_t length; diff --git a/src/Formats/MarkInCompressedFile.h b/src/Formats/MarkInCompressedFile.h index 92f4a030a1a..06ed1476410 100644 --- a/src/Formats/MarkInCompressedFile.h +++ b/src/Formats/MarkInCompressedFile.h @@ -12,10 +12,8 @@ namespace DB /// It's a bug in clang with three-way comparison operator /// https://github.com/llvm/llvm-project/issues/55919 -#ifdef __clang__ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" /** Mark is the position in the compressed file. The compressed file consists of adjacent compressed blocks. * Mark is a tuple - the offset in the file to the start of the compressed block, the offset in the decompressed block to the start of the data. @@ -41,9 +39,7 @@ struct MarkInCompressedFile } }; -#ifdef __clang__ - #pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop /** * In-memory representation of an array of marks. 
diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index d0edd34e657..79b33e2f75b 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -9,10 +9,8 @@ #include "config.h" -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wused-but-marked-unused" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wused-but-marked-unused" #include #include @@ -1604,6 +1602,4 @@ using FunctionXXH3 = FunctionAnyHash; using FunctionWyHash64 = FunctionAnyHash; } -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop diff --git a/src/Functions/idna.cpp b/src/Functions/idna.cpp index a73347400c6..c9682b44b2c 100644 --- a/src/Functions/idna.cpp +++ b/src/Functions/idna.cpp @@ -6,16 +6,12 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wnewline-eof" -#endif -# include -# include -# include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnewline-eof" +#include +#include +#include +#pragma clang diagnostic pop namespace DB { @@ -199,4 +195,3 @@ Computes the Unicode representation of ASCII-encoded Internationalized Domain Na } #endif - diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp index 159189744bd..107302069b4 100644 --- a/src/Functions/punycode.cpp +++ b/src/Functions/punycode.cpp @@ -6,15 +6,11 @@ #include #include -#ifdef __clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wnewline-eof" -#endif # include # include -#ifdef __clang__ # pragma clang diagnostic pop -#endif namespace DB { diff --git a/src/Functions/s2_fwd.h b/src/Functions/s2_fwd.h index 6e0b58ae118..4ed5d4fcc1b 100644 --- a/src/Functions/s2_fwd.h +++ b/src/Functions/s2_fwd.h @@ -1,8 +1,6 @@ #pragma once -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wambiguous-reversed-operator" -#endif #include #include @@ -11,6 +9,4 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index e9276c4aefb..fbabc801913 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -1,15 +1,9 @@ -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wold-style-cast" #pragma clang diagnostic ignored "-Wshadow" #pragma clang diagnostic ignored "-Wimplicit-float-conversion" -#endif - #include - -#ifdef __clang__ #pragma clang diagnostic pop -#endif #include #include diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index 61e3319d810..c01f6b7f07b 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -1,18 +1,14 @@ #include "config.h" #if USE_POCKETFFT -# ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wshadow" -# pragma clang diagnostic ignored "-Wextra-semi-stmt" -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -# endif +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wshadow" +# pragma clang diagnostic ignored "-Wextra-semi-stmt" +# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" # include -# ifdef __clang__ -# pragma clang diagnostic pop -# endif +# pragma clang diagnostic pop # include # include diff --git a/src/IO/Archives/ArchiveUtils.h 
b/src/IO/Archives/ArchiveUtils.h index 810b9d8d730..1b66be005a2 100644 --- a/src/IO/Archives/ArchiveUtils.h +++ b/src/IO/Archives/ArchiveUtils.h @@ -4,11 +4,9 @@ #if USE_LIBARCHIVE -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreserved-macro-identifier" #include #include #endif -#endif diff --git a/src/IO/DoubleConverter.h b/src/IO/DoubleConverter.h index 18cbe4e3a1d..45721da5248 100644 --- a/src/IO/DoubleConverter.h +++ b/src/IO/DoubleConverter.h @@ -1,17 +1,13 @@ #pragma once -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdouble-promotion" -#endif #include #include #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif namespace DB diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index f438990fd1c..8b743e6351b 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -39,15 +39,11 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-parameter" #pragma clang diagnostic ignored "-Wsign-compare" -#endif #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif #include diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 51964636389..597f0a06fb9 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -6,14 +6,10 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunneeded-internal-declaration" -#endif #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif /** Methods for reading floating point numbers from text with decimal representation. * There are "precise", "fast" and "simple" implementations. diff --git a/src/Interpreters/examples/hash_map_string.cpp b/src/Interpreters/examples/hash_map_string.cpp index f3ec104a5f7..f30a9a4cac1 100644 --- a/src/Interpreters/examples/hash_map_string.cpp +++ b/src/Interpreters/examples/hash_map_string.cpp @@ -20,9 +20,7 @@ #include #include -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wgnu-anonymous-struct" -#endif +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" struct CompactStringRef diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 6dba5a9c31f..235d5782630 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -9,10 +9,8 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wc99-extensions" -#endif namespace DB { @@ -297,6 +295,4 @@ protected: } -#ifdef __clang__ #pragma clang diagnostic pop -#endif diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.h b/src/Storages/MergeTree/MergeTreeDataPartType.h index 8b06da5167e..5096ee86db1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartType.h +++ b/src/Storages/MergeTree/MergeTreeDataPartType.h @@ -26,10 +26,8 @@ static E parseEnum(const String & str) /// It's a bug in clang with three-way comparison operator /// https://github.com/llvm/llvm-project/issues/55919 -#ifdef __clang__ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" /// Types of data part format. 
class MergeTreeDataPartType @@ -86,9 +84,7 @@ private: Value value; }; -#ifdef __clang__ - #pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop struct MergeTreeDataPartFormat { diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index da90dbb4076..dce51ada042 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -68,14 +68,10 @@ #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" #include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop namespace fs = std::filesystem; From fc6f3c8399717891da40b820aa35385bd80a4540 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 00:49:22 +0100 Subject: [PATCH 164/356] Live view's life is close to the end --- src/Common/ErrorCodes.cpp | 1 - src/Core/Block.h | 1 - src/NOTICE | 9 --- src/Parsers/ASTAlterQuery.cpp | 3 - src/Parsers/ASTAlterQuery.h | 3 - src/Parsers/ASTCreateQuery.cpp | 7 --- src/Parsers/ASTCreateQuery.h | 1 - src/Parsers/ParserAlterQuery.cpp | 4 -- src/Parsers/ParserCreateQuery.cpp | 24 ------- src/Storages/LiveView/StorageLiveView.cpp | 77 +---------------------- src/Storages/LiveView/StorageLiveView.h | 12 +--- 11 files changed, 4 insertions(+), 138 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ca00f2fd513..f7482d44b66 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -379,7 +379,6 @@ M(467, CANNOT_PARSE_BOOL) \ M(468, CANNOT_PTHREAD_ATTR) \ M(469, VIOLATED_CONSTRAINT) \ - M(470, QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW) \ M(471, INVALID_SETTING_VALUE) \ M(472, READONLY_SETTING) \ M(473, DEADLOCK_AVOIDED) \ diff --git a/src/Core/Block.h b/src/Core/Block.h index 1a4f8c2e446..c8bebb4552a 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -177,7 +177,6 @@ using BlockPtr = std::shared_ptr; using Blocks = std::vector; using BlocksList = std::list; using BlocksPtr = std::shared_ptr; -using BlocksPtrs = std::shared_ptr>; /// Extends block with extra data in derived classes struct ExtraBlock diff --git a/src/NOTICE b/src/NOTICE index c68280b1529..4e5f66c65c9 100644 --- a/src/NOTICE +++ b/src/NOTICE @@ -13,18 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
-Common/ErrorCodes.cpp -Common/UInt128.h -Core/Block.h -Core/Defines.h -Core/Settings.h -Databases/DatabasesCommon.cpp -IO/WriteBufferValidUTF8.cpp -Interpreters/InterpreterAlterQuery.cpp Interpreters/InterpreterCreateQuery.cpp Interpreters/InterpreterFactory.cpp Parsers/ASTAlterQuery.cpp -Parsers/ASTAlterQuery.h Parsers/ASTCreateQuery.cpp Parsers/ASTCreateQuery.h Parsers/ParserAlterQuery.cpp diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 605cc4ade42..a93ad1d1746 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -60,8 +60,6 @@ ASTPtr ASTAlterCommand::clone() const res->settings_resets = res->children.emplace_back(settings_resets->clone()).get(); if (select) res->select = res->children.emplace_back(select->clone()).get(); - if (values) - res->values = res->children.emplace_back(values->clone()).get(); if (rename_to) res->rename_to = res->children.emplace_back(rename_to->clone()).get(); @@ -518,7 +516,6 @@ void ASTAlterCommand::forEachPointerToChild(std::function f) f(reinterpret_cast(&settings_changes)); f(reinterpret_cast(&settings_resets)); f(reinterpret_cast(&select)); - f(reinterpret_cast(&values)); f(reinterpret_cast(&rename_to)); } diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 867ebf26194..1799b75fce4 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -166,9 +166,6 @@ public: /// For MODIFY_SQL_SECURITY IAST * sql_security = nullptr; - /// In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here - IAST * values = nullptr; - /// Target column name IAST * rename_to = nullptr; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index e8ccb8e9377..1315ea5784c 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -348,13 +348,6 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") << quoteString(*attach_from_path); - if (live_view_periodic_refresh) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH" << (settings.hilite ? hilite_none : "") - << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? hilite_none : "") - << *live_view_periodic_refresh; - } - formatOnCluster(settings); } else diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index aeb84d754e3..64e6bc8ce48 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -122,7 +122,6 @@ public: ASTDictionary * dictionary = nullptr; /// dictionary definition (layout, primary key, etc.) ASTRefreshStrategy * refresh_strategy = nullptr; // For CREATE MATERIALIZED VIEW ... REFRESH ... - std::optional live_view_periodic_refresh; /// For CREATE LIVE VIEW ... WITH [PERIODIC] REFRESH ... 
bool is_watermark_strictly_ascending{false}; /// STRICTLY ASCENDING WATERMARK STRATEGY FOR WINDOW VIEW bool is_watermark_ascending{false}; /// ASCENDING WATERMARK STRATEGY FOR WINDOW VIEW diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 495e91b96d5..b1cc7622e00 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -138,7 +138,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserList parser_reset_setting( std::make_unique(), std::make_unique(TokenType::Comma), /* allow_empty = */ false); - ParserNameList values_p; ParserSelectWithUnionQuery select_p; ParserSQLSecurity sql_security_p; ParserRefreshStrategy refresh_p; @@ -163,7 +162,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ASTPtr command_settings_changes; ASTPtr command_settings_resets; ASTPtr command_select; - ASTPtr command_values; ASTPtr command_rename_to; ASTPtr command_sql_security; @@ -944,8 +942,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->settings_resets = command->children.emplace_back(std::move(command_settings_resets)).get(); if (command_select) command->select = command->children.emplace_back(std::move(command_select)).get(); - if (command_values) - command->values = command->children.emplace_back(std::move(command_values)).get(); if (command_sql_security) command->sql_security = command->children.emplace_back(std::move(command_sql_security)).get(); if (command_rename_to) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 3c86ed6b518..8ebadf4606f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -917,15 +917,11 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr as_database; ASTPtr as_table; ASTPtr select; - ASTPtr live_view_periodic_refresh; ASTPtr sql_security; String cluster_str; bool attach = false; bool if_not_exists = false; - bool with_and = false; - bool with_timeout = false; - bool with_periodic_refresh = false; if (!s_create.ignore(pos, expected)) { @@ -949,23 +945,6 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (!table_name_p.parse(pos, table, expected)) return false; - if (ParserKeyword{"WITH"}.ignore(pos, expected)) - { - if (ParserKeyword{"REFRESH"}.ignore(pos, expected) || ParserKeyword{"PERIODIC REFRESH"}.ignore(pos, expected)) - { - if (!ParserNumber{}.parse(pos, live_view_periodic_refresh, expected)) - live_view_periodic_refresh = std::make_shared(static_cast(60)); - - with_periodic_refresh = true; - } - - else if (with_and) - return false; - - if (!with_timeout && !with_periodic_refresh) - return false; - } - if (ParserKeyword{"ON"}.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) @@ -1028,9 +1007,6 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); - if (live_view_periodic_refresh) - query->live_view_periodic_refresh.emplace(live_view_periodic_refresh->as().value.safeGet()); - if (comment) query->set(query->comment, comment); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 2f011567b90..958e0a326cf 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -57,7 +57,7 @@ namespace ErrorCodes { 
extern const int INCORRECT_QUERY; extern const int TABLE_WAS_NOT_DROPPED; - extern const int QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW; + extern const int NOT_IMPLEMENTED; extern const int SUPPORT_IS_DISABLED; extern const int UNSUPPORTED_METHOD; } @@ -86,14 +86,14 @@ SelectQueryDescription buildSelectQueryDescription(const ASTPtr & select_query, if (inner_select_with_union_query) { if (inner_select_with_union_query->list_of_selects->children.size() != 1) - throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, "UNION is not supported for LIVE VIEW"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "UNION is not supported for LIVE VIEW"); inner_query = inner_select_with_union_query->list_of_selects->children[0]; } auto * inner_select_query = inner_query->as(); if (!inner_select_query) - throw Exception(DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, + throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "LIVE VIEWs are only supported for queries from tables, " "but there is no table name in select query."); @@ -226,29 +226,9 @@ StorageLiveView::StorageLiveView( DatabaseCatalog::instance().addViewDependency(select_query_description.select_table_id, table_id_); - if (query.live_view_periodic_refresh) - { - is_periodically_refreshed = true; - periodic_live_view_refresh = Seconds {*query.live_view_periodic_refresh}; - } - blocks_ptr = std::make_shared(); blocks_metadata_ptr = std::make_shared(); active_ptr = std::make_shared(true); - - periodic_refresh_task = getContext()->getSchedulePool().createTask("LiveViewPeriodicRefreshTask", - [this] - { - try - { - periodicRefreshTaskFunc(); - } - catch (...) - { - tryLogCurrentException(log, "Exception in LiveView periodic refresh task in BackgroundSchedulePool"); - } - }); - periodic_refresh_task->deactivate(); } StorageLiveView::~StorageLiveView() @@ -285,17 +265,12 @@ void StorageLiveView::drop() void StorageLiveView::startup() { - if (is_periodically_refreshed) - periodic_refresh_task->activate(); } void StorageLiveView::shutdown(bool) { shutdown_called = true; - if (is_periodically_refreshed) - periodic_refresh_task->deactivate(); - DatabaseCatalog::instance().removeViewDependency(select_query_description.select_table_id, getStorageID()); } @@ -311,17 +286,7 @@ Pipe StorageLiveView::read( std::lock_guard lock(mutex); if (!(*blocks_ptr)) - { refreshImpl(lock); - } - else if (is_periodically_refreshed) - { - Seconds current_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - Seconds blocks_time = std::chrono::duration_cast(getBlocksTime(lock).time_since_epoch()); - - if ((current_time - periodic_live_view_refresh) >= blocks_time) - refreshImpl(lock); - } return Pipe(std::make_shared(*blocks_ptr, getHeader())); } @@ -362,9 +327,6 @@ Pipe StorageLiveView::watch( if (!(*blocks_ptr)) refreshImpl(lock); - - if (is_periodically_refreshed) - scheduleNextPeriodicRefresh(lock); } processed_stage = QueryProcessingStage::Complete; @@ -746,39 +708,6 @@ bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) return updated; } -void StorageLiveView::periodicRefreshTaskFunc() -{ - LOG_TRACE(log, "periodic refresh task"); - - std::lock_guard lock(mutex); - - if (hasActiveUsers(lock)) - scheduleNextPeriodicRefresh(lock); -} - -void StorageLiveView::scheduleNextPeriodicRefresh(const std::lock_guard & lock) -{ - Seconds current_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - Seconds blocks_time = std::chrono::duration_cast(getBlocksTime(lock).time_since_epoch()); - - if 
((current_time - periodic_live_view_refresh) >= blocks_time) - { - refreshImpl(lock); - blocks_time = std::chrono::duration_cast(getBlocksTime(lock).time_since_epoch()); - } - current_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - - auto next_refresh_time = blocks_time + periodic_live_view_refresh; - - if (current_time >= next_refresh_time) - periodic_refresh_task->scheduleAfter(0); - else - { - auto schedule_time = std::chrono::duration_cast (next_refresh_time - current_time); - periodic_refresh_task->scheduleAfter(static_cast(schedule_time.count())); - } -} - void registerStorageLiveView(StorageFactory & factory) { factory.registerStorage("LiveView", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 6b8780cb81b..bf6b13fc837 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -21,6 +21,7 @@ limitations under the License. */ namespace DB { +using BlocksPtrs = std::shared_ptr>; struct BlocksMetadata { @@ -172,11 +173,6 @@ private: /// Read new data blocks that store query result bool getNewBlocks(const std::lock_guard & lock); - void periodicRefreshTaskFunc(); - - /// Must be called with mutex locked - void scheduleNextPeriodicRefresh(const std::lock_guard & lock); - SelectQueryDescription select_query_description; /// Query over the mergeable blocks to produce final result @@ -186,9 +182,6 @@ private: LoggerPtr log; - bool is_periodically_refreshed = false; - Seconds periodic_live_view_refresh; - /// Mutex to protect access to sample block and inner_blocks_query mutable std::mutex sample_block_lock; mutable Block sample_block; @@ -208,9 +201,6 @@ private: MergeableBlocksPtr mergeable_blocks; std::atomic shutdown_called = false; - - /// Periodic refresh task used when [PERIODIC] REFRESH is specified in create statement - BackgroundSchedulePool::TaskHolder periodic_refresh_task; }; } From 395cc63792a0e8b6269355cf1853314702bc210a Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 3 Mar 2024 15:54:01 -0800 Subject: [PATCH 165/356] Fix source of panics, add tests --- rust/Cargo.lock | 9 +++++---- rust/prql/Cargo.toml | 1 + rust/prql/src/lib.rs | 48 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index dbbe184228c..a242a8243b5 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -6,6 +6,7 @@ version = 3 name = "_ch_rust_prql" version = "0.1.0" dependencies = [ + "anstream", "prqlc", "serde_json", ] @@ -698,9 +699,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -751,9 +752,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.1" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] diff --git a/rust/prql/Cargo.toml b/rust/prql/Cargo.toml index e70a49658e3..514b5176d48 100644 --- a/rust/prql/Cargo.toml +++ b/rust/prql/Cargo.toml @@ 
-4,6 +4,7 @@ name = "_ch_rust_prql" version = "0.1.0" [dependencies] +anstream = {version = "0.6.12"} prqlc = {version = "0.11.3", default-features = false} serde_json = "1.0" diff --git a/rust/prql/src/lib.rs b/rust/prql/src/lib.rs index e17229e0f38..f4253cef08f 100644 --- a/rust/prql/src/lib.rs +++ b/rust/prql/src/lib.rs @@ -42,7 +42,12 @@ pub unsafe extern "C" fn prql_to_sql_impl( Err(err) => (true, err.to_string()), }; - set_output(res, out, out_size); + // NOTE: Over at PRQL we're considering to un-deprecate & re-enable the + // `color: false` option. If that happens, we can remove the `strip_str` + // here, which strips the output of color codes. + use anstream::adapter::strip_str; + + set_output(strip_str(&res).to_string(), out, out_size); match is_err { true => 1, @@ -58,10 +63,49 @@ pub unsafe extern "C" fn prql_to_sql( out_size: *mut u64, ) -> i64 { // NOTE: using cxxbridge we can return proper Result<> type. - panic::catch_unwind(|| prql_to_sql_impl(query, size, out, out_size)).unwrap_or(1) + panic::catch_unwind(|| prql_to_sql_impl(query, size, out, out_size)).unwrap_or_else(|_| { + set_output("prqlc panicked".to_string(), out, out_size); + 1 + }) } #[no_mangle] pub unsafe extern "C" fn prql_free_pointer(ptr_to_free: *mut u8) { std::mem::drop(CString::from_raw(ptr_to_free as *mut c_char)); } + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::{CStr, CString}; + + /// A test helper to offer a rust interface to the C bindings + fn run_compile(query: &str) -> (String, i64) { + let query_cstr = CString::new(query).unwrap(); + let query_ptr = query_cstr.as_ptr() as *const u8; + let query_size = query_cstr.to_bytes_with_nul().len() as u64 - 1; // Excluding the null terminator + + let mut out: *mut u8 = std::ptr::null_mut(); + let mut out_size = 0_u64; + + unsafe { + let success = prql_to_sql(query_ptr, query_size, &mut out, &mut out_size); + let output = CStr::from_ptr(out as *const i8) + .to_str() + .unwrap() + .to_string(); + prql_free_pointer(out); + (output, success) + } + } + + #[test] + fn test_prql_to_sql() { + assert!(run_compile("from x").0.contains("SELECT")); + assert!(run_compile("asdf").1 == 1); + // In prqlc 0.11.3, this is a panic, so that allows us to test that the + // panic is caught. When we upgrade prqlc, it won't be a panic any + // longer. 
+ assert!(run_compile("x -> y").1 == 1); + } +} From 9a5085a4c309ace6ded156de6b969b14131d2cfe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 02:56:22 +0100 Subject: [PATCH 166/356] Fix buffer overflow in CompressionCodecMultiple --- src/Compression/CompressionCodecMultiple.cpp | 24 +++++++++++++++++-- ...3_codec_multiple_buffer_overflow.reference | 1 + .../03003_codec_multiple_buffer_overflow.sh | 8 +++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference create mode 100755 tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh diff --git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index b1eb7fb50c3..801609bbe14 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -88,14 +88,34 @@ void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 sour const auto codec = CompressionCodecFactory::instance().get(compression_method); auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); - compressed_buf.resize(compressed_buf.size() + additional_size_at_the_end_of_buffer); + if (compressed_buf.size() >= 1_GiB) + throw Exception(decompression_error_code, "Too large compressed size: {}", compressed_buf.size()); + + { + UInt32 bytes_to_resize; + if (common::addOverflow(static_cast(compressed_buf.size()), additional_size_at_the_end_of_buffer, bytes_to_resize)) + throw Exception(decompression_error_code, "Too large compressed size: {}", compressed_buf.size()); + + compressed_buf.resize(compressed_buf.size() + additional_size_at_the_end_of_buffer); + } + UInt32 uncompressed_size = readDecompressedBlockSize(compressed_buf.data()); + if (uncompressed_size >= 1_GiB) + throw Exception(decompression_error_code, "Too large uncompressed size: {}", uncompressed_size); + if (idx == 0 && uncompressed_size != decompressed_size) throw Exception(decompression_error_code, "Wrong final decompressed size in codec Multiple, got {}, expected {}", uncompressed_size, decompressed_size); - uncompressed_buf.resize(uncompressed_size + additional_size_at_the_end_of_buffer); + { + UInt32 bytes_to_resize; + if (common::addOverflow(uncompressed_size, additional_size_at_the_end_of_buffer, bytes_to_resize)) + throw Exception(decompression_error_code, "Too large uncompressed size: {}", uncompressed_size); + + uncompressed_buf.resize(bytes_to_resize); + } + codec->decompress(compressed_buf.data(), source_size, uncompressed_buf.data()); uncompressed_buf.swap(compressed_buf); source_size = uncompressed_size; diff --git a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference new file mode 100644 index 00000000000..b6db4b31fcb --- /dev/null +++ b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference @@ -0,0 +1 @@ +Too large diff --git a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh new file mode 100755 index 00000000000..3a1537356c2 --- /dev/null +++ b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo -ne 'checksumchecksum\x91\xa4\x0a\x00\x00\x41\x00\x00\x20\x41\x41\x41\x40\x41\x00\x41\x41\x41\x41\x40\x41\x00\x00\x00\x00\x00\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x41\x41\x00\x00\x00\x00\x00\x01\xfe\x7f\x00\x00\x41\x00\x00\x00\x41\x92\x6b\x00\x41\x41\x0b\x00\x00\x00\x00\x00\x41\x92\x6b\x00\x41\x41\x0b\x00\x00\x82\x82\x82\x82\x63\x82\xff\xff\xff\xff\xff\xff\xff\xff\x95\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x41\x41\x41\x41\x41\x41\x41\x41\x41\x40\x08\x08\x08\x08\x08\x08\x00\x06\x00\x00\x00\x08\x00\x20\x00\x00\xef\xff\xff\xff\xe1\x40\x26\x41\x00\x1d\x01\x00\x00\x41\x42\x0b\xff\xff\xff\xe4\x41\x41\x4e\x41\x41\x06\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x7e\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x00\x04\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x9c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x4f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\
x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f
\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x6c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\xa9\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x4f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x6c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8
f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x9b\x8f\x8f\x8f\x20\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f' | + ${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- | grep -o -F 'Too large' From 7609856cd54b7a8f214182ac9d805bba586cda1b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 03:06:27 +0100 Subject: [PATCH 167/356] Fix another error --- src/Compression/CompressionCodecMultiple.cpp | 5 ----- src/Compression/CompressionCodecNone.cpp | 6 +++++- src/Compression/CompressionCodecNone.h | 2 -- .../03003_codec_multiple_buffer_overflow.reference | 1 + .../0_stateless/03003_codec_multiple_buffer_overflow.sh | 3 +++ 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index 801609bbe14..6dc10677a3f 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -1,14 +1,9 @@ #include #include #include -#include #include -#include -#include #include #include -#include -#include namespace DB diff --git a/src/Compression/CompressionCodecNone.cpp b/src/Compression/CompressionCodecNone.cpp index 065ac4a2625..53d62e51920 100644 --- a/src/Compression/CompressionCodecNone.cpp +++ b/src/Compression/CompressionCodecNone.cpp @@ -27,8 +27,12 @@ UInt32 CompressionCodecNone::doCompressData(const char * source, UInt32 source_s return source_size; } -void CompressionCodecNone::doDecompressData(const char * source, UInt32 /*source_size*/, char * dest, UInt32 uncompressed_size) const +void CompressionCodecNone::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { + if (source_size != uncompressed_size) + throw Exception(decompression_error_code, "Wrong data for compression codec NONE: source_size ({}) != uncompressed_size ({})", + source_size, uncompressed_size); + memcpy(dest, source, uncompressed_size); } diff --git a/src/Compression/CompressionCodecNone.h b/src/Compression/CompressionCodecNone.h index 1565720947d..5d6f135b351 100644 --- a/src/Compression/CompressionCodecNone.h +++ b/src/Compression/CompressionCodecNone.h @@ -18,9 +18,7 @@ public: void updateHash(SipHash & hash) const override; protected: - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; bool isCompression() const override { return false; } diff --git a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference index b6db4b31fcb..93d120dac01 100644 --- a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference +++ b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference @@ -1 +1,2 @@ Too large +Wrong data diff --git a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh index 3a1537356c2..93290f62c58 100755 --- a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh +++ b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh @@ -6,3 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" 
&& pwd) echo -ne 'checksumchecksum\x91\xa4\x0a\x00\x00\x41\x00\x00\x20\x41\x41\x41\x40\x41\x00\x41\x41\x41\x41\x40\x41\x00\x00\x00\x00\x00\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x41\x41\x00\x00\x00\x00\x00\x01\xfe\x7f\x00\x00\x41\x00\x00\x00\x41\x92\x6b\x00\x41\x41\x0b\x00\x00\x00\x00\x00\x41\x92\x6b\x00\x41\x41\x0b\x00\x00\x82\x82\x82\x82\x63\x82\xff\xff\xff\xff\xff\xff\xff\xff\x95\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x41\x41\x41\x41\x41\x41\x41\x41\x41\x40\x08\x08\x08\x08\x08\x08\x00\x06\x00\x00\x00\x08\x00\x20\x00\x00\xef\xff\xff\xff\xe1\x40\x26\x41\x00\x1d\x01\x00\x00\x41\x42\x0b\xff\xff\xff\xe4\x41\x41\x4e\x41\x41\x06\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x7e\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x00\x04\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x9c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x4f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\
x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f
\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x6c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\xa9\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x4f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x6c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8
f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x9b\x8f\x8f\x8f\x20\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f' | ${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- | grep -o -F 'Too large' + +echo -ne 'checksumchecksum\x91\x2b\x01\x00\x00\xbe\xe1\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x00\x04\x04\x04\x00\x08\x7f\x00\x01\x06\x82\x82\x82\x82\x82\x82\x82\x80\x41\x41\x41\x41\x41\x9a\x75\x6d\x63\x68\x65\x63\x6b\x73\x6d\x63\x68\x65\x63\x6b\x73\x75\x00\x00\x00\x41\x41\x41\x42\x64\x41\x41\x41\x0c\x00\x1c\x41\x41\xbe\x22\x41\x41\x00\x00\x00\x00\x11\x00\x41\x41\x75\x00\x00\x00\x41\x41\x41\x42\x64\x41\x41\x41\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x41\x41\x00\x00\x00\x00\x00\x01\x14\xff\x7f\x00\x41\x00\x00\x00\x00\x00\x00\x41\x41\x75\x00\x00\x00\x41\x41\x41\x42\x64\x41\x61\x41\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x00\x41\x14\x14\x41\x14\x14\x14\x14\x14\x14\x14\x14\x14\x14\x14\x0f\x0f\x0f\x0f\x0f\x41\x41\x41\x41\x64\x00\x30\x00\xcf\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x41\x41\x0b\x00\x00\x00\x41\x41\x41\xe8\x1f\xe1\x00\x01\x00\xff\x00\x41\x41\xbf\x41\x41\x40\x40\xe1\x00\x00\x00\x00\x1a\x00\x20\x00\x00\x00\x41\x00\x00\x00\x42\x64\x41\x41\x41\x0c\x00\x1c\x41\x41\xbe\x22\x41\x41\x00\x00\x00\x00\x00\x00\x41\x41\x75\x00\x00\x00\x41\x41\x41\x42\x64\x00\x00\x0b\x00\xe6\xff\x00\x00\x00\x00\x00' | + ${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- | grep -o -F 'Wrong data' From 3476e36e87f720ef719c93621d2854ca364d2510 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 05:50:43 +0300 Subject: [PATCH 168/356] Update build.md 1. Add `apt-get update` command. 2. Put Docker to the bottom. --- docs/en/development/build.md | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/docs/en/development/build.md b/docs/en/development/build.md index b474c445604..acdde7b9245 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -14,20 +14,6 @@ Supported platforms: - PowerPC 64 LE (experimental) - RISC-V 64 (experimental) -## Building in docker -We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage: - -```bash -# define a directory for the output artifacts -output_dir="build_results" -# a simplest build -./docker/packager/packager --package-type=binary --output-dir "$output_dir" -# build debian packages -./docker/packager/packager --package-type=deb --output-dir "$output_dir" -# by default, debian packages use thin LTO, so we can override it to speed up the build -CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results" -``` - ## Building on Ubuntu The following tutorial is based on Ubuntu Linux. @@ -37,6 +23,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS. 
### Install Prerequisites {#install-prerequisites} ``` bash +sudo apt-get update sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg ``` @@ -133,3 +120,17 @@ mkdir build cmake -S . -B build cmake --build build ``` + +## Building in docker +We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage: + +```bash +# define a directory for the output artifacts +output_dir="build_results" +# a simplest build +./docker/packager/packager --package-type=binary --output-dir "$output_dir" +# build debian packages +./docker/packager/packager --package-type=deb --output-dir "$output_dir" +# by default, debian packages use thin LTO, so we can override it to speed up the build +CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results" +``` From 629af21701df814b583aacd5e241cb95963842b7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 05:52:47 +0300 Subject: [PATCH 169/356] Update build.md --- docs/en/development/build.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/development/build.md b/docs/en/development/build.md index acdde7b9245..5cbf851b785 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -44,7 +44,7 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test For other Linux distributions - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html). -As of August 2023, clang-16 or higher will work. +As of March 2024, clang-17 or higher will work. GCC as a compiler is not supported. To build with a specific Clang version: @@ -54,8 +54,8 @@ to see what version you have installed before setting this environment variable. ::: ``` bash -export CC=clang-17 -export CXX=clang++-17 +export CC=clang-18 +export CXX=clang++-18 ``` ### Checkout ClickHouse Sources {#checkout-clickhouse-sources} From 43e0a1b7081b26e77cd734817d494f79c2d2b10d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 05:59:02 +0100 Subject: [PATCH 170/356] Remove nonsense from SQL/JSON --- src/Functions/FunctionSQLJSON.h | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 0533f3d419a..3efa40df9be 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -26,6 +26,7 @@ #include "config.h" + namespace DB { namespace ErrorCodes @@ -114,8 +115,6 @@ private: }; -class EmptyJSONStringSerializer{}; - class FunctionSQLJSONHelpers { @@ -156,25 +155,11 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument (JSONPath) must be constant string"); } - const ColumnPtr & arg_jsonpath = json_path_column.column; - const auto * arg_jsonpath_const = typeid_cast(arg_jsonpath.get()); - const auto * arg_jsonpath_string = typeid_cast(arg_jsonpath_const->getDataColumnPtr().get()); - - const ColumnPtr & arg_json = json_column.column; - const auto * col_json_const = typeid_cast(arg_json.get()); - const auto * col_json_string - = typeid_cast(col_json_const ? 
col_json_const->getDataColumnPtr().get() : arg_json.get()); - - /// Get data and offsets for 1 argument (JSONPath) - const ColumnString::Chars & chars_path = arg_jsonpath_string->getChars(); - const ColumnString::Offsets & offsets_path = arg_jsonpath_string->getOffsets(); - /// Prepare to parse 1 argument (JSONPath) - const char * query_begin = reinterpret_cast(&chars_path[0]); - const char * query_end = query_begin + offsets_path[0] - 1; + String query = typeid_cast(*json_path_column.column).getValue(); - /// Tokenize query - Tokens tokens(query_begin, query_end); + /// Tokenize the query + Tokens tokens(query.data(), query.data() + query.size()); /// Max depth 0 indicates that depth is not limited IParser::Pos token_iterator(tokens, parse_depth); @@ -188,10 +173,6 @@ public: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unable to parse JSONPath"); } - /// Get data and offsets for 2 argument (JSON) - const ColumnString::Chars & chars_json = col_json_string->getChars(); - const ColumnString::Offsets & offsets_json = col_json_string->getOffsets(); - JSONParser json_parser; using Element = typename JSONParser::Element; Element document; @@ -200,10 +181,9 @@ public: /// Parse JSON for every row Impl impl; GeneratorJSONPath generator_json_path(res); - for (const auto i : collections::range(0, input_rows_count)) + for (size_t i = 0; i < input_rows_count; ++i) { - std::string_view json{ - reinterpret_cast(&chars_json[offsets_json[i - 1]]), offsets_json[i] - offsets_json[i - 1] - 1}; + std::string_view json = json_column.column->getDataAt(i).toView(); document_ok = json_parser.parse(json, document); bool added_to_column = false; From 5e43b733b2467d398b1ff85e8940508bb2b98565 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 06:00:32 +0100 Subject: [PATCH 171/356] Add a test --- tests/queries/0_stateless/03003_sql_json_nonsense.reference | 1 + tests/queries/0_stateless/03003_sql_json_nonsense.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/03003_sql_json_nonsense.reference create mode 100644 tests/queries/0_stateless/03003_sql_json_nonsense.sql diff --git a/tests/queries/0_stateless/03003_sql_json_nonsense.reference b/tests/queries/0_stateless/03003_sql_json_nonsense.reference new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/tests/queries/0_stateless/03003_sql_json_nonsense.reference @@ -0,0 +1 @@ + diff --git a/tests/queries/0_stateless/03003_sql_json_nonsense.sql b/tests/queries/0_stateless/03003_sql_json_nonsense.sql new file mode 100644 index 00000000000..9b7beb42cf3 --- /dev/null +++ b/tests/queries/0_stateless/03003_sql_json_nonsense.sql @@ -0,0 +1 @@ +SELECT JSON_QUERY('{"x":1}', '$[\'hello\']', materialize(toLowCardinality('x'))); From 53e0d01d5e2909ee1370bcbeab93b4b8cfd3c26a Mon Sep 17 00:00:00 2001 From: beetelbrox <9376816+Beetelbrox@users.noreply.github.com> Date: Mon, 4 Mar 2024 10:28:27 +0100 Subject: [PATCH 172/356] Do not send metadata headers on unsupported multipartoperations --- src/IO/S3/Requests.cpp | 15 +++++++++++++++ src/IO/S3/Requests.h | 19 ++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/IO/S3/Requests.cpp b/src/IO/S3/Requests.cpp index 56d2e44a2c4..a4e61987bdf 100644 --- a/src/IO/S3/Requests.cpp +++ b/src/IO/S3/Requests.cpp @@ -52,6 +52,20 @@ Aws::Http::HeaderValueCollection CopyObjectRequest::GetRequestSpecificHeaders() return headers; } +void CompleteMultipartUploadRequest::SetAdditionalCustomHeaderValue(const Aws::String& headerName, const 
Aws::String& headerValue) +{ + // S3's CompleteMultipartUpload doesn't support metadata headers so we skip adding them + if(!headerName.starts_with("x-amz-meta-")) + Model::CompleteMultipartUploadRequest::SetAdditionalCustomHeaderValue(headerName, headerValue); +} + +void UploadPartRequest::SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) +{ + // S3's UploadPart doesn't support metadata headers so we skip adding them + if(!headerName.starts_with("x-amz-meta-")) + Model::UploadPartRequest::SetAdditionalCustomHeaderValue(headerName, headerValue); +} + Aws::String ComposeObjectRequest::SerializePayload() const { if (component_names.empty()) @@ -70,6 +84,7 @@ Aws::String ComposeObjectRequest::SerializePayload() const return payload_doc.ConvertToString(); } + void ComposeObjectRequest::AddQueryStringParameters(Aws::Http::URI & /*uri*/) const { } diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index bfb94a5a67e..e02ccd8d79e 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -107,12 +107,21 @@ using ListObjectsV2Request = ExtendedRequest; using ListObjectsRequest = ExtendedRequest; using GetObjectRequest = ExtendedRequest; -using CreateMultipartUploadRequest = ExtendedRequest; -using CompleteMultipartUploadRequest = ExtendedRequest; -using AbortMultipartUploadRequest = ExtendedRequest; -using UploadPartRequest = ExtendedRequest; -using UploadPartCopyRequest = ExtendedRequest; +class UploadPartRequest : public ExtendedRequest +{ +public: + void SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override; +}; +class CompleteMultipartUploadRequest : public ExtendedRequest +{ +public: + void SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override; +}; + +using CreateMultipartUploadRequest = ExtendedRequest; +using AbortMultipartUploadRequest = ExtendedRequest; +using UploadPartCopyRequest = ExtendedRequest; using PutObjectRequest = ExtendedRequest; using DeleteObjectRequest = ExtendedRequest; using DeleteObjectsRequest = ExtendedRequest; From 229945cdd2c1544f7eb6bdafa09f9da67dc8d01b Mon Sep 17 00:00:00 2001 From: beetelbrox <9376816+Beetelbrox@users.noreply.github.com> Date: Mon, 4 Mar 2024 10:32:45 +0100 Subject: [PATCH 173/356] Fix whitespace --- src/IO/S3/Requests.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index e02ccd8d79e..7b4c3698f10 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -122,6 +122,7 @@ public: using CreateMultipartUploadRequest = ExtendedRequest; using AbortMultipartUploadRequest = ExtendedRequest; using UploadPartCopyRequest = ExtendedRequest; + using PutObjectRequest = ExtendedRequest; using DeleteObjectRequest = ExtendedRequest; using DeleteObjectsRequest = ExtendedRequest; From 83c1c537d582dd5ebd461623f7bf1ff427b27e77 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 4 Mar 2024 10:48:50 +0100 Subject: [PATCH 174/356] Execute requests in order --- src/Coordination/KeeperStateMachine.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e87ef037285..0c398a0d549 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -440,10 +440,11 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n } ProfileEvents::increment(ProfileEvents::KeeperCommits); - keeper_context->setLastCommitIndex(log_idx); 
if (commit_callback) commit_callback(log_idx, *request_for_session); + + keeper_context->setLastCommitIndex(log_idx); } catch (...) { From cbad19956cd2a078bb9304f90e0675c9c081403e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 4 Mar 2024 10:09:19 +0000 Subject: [PATCH 175/356] Fix test with analyzer --- tests/queries/0_stateless/03000_minmax_index_first.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03000_minmax_index_first.sql b/tests/queries/0_stateless/03000_minmax_index_first.sql index 4db232880de..5dae245a0a0 100644 --- a/tests/queries/0_stateless/03000_minmax_index_first.sql +++ b/tests/queries/0_stateless/03000_minmax_index_first.sql @@ -12,4 +12,4 @@ PRIMARY KEY k; INSERT INTO skip_table SELECT number, intDiv(number, 4096) FROM numbers(1000000); -SELECT trim(explain) FROM ( EXPLAIN indexes = 1 SELECT * FROM skip_table WHERE v = 125) WHERE explain ilike '%Name%'; +SELECT trim(explain) FROM ( EXPLAIN indexes = 1 SELECT * FROM skip_table WHERE v = 125) WHERE explain like '%Name%'; From 2e64b6722f9eea99d850f11f18e95f00086dfb0a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 4 Mar 2024 11:42:21 +0100 Subject: [PATCH 176/356] Revert "Synchronize parsers" --- src/Interpreters/InterpreterSystemQuery.cpp | 7 +------ src/Parsers/ASTSystemQuery.cpp | 8 -------- src/Parsers/ASTSystemQuery.h | 5 ----- src/Parsers/ParserSystemQuery.cpp | 20 ++++++-------------- 4 files changed, 7 insertions(+), 33 deletions(-) diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 292c538c8c7..a078d99facf 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -1209,12 +1209,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() break; } case Type::DROP_DISK_METADATA_CACHE: - case Type::DROP_DISTRIBUTED_CACHE: - case Type::STOP_VIRTUAL_PARTS_UPDATE: - case Type::START_VIRTUAL_PARTS_UPDATE: - { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only available in ClickHouse Cloud, https://clickhouse.cloud/"); - } + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); case Type::RELOAD_DICTIONARY: case Type::RELOAD_DICTIONARIES: case Type::RELOAD_EMBEDDED_DICTIONARIES: diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 66f949ae3b5..0713737af95 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -172,8 +172,6 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s case Type::START_PULLING_REPLICATION_LOG: case Type::STOP_CLEANUP: case Type::START_CLEANUP: - case Type::START_VIRTUAL_PARTS_UPDATE: - case Type::STOP_VIRTUAL_PARTS_UPDATE: { if (table) { @@ -296,12 +294,6 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s } break; } - case Type::DROP_DISTRIBUTED_CACHE: - { - if (!distributed_cache_servive_id.empty()) - settings.ostr << (settings.hilite ? 
hilite_none : "") << " " << distributed_cache_servive_id; - break; - } case Type::UNFREEZE: { print_keyword(" WITH NAME "); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index b6fa790315e..9aa90f499d0 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -30,7 +30,6 @@ public: DROP_QUERY_CACHE, DROP_COMPILED_EXPRESSION_CACHE, DROP_FILESYSTEM_CACHE, - DROP_DISTRIBUTED_CACHE, DROP_DISK_METADATA_CACHE, DROP_SCHEMA_CACHE, DROP_FORMAT_SCHEMA_CACHE, @@ -99,8 +98,6 @@ public: STOP_VIEWS, CANCEL_VIEW, TEST_VIEW, - STOP_VIRTUAL_PARTS_UPDATE, - START_VIRTUAL_PARTS_UPDATE, END }; @@ -129,8 +126,6 @@ public: UInt64 seconds{}; String filesystem_cache_name; - String distributed_cache_servive_id; - std::string key_to_drop; std::optional offset_to_drop; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index facf1f8b820..a50e65aa134 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -14,6 +14,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} + [[nodiscard]] static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, bool require_table, bool allow_string_literal) { @@ -392,8 +397,6 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_PULLING_REPLICATION_LOG: case Type::STOP_CLEANUP: case Type::START_CLEANUP: - case Type::STOP_VIRTUAL_PARTS_UPDATE: - case Type::START_VIRTUAL_PARTS_UPDATE: if (!parseQueryWithOnCluster(res, pos, expected)) return false; parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); @@ -467,15 +470,6 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; break; } - case Type::DROP_DISTRIBUTED_CACHE: - { - ParserLiteral parser; - ASTPtr ast; - if (!parser.parse(pos, ast, expected)) - return false; - res->distributed_cache_servive_id = ast->as()->value.safeGet(); - break; - } case Type::SYNC_FILESYSTEM_CACHE: { ParserLiteral path_parser; @@ -488,9 +482,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & } case Type::DROP_DISK_METADATA_CACHE: { - if (!parseQueryWithOnClusterAndTarget(res, pos, expected, SystemQueryTargetType::Disk)) - return false; - break; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); } case Type::DROP_SCHEMA_CACHE: { From 41ff818a20c8b196f08155445cc29504346d998b Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 4 Mar 2024 12:53:41 +0100 Subject: [PATCH 177/356] fix toStartOfInterval --- src/Functions/DateTimeTransforms.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 5467cf77085..2c5d8add0db 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -538,7 +538,7 @@ struct ToStartOfInterval { Int64 scale_diff = scale_multiplier / static_cast(1000000); if (t >= 0) [[likely]] /// When we divide the `t` value we should round the result - return (t / microseconds + scale_diff / 2) / scale_diff * microseconds; + return (t + scale_diff / 2) / (microseconds * scale_diff) * microseconds; else return ((t + 1) / microseconds / scale_diff - 1) * microseconds; } @@ -581,7 +581,7 @@ struct ToStartOfInterval { Int64 scale_diff = scale_multiplier / static_cast(1000); if (t >= 0) [[likely]] /// When we divide the `t` value we should round the result - return (t / milliseconds + 
scale_diff / 2) / scale_diff * milliseconds; + return (t + scale_diff / 2) / (milliseconds * scale_diff) * milliseconds; else return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; } From e8f4e4eb772cf7576d4307af7e5e4e84c8600904 Mon Sep 17 00:00:00 2001 From: beetelbrox <9376816+Beetelbrox@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:54:55 +0100 Subject: [PATCH 178/356] Fix formatting --- src/IO/S3/Requests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/S3/Requests.cpp b/src/IO/S3/Requests.cpp index a4e61987bdf..50ed2e21bfc 100644 --- a/src/IO/S3/Requests.cpp +++ b/src/IO/S3/Requests.cpp @@ -55,14 +55,14 @@ Aws::Http::HeaderValueCollection CopyObjectRequest::GetRequestSpecificHeaders() void CompleteMultipartUploadRequest::SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) { // S3's CompleteMultipartUpload doesn't support metadata headers so we skip adding them - if(!headerName.starts_with("x-amz-meta-")) + if (!headerName.starts_with("x-amz-meta-")) Model::CompleteMultipartUploadRequest::SetAdditionalCustomHeaderValue(headerName, headerValue); } void UploadPartRequest::SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) { // S3's UploadPart doesn't support metadata headers so we skip adding them - if(!headerName.starts_with("x-amz-meta-")) + if (!headerName.starts_with("x-amz-meta-")) Model::UploadPartRequest::SetAdditionalCustomHeaderValue(headerName, headerValue); } From 4e55f2c90dae6b3c52e6af0107993613cd8b563a Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 4 Mar 2024 13:00:10 +0100 Subject: [PATCH 179/356] update test for toStartof... --- .../02956_fix_to_start_of_milli_microsecond.reference | 3 +++ .../0_stateless/02956_fix_to_start_of_milli_microsecond.sql | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference index d3a002c4fd4..413c79828c7 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -2,3 +2,6 @@ 2023-10-09 10:11:12.001 2023-10-09 10:11:12.000 2023-10-09 10:11:12.000 +2023-10-09 00:00:00.000000 +2023-10-09 00:00:00.000 +2023-10-09 00:00:00 diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql index 178f21a9e63..15753d4532c 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql @@ -1,4 +1,7 @@ SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(1)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000500', 6), toIntervalMillisecond(1)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000499', 6), toIntervalMillisecond(1)); -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(10)); \ No newline at end of file +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(10)); +select toStartOfInterval(toDateTime64('2023-10-09 00:01:34', 9), toIntervalMicrosecond(100000000)); +select toStartOfInterval(toDateTime64('2023-10-09 00:01:34', 9), toIntervalMillisecond(100000)); +select toStartOfInterval(toDateTime64('2023-10-09 
00:01:34', 9), toIntervalSecond(100)); \ No newline at end of file From f212c9c2302727e6f3732eaf66ae2d438d4cdd58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 4 Mar 2024 13:14:13 +0100 Subject: [PATCH 180/356] Fix crash in arrayEnumerateRanked --- src/Functions/array/arrayEnumerateRanked.cpp | 135 ++++++++++-------- src/Functions/array/arrayEnumerateRanked.h | 22 +-- .../0_stateless/00909_arrayEnumerateUniq.sql | 18 +-- .../03003_arrayEnumerate_crash.reference | 0 .../03003_arrayEnumerate_crash.sql | 2 + 5 files changed, 101 insertions(+), 76 deletions(-) create mode 100644 tests/queries/0_stateless/03003_arrayEnumerate_crash.reference create mode 100644 tests/queries/0_stateless/03003_arrayEnumerate_crash.sql diff --git a/src/Functions/array/arrayEnumerateRanked.cpp b/src/Functions/array/arrayEnumerateRanked.cpp index dd597d607dc..69d8954bfcf 100644 --- a/src/Functions/array/arrayEnumerateRanked.cpp +++ b/src/Functions/array/arrayEnumerateRanked.cpp @@ -1,8 +1,8 @@ -#include #include +#include #include -#include "arrayEnumerateRanked.h" +#include namespace DB { @@ -12,88 +12,105 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments) +ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments, const char * function_name) { const size_t num_arguments = arguments.size(); + if (!num_arguments) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Missing arguments for function arrayEnumerateUniqRanked"); DepthType clear_depth = 1; - DepthTypes depths; + size_t i = 0; + if (const DataTypeArray * type_array = typeid_cast(arguments[0].type.get()); !type_array) + { + /// If the first argument is not an array, it must be a const positive and non zero number + const auto & depth_column = arguments[i].column; + if (!depth_column || !isColumnConst(*depth_column)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument of {} must be Const(UInt64)", function_name); + Field f = assert_cast(*depth_column).getField(); + if (f.getType() != Field::Types::UInt64 || f.safeGet() == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument of {} must be a positive integer", function_name); - /// function signature is the following: - /// f(c0, arr1, c1, arr2, c2, ...) - /// - /// c0 is something called "clear_depth" here. + clear_depth = static_cast(f.safeGet()); + i++; + } + + + /// The rest of the arguments must be in the shape: arr1, c1, arr2, c2, ... /// cN... - how deep to look into the corresponding arrN, (called "depths" here) - /// may be omitted - then it means "look at the full depth". - - size_t array_num = 0; - DepthType prev_array_depth = 0; - for (size_t i = 0; i < num_arguments; ++i) + /// may be omitted - then it means "look at the full depth" + DepthTypes depths; + for (; i < num_arguments; i++) { const DataTypePtr & type = arguments[i].type; - const DataTypeArray * type_array = typeid_cast(type.get()); + const DataTypeArray * current_type_array = typeid_cast(type.get()); + if (!current_type_array) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect argument {} type of function {}. 
Expected an Array, got {}", + i + 1, + function_name, + type->getName()); - if (type_array) + if (i == num_arguments - 1) { - if (depths.size() < array_num && prev_array_depth) - depths.emplace_back(prev_array_depth); - - prev_array_depth = static_cast(type_array->getNumberOfDimensions()); - ++array_num; + depths.emplace_back(current_type_array->getNumberOfDimensions()); } else { - const auto & depth_column = arguments[i].column; - - if (depth_column && isColumnConst(*depth_column)) + const DataTypeArray * next_argument_array = typeid_cast(arguments[i + 1].type.get()); + if (next_argument_array) { - UInt64 value = assert_cast(*depth_column).getValue(); - if (!value) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Incorrect arguments for function arrayEnumerateUniqRanked " - "or arrayEnumerateDenseRanked: depth ({}) cannot be less or equal 0.", - std::to_string(value)); - - if (i == 0) - { - clear_depth = static_cast(value); - } - else - { - if (depths.size() >= array_num) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Incorrect arguments for function arrayEnumerateUniqRanked " - "or arrayEnumerateDenseRanked: depth ({}) for missing array.", - std::to_string(value)); - if (value > prev_array_depth) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Arguments for function arrayEnumerateUniqRanked/arrayEnumerateDenseRanked incorrect: depth={}" - " for array with depth={}.", - std::to_string(value), std::to_string(prev_array_depth)); - - depths.emplace_back(value); - } + depths.emplace_back(current_type_array->getNumberOfDimensions()); + } + else + { + i++; + /// The following argument is not array, so it must be a const positive integer with the depth + const auto & depth_column = arguments[i].column; + if (!depth_column || !isColumnConst(*depth_column)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect argument {} type of function {}. Expected an Array or Const(UInt64), got {}", + i + 1, + function_name, + arguments[i].type->getName()); + Field f = assert_cast(*depth_column).getField(); + if (f.getType() != Field::Types::UInt64 || f.safeGet() == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect argument {} of function {}. Expected a positive integer", + i + 1, + function_name); + UInt64 value = f.safeGet(); + UInt64 prev_array_depth = current_type_array->getNumberOfDimensions(); + if (value > prev_array_depth) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect argument {} of function {}. 
Required depth '{}' is larger than the array depth ({})", + i + 1, + function_name, + value, + prev_array_depth); + depths.emplace_back(value); } } } - if (depths.size() < array_num) - depths.emplace_back(prev_array_depth); - if (depths.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: " - "at least one array should be passed."); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Incorrect arguments for function {}: At least one array should be passed", function_name); DepthType max_array_depth = 0; for (auto depth : depths) max_array_depth = std::max(depth, max_array_depth); if (clear_depth > max_array_depth) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: " - "clear_depth ({}) can't be larger than max_array_depth ({}).", - std::to_string(clear_depth), std::to_string(max_array_depth)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect arguments for function {}: clear_depth ({}) can't be larger than max_array_depth ({})", + function_name, + clear_depth, + max_array_depth); return {clear_depth, depths, max_array_depth}; } diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 1a920260906..04fa305368d 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -84,7 +84,7 @@ struct ArraysDepths }; /// Return depth info about passed arrays -ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments); +ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments, const char * function_name); template class FunctionArrayEnumerateRankedExtended : public IFunction @@ -105,7 +105,7 @@ public: "Number of arguments for function {} doesn't match: passed {}, should be at least 1.", getName(), arguments.size()); - const ArraysDepths arrays_depths = getArraysDepths(arguments); + const ArraysDepths arrays_depths = getArraysDepths(arguments, Derived::name); /// Return type is the array of the depth as the maximum effective depth of arguments, containing UInt32. @@ -154,7 +154,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( Columns array_holders; ColumnPtr offsets_column; - const ArraysDepths arrays_depths = getArraysDepths(arguments); + const ArraysDepths arrays_depths = getArraysDepths(arguments, Derived::name); /// If the column is Array - return it. If the const Array - materialize it, keep ownership and return. 
auto get_array_column = [&](const auto & column) -> const DB::ColumnArray * @@ -213,17 +213,23 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( { if (*offsets_by_depth[col_depth] != array->getOffsets()) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "Lengths and effective depths of all arrays passed to {} must be equal.", getName()); + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, + "Lengths and effective depths of all arrays passed to {} must be equal", + getName()); } } } if (col_depth < arrays_depths.depths[array_num]) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "{}: Passed array number {} depth ({}) is more than the actual array depth ({}).", - getName(), array_num, std::to_string(arrays_depths.depths[array_num]), col_depth); + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, + "{}: Passed array number {} depth ({}) is more than the actual array depth ({})", + getName(), + array_num, + std::to_string(arrays_depths.depths[array_num]), + col_depth); } auto * array_data = &array->getData(); diff --git a/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql b/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql index 33097c99272..0bdb338e9d2 100644 --- a/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql +++ b/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql @@ -181,15 +181,15 @@ SELECT arrayEnumerateUniqRanked([1,2], 1, 2); -- { serverError 36 } SELECT arrayEnumerateUniqRanked([1,2], 1, 3, 4, 5); -- { serverError 36 } SELECT arrayEnumerateUniqRanked([1,2], 1, 3, [4], 5); -- { serverError 36 } SELECT arrayEnumerateDenseRanked([[[[[[[[[[42]]]]]]]]]]); -SELECT arrayEnumerateUniqRanked('wat', [1,2]); -- { serverError 170 } -SELECT arrayEnumerateUniqRanked(1, [1,2], 'boom'); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(['\0'], -8363126); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(-10, ['\0'], -8363126); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(1, ['\0'], -8363126); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(-101, ['\0']); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(1.1, [10,20,10,30]); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked([10,20,10,30], 0.4); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked([10,20,10,30], 1.8); -- { serverError 170 } +SELECT arrayEnumerateUniqRanked('wat', [1,2]); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateUniqRanked(1, [1,2], 'boom'); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(['\0'], -8363126); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(-10, ['\0'], -8363126); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(1, ['\0'], -8363126); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(-101, ['\0']); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(1.1, [10,20,10,30]); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked([10,20,10,30], 0.4); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked([10,20,10,30], 1.8); -- { serverError BAD_ARGUMENTS } SELECT arrayEnumerateUniqRanked(1, [], 1000000000); -- { serverError 36 } diff --git a/tests/queries/0_stateless/03003_arrayEnumerate_crash.reference b/tests/queries/0_stateless/03003_arrayEnumerate_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03003_arrayEnumerate_crash.sql b/tests/queries/0_stateless/03003_arrayEnumerate_crash.sql new file mode 100644 index 
00000000000..21102ddbb6a --- /dev/null +++ b/tests/queries/0_stateless/03003_arrayEnumerate_crash.sql @@ -0,0 +1,2 @@ +SELECT arrayEnumerateUniqRanked(arrayEnumerateUniqRanked([[1, 2, 3], [2, 2, 1], [3]]), materialize(1 AS x) OR toLowCardinality(-9223372036854775808)); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateUniqRanked([[1, 2, 3], [2, 2, 1], [3]], number) FROM numbers(10); -- { serverError BAD_ARGUMENTS } From ddf0dd7eb8784b3fb49ccf948707fe509c5f45ee Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 4 Mar 2024 12:19:47 +0000 Subject: [PATCH 181/356] Fix crash when using input() in INSERT SELECT JOIN --- src/Interpreters/Context.cpp | 2 +- .../03005_input_function_in_join.reference | 0 .../0_stateless/03005_input_function_in_join.sql | 11 +++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03005_input_function_in_join.reference create mode 100644 tests/queries/0_stateless/03005_input_function_in_join.sql diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 8304a876fb1..7f51f41ecae 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1790,7 +1790,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions; - if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable()) + if (select_query_hint && use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable()) { const auto & insert_columns = DatabaseCatalog::instance() .getTable(getInsertionTable(), shared_from_this()) diff --git a/tests/queries/0_stateless/03005_input_function_in_join.reference b/tests/queries/0_stateless/03005_input_function_in_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03005_input_function_in_join.sql b/tests/queries/0_stateless/03005_input_function_in_join.sql new file mode 100644 index 00000000000..85f275ae11f --- /dev/null +++ b/tests/queries/0_stateless/03005_input_function_in_join.sql @@ -0,0 +1,11 @@ +create table test (a Int8) engine = MergeTree order by tuple(); +INSERT INTO test +SELECT * FROM ( + SELECT number + FROM system.numbers + LIMIT 10 +) AS x +INNER JOIN input('a UInt64') AS y ON x.number = y.a +Format CSV 42; -- {serverError INVALID_USAGE_OF_INPUT} +drop table test; + From c634012dbf8fa4570f9f60ff3e17b14d457f3c3d Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Mon, 4 Mar 2024 09:26:25 +0100 Subject: [PATCH 182/356] Reproducer for insert-select + insert_deduplication_token bug --- ...001_insert_threads_deduplication.reference | 10 +++ .../03001_insert_threads_deduplication.sh | 82 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 tests/queries/0_stateless/03001_insert_threads_deduplication.reference create mode 100755 tests/queries/0_stateless/03001_insert_threads_deduplication.sh diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.reference b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference new file mode 100644 index 00000000000..0c6a5a55576 --- /dev/null +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference @@ -0,0 +1,10 @@ +This bug has been there forever. 
Present in 22.2 +- When using multiple threads the insert produces 3 parts causing undesired deduplication. +- When using a single thread the insert produces 1 part without deduplication. +1 +4 +This bug has been introduced in CH 24.2+. See https://github.com/ClickHouse/ClickHouse/pull/59448 +- When using remote function and multiple threads the insert produces 3 parts causing undesired deduplication. +- When using remote function and a single thread the insert produces 1 part without deduplication. +1 +4 diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.sh b/tests/queries/0_stateless/03001_insert_threads_deduplication.sh new file mode 100755 index 00000000000..cf87f7c2c67 --- /dev/null +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.sh @@ -0,0 +1,82 @@ +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo " +DROP TABLE IF EXISTS landing SYNC; +CREATE TABLE landing +( + timestamp DateTime64(3), + status String, + id String +) +ENGINE = MergeTree() +ORDER BY timestamp; + +SYSTEM STOP MERGES landing; -- Stopping merges to force 3 parts + +INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; +INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; +INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; + +DROP TABLE IF EXISTS ds SYNC; + +CREATE TABLE ds +( + timestamp DateTime64(3), + status String, + id String +) +ENGINE = MergeTree() +ORDER BY timestamp +SETTINGS non_replicated_deduplication_window=1000; + +SELECT 'This bug has been there forever. Present in 22.2'; +SELECT '- When using multiple threads the insert produces 3 parts causing undesired deduplication.'; +SELECT '- When using a single thread the insert produces 1 part without deduplication.'; + +INSERT INTO ds SELECT * FROM landing +SETTINGS insert_deduplicate=1, insert_deduplication_token='token1', + max_insert_threads=5; + +SELECT count() FROM ds; + +INSERT INTO ds SELECT * FROM landing +SETTINGS insert_deduplicate=1, insert_deduplication_token='token2', + max_insert_threads=1; + +SELECT count() FROM ds; +" | $CLICKHOUSE_CLIENT -n + +echo " +CREATE TABLE ds_remote +( + timestamp DateTime64(3), + status String, + id String +) +ENGINE = MergeTree() +ORDER BY timestamp +SETTINGS non_replicated_deduplication_window=1000; + +SELECT 'This bug has been introduced in CH 24.2+. 
See https://github.com/ClickHouse/ClickHouse/pull/59448'; +SELECT '- When using remote function and multiple threads the insert produces 3 parts causing undesired deduplication.'; +SELECT '- When using remote function and a single thread the insert produces 1 part without deduplication.'; + +INSERT INTO ds_remote SELECT * FROM remote('localhost:$CLICKHOUSE_PORT_TCP', $CLICKHOUSE_DATABASE, landing) +SETTINGS insert_deduplicate=1, insert_deduplication_token='token1', + max_insert_threads=5; + +SELECT count() FROM ds_remote; + +INSERT INTO ds_remote SELECT * FROM remote('localhost:$CLICKHOUSE_PORT_TCP', $CLICKHOUSE_DATABASE, landing) +SETTINGS insert_deduplicate=1, insert_deduplication_token='token2', + max_insert_threads=1; + +SELECT count() FROM ds_remote; +" | $CLICKHOUSE_LOCAL -n + +echo " +DROP TABLE IF EXISTS landing SYNC; +DROP TABLE IF EXISTS ds SYNC; +" | $CLICKHOUSE_CLIENT -n From 63747271e8b57076467811f5a537d63c60e71cb3 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Mon, 4 Mar 2024 09:53:09 +0100 Subject: [PATCH 183/356] Fix test --- tests/queries/0_stateless/03001_insert_threads_deduplication.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.sh b/tests/queries/0_stateless/03001_insert_threads_deduplication.sh index cf87f7c2c67..154e578a7a8 100755 --- a/tests/queries/0_stateless/03001_insert_threads_deduplication.sh +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.sh @@ -1,3 +1,5 @@ +#!/bin/bash + CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh From 63e40203d1627669576a18a83d5f0dbf139e86cd Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 29 Feb 2024 22:00:29 +0100 Subject: [PATCH 184/356] Use python zipfile to have x-platform idempotent lambda packages --- .../build_and_deploy_archive.sh | 54 +++++++++++-------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh index 6ba0987010a..aa0ff912567 100644 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -29,34 +29,46 @@ if [ -e "$PACKAGE.zip" ] && [ -z "$FORCE" ]; then [ -n "$REBUILD" ] || exit 0 fi +docker_cmd=( + docker run -i --net=host --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash + --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" +) rm -rf "$PACKAGE" "$PACKAGE".zip mkdir "$PACKAGE" cp app.py "$PACKAGE" if [ -f requirements.txt ]; then VENV=lambda-venv rm -rf "$VENV" - docker run --net=host --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash \ - --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" \ - -exc " - '$PY_EXEC' -m venv '$VENV' && - source '$VENV/bin/activate' && - pip install -r requirements.txt && - # To have consistent pyc files - find '$VENV/lib' -name '*.pyc' -delete - find '$VENV/lib' ! -type d -exec touch -t 201212121212 {} + - python -m compileall - " - cp -rT "$VENV/lib/$PY_EXEC/site-packages/" "$PACKAGE" - rm -r "$PACKAGE"/{pip,pip-*,setuptools,setuptools-*} - # zip stores metadata about timestamps - find "$PACKAGE" ! 
-type d -exec touch -t 201212121212 {} + + "${docker_cmd[@]}" -ex < Date: Mon, 4 Mar 2024 11:49:45 +0100 Subject: [PATCH 185/356] Remove python bytecode, make consistent file-permissions It's impossible to have persistent pyc files Each time they are built with different content, for example: > cmp -bl lambda-package*/charset_normalizer/__pycache__/constant.cpython-310.pyc 15582 6 ^F 4 ^D 15583 164 t 155 m 15584 141 a 142 b 15586 164 t 163 s 15587 151 i 332 M-Z 15588 163 s 6 ^F 15589 332 M-Z 164 t 15590 4 ^D 141 a 15591 155 m 143 c 15592 142 b 164 t 15593 143 c 151 i 17425 74 < 75 = 17428 76 > 46 & 17429 332 M-Z 372 M-z 17431 173 { 55 - 17434 75 = 174 | 17437 57 / 72 : 17440 54 , 73 ; 17441 372 M-z 332 M-Z 17443 174 | 175 } 17446 55 - 54 , 17447 372 M-z 332 M-Z 17449 46 & 173 { 17452 72 : 76 > 17455 42 " 74 < 17458 73 ; 133 [ 17461 135 ] 42 " 17464 133 [ 135 ] 17465 332 M-Z 372 M-z 17467 175 } 57 / 17503 332 M-Z 162 r 17504 5 ^E 130 X 17505 152 j 0 ^@ 17506 157 o 0 ^@ 17507 150 h 0 ^@ 17508 141 a 332 M-Z 17509 142 b 5 ^E 17510 162 r 152 j 17511 130 X 157 o 17512 0 ^@ 150 h 17513 0 ^@ 141 a 17514 0 ^@ 142 b 17536 5 ^E 2 ^B 17537 143 c 150 h 17538 160 p 172 z 17539 71 9 332 M-Z 17540 65 5 5 ^E 17541 60 0 143 c 17542 332 M-Z 160 p 17543 2 ^B 71 9 17544 150 h 65 5 17545 172 z 60 0 --- tests/ci/team_keys_lambda/build_and_deploy_archive.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh index aa0ff912567..b72bce4a677 100644 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -45,10 +45,9 @@ if [ -f requirements.txt ]; then pip install -r requirements.txt && # To have consistent pyc files find '$VENV/lib' -name '*.pyc' -delete - find '$VENV/lib' ! 
-type d -exec touch -t 201212121212 {} + - python -m compileall cp -rT '$VENV/lib/$PY_EXEC/site-packages/' '$PACKAGE' rm -r '$PACKAGE'/{pip,pip-*,setuptools,setuptools-*} + chmod 0777 -R '$PACKAGE' EOF fi # Create zip archive via python zipfile to have it cross-platform From 9c5ed092f8bbb4e5d5dad5b3819879ed6fa04be1 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 4 Mar 2024 14:15:07 +0100 Subject: [PATCH 186/356] Fix CREATE VIEW with scalar subquery #ci_set_analyzer --- src/Planner/PlannerActionsVisitor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 89d843a28ac..e5610dd6fe7 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -110,7 +110,7 @@ public: } else { - if (constant_node.hasSourceExpression()) + if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) { if (constant_node.receivedFromInitiatorServer()) result = calculateActionNodeNameForConstant(constant_node); @@ -566,7 +566,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi } else { - if (constant_node.hasSourceExpression()) + if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) { if (constant_node.receivedFromInitiatorServer()) return calculateActionNodeNameForConstant(constant_node); From 7a851bece3f2e7ce2dfe6e819da38975fa5776eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 4 Mar 2024 14:15:15 +0100 Subject: [PATCH 187/356] Update tzdata to 2024a --- contrib/cctz | 2 +- src/Core/SettingsFields.cpp | 9 +++++++++ src/Core/SettingsFields.h | 8 +------- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/contrib/cctz b/contrib/cctz index 8529bcef5cd..7918cb7afe8 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 8529bcef5cd996b7c0f4d7475286b76b5d126c4c +Subproject commit 7918cb7afe82e53428e39a045a437fdfd4f3df47 diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index e514ced6f68..001d3e09dc9 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -8,7 +8,9 @@ #include #include #include + #include +#include #include @@ -544,6 +546,13 @@ void SettingFieldTimezone::readBinary(ReadBuffer & in) *this = std::move(str); } +void SettingFieldTimezone::validateTimezone(const std::string & tz_str) +{ + cctz::time_zone validated_tz; + if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", tz_str); +} + String SettingFieldCustom::toString() const { return value.dump(); diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 22c1cf8a267..452f3f149ab 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -608,12 +607,7 @@ struct SettingFieldTimezone void readBinary(ReadBuffer & in); private: - void validateTimezone(const std::string & tz_str) - { - cctz::time_zone validated_tz; - if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz)) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", tz_str); - } + void validateTimezone(const std::string & tz_str); }; /// Can keep a value of any type. Used for user-defined settings. 
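[Editorial note on PATCH 187/356 above] Besides bumping the cctz submodule to pick up tzdata 2024a, this patch moves the body of `SettingFieldTimezone::validateTimezone` out of `SettingsFields.h` into `SettingsFields.cpp`, presumably so the cctz headers no longer have to be pulled in by every translation unit that includes the settings field definitions. The check itself is unchanged: an empty value is accepted (no timezone override), and any non-empty value must load as a cctz time zone, otherwise a BAD_ARGUMENTS exception is thrown. Below is a minimal standalone sketch of that check, for illustration only; the `<cctz/time_zone.h>` include and the `main` driver are assumptions of the sketch, not part of the patch.

``` cpp
// Sketch of the timezone-name validation used by SettingFieldTimezone:
// empty string -> accepted, anything else must be loadable by cctz.
#include <cctz/time_zone.h>
#include <iostream>
#include <string>

static bool isValidTimezoneName(const std::string & tz_str)
{
    cctz::time_zone tz;
    // cctz::load_time_zone returns false for unknown zone names.
    return tz_str.empty() || cctz::load_time_zone(tz_str, &tz);
}

int main()
{
    for (const char * name : {"Europe/Madrid", "Not/A_Zone", ""})
        std::cout << '"' << name << "\" -> " << (isValidTimezoneName(name) ? "ok" : "invalid") << '\n';
    return 0;
}
```

In the actual setting field the failure path throws `DB::Exception` with `BAD_ARGUMENTS` rather than returning `false`; the boolean form above just keeps the sketch self-contained.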
From 1da1bbeae2aee280fca052acd2b19672188a1ce1 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Mon, 4 Mar 2024 14:35:57 +0100 Subject: [PATCH 188/356] Set streams to 1 when using insert_deduplication_token --- src/Interpreters/InterpreterInsertQuery.cpp | 6 ++++++ .../03001_insert_threads_deduplication.reference | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index e27a8bd414b..df833803970 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -513,6 +513,12 @@ BlockIO InterpreterInsertQuery::execute() const bool resize_to_max_insert_threads = !table->isView() && views.empty(); pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads : std::min(settings.max_insert_threads, pipeline.getNumStreams()); + + /// Deduplication when passing insert_deduplication_token breaks if using more than one thread + const String & deduplication_token = settings.insert_deduplication_token; + if (!deduplication_token.empty()) + pre_streams_size = 1; + if (table->supportsParallelInsert()) sink_streams_size = pre_streams_size; } diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.reference b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference index 0c6a5a55576..0791b98cc09 100644 --- a/tests/queries/0_stateless/03001_insert_threads_deduplication.reference +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference @@ -1,10 +1,10 @@ This bug has been there forever. Present in 22.2 - When using multiple threads the insert produces 3 parts causing undesired deduplication. - When using a single thread the insert produces 1 part without deduplication. -1 -4 +3 +6 This bug has been introduced in CH 24.2+. See https://github.com/ClickHouse/ClickHouse/pull/59448 - When using remote function and multiple threads the insert produces 3 parts causing undesired deduplication. - When using remote function and a single thread the insert produces 1 part without deduplication. -1 -4 +3 +6 From 9f5fe176ada41ba7bf72f19df53d79f65290a401 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 4 Mar 2024 14:41:01 +0100 Subject: [PATCH 189/356] Catch exceptions on finalize --- src/Server/InterserverIOHTTPHandler.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index c41d68bab02..d2e0ed93667 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -117,7 +117,17 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe if (auto [message, success] = checkAuthentication(request); success) { processQuery(request, response, used_output); - used_output.out->finalize(); + + try + { + used_output.out->finalize(); + } + catch (...) 
+ { + tryLogCurrentException(log, "Failed to finalize response write buffer"); + return; + } + LOG_DEBUG(log, "Done processing query"); } else From f2387262ce9b4e440bf39130e4c08620a523371a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 4 Mar 2024 15:32:15 +0100 Subject: [PATCH 190/356] Fix crash with different allow_experimental_analyzer value in subqueries --- src/Interpreters/executeQuery.cpp | 31 +++++++++++++++++++ .../03003_analyzer_setting.reference | 0 .../0_stateless/03003_analyzer_setting.sql | 9 ++++++ 3 files changed, 40 insertions(+) create mode 100644 tests/queries/0_stateless/03003_analyzer_setting.reference create mode 100644 tests/queries/0_stateless/03003_analyzer_setting.sql diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 53fd5088372..18f0cd6601f 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -651,6 +651,36 @@ static void setQuerySpecificSettings(ASTPtr & ast, ContextMutablePtr context) } } +void validateAnalyzerSettings(ASTPtr ast, bool context_value) +{ + if (ast->as()) + return; + + bool top_level = context_value; + + std::vector nodes_to_process{ ast }; + while (!nodes_to_process.empty()) + { + auto node = nodes_to_process.back(); + nodes_to_process.pop_back(); + + if (auto * set_query = node->as()) + { + if (auto * value = set_query->changes.tryGet("allow_experimental_analyzer")) + { + if (top_level != value->safeGet()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Setting 'allow_experimental_analyzer' is changed in the subquery. Top level value: {}", top_level); + } + } + + for (auto child : node->children) + { + if (child) + nodes_to_process.push_back(std::move(child)); + } + } +} + static std::tuple executeQueryImpl( const char * begin, const char * end, @@ -861,6 +891,7 @@ static std::tuple executeQueryImpl( /// Interpret SETTINGS clauses as early as possible (before invoking the corresponding interpreter), /// to allow settings to take effect. 
InterpreterSetQuery::applySettingsFromQuery(ast, context); + validateAnalyzerSettings(ast, context->getSettingsRef().allow_experimental_analyzer); if (auto * insert_query = ast->as()) insert_query->tail = istr; diff --git a/tests/queries/0_stateless/03003_analyzer_setting.reference b/tests/queries/0_stateless/03003_analyzer_setting.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03003_analyzer_setting.sql b/tests/queries/0_stateless/03003_analyzer_setting.sql new file mode 100644 index 00000000000..43e1bd0d955 --- /dev/null +++ b/tests/queries/0_stateless/03003_analyzer_setting.sql @@ -0,0 +1,9 @@ +CREATE TABLE test (dummy Int8) ENGINE = Distributed(test_cluster_two_shards, 'system', 'one'); + +SET allow_experimental_analyzer = 0; + +SELECT * FROM (SELECT * FROM test SETTINGS allow_experimental_analyzer = 1); -- { serverError LOGICAL_ERROR} + +SET allow_experimental_analyzer = 1; + +SELECT * FROM (SELECT * FROM test SETTINGS allow_experimental_analyzer = 0); -- { serverError LOGICAL_ERROR} From d7de634123d58e475f8023b98245ded4c9eaf66a Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 4 Mar 2024 14:37:49 +0000 Subject: [PATCH 191/356] Update test --- .../0_stateless/03005_input_function_in_join.reference | 1 + tests/queries/0_stateless/03005_input_function_in_join.sql | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03005_input_function_in_join.reference b/tests/queries/0_stateless/03005_input_function_in_join.reference index e69de29bb2d..0cfbf08886f 100644 --- a/tests/queries/0_stateless/03005_input_function_in_join.reference +++ b/tests/queries/0_stateless/03005_input_function_in_join.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/03005_input_function_in_join.sql b/tests/queries/0_stateless/03005_input_function_in_join.sql index 85f275ae11f..8a6b4a48a8d 100644 --- a/tests/queries/0_stateless/03005_input_function_in_join.sql +++ b/tests/queries/0_stateless/03005_input_function_in_join.sql @@ -1,11 +1,14 @@ +drop table if exists test; create table test (a Int8) engine = MergeTree order by tuple(); INSERT INTO test -SELECT * FROM ( +SELECT x.number FROM ( SELECT number FROM system.numbers LIMIT 10 ) AS x INNER JOIN input('a UInt64') AS y ON x.number = y.a -Format CSV 42; -- {serverError INVALID_USAGE_OF_INPUT} +Format CSV 2 +; +select * from test; drop table test; From 8d106f6c7011e4a938dbc6b6dfa5d5970b8eb5be Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 4 Mar 2024 06:38:13 -0800 Subject: [PATCH 192/356] Fix_max_query_size_for_kql_compound_operator : update test case for better readability --- tests/queries/0_stateless/02366_kql_mvexpand.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.sql b/tests/queries/0_stateless/02366_kql_mvexpand.sql index ac1a6d9a8f4..af336a19638 100644 --- a/tests/queries/0_stateless/02366_kql_mvexpand.sql +++ b/tests/queries/0_stateless/02366_kql_mvexpand.sql @@ -35,5 +35,5 @@ print '-- mv_expand_test_table | mv-expand c to typeof(bool) --'; mv_expand_test_table | mv-expand c to typeof(bool); SET max_query_size = 28; SET dialect='kusto'; -mv_expand_test_table | mv-expand c, d; -- { serverError 62 } +mv_expand_test_table | mv-expand c, d; -- { serverError SYNTAX_ERROR } SET max_query_size=262144; From b72507fdf6fa74e40e42b2e8bc1af56b7bf93725 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Mon, 4 Mar 2024 15:48:38 +0100 Subject: [PATCH 193/356] Simplify test now that the 
cause is clear --- ...001_insert_threads_deduplication.reference | 8 -- .../03001_insert_threads_deduplication.sh | 84 ------------------- .../03001_insert_threads_deduplication.sql | 42 ++++++++++ 3 files changed, 42 insertions(+), 92 deletions(-) delete mode 100755 tests/queries/0_stateless/03001_insert_threads_deduplication.sh create mode 100644 tests/queries/0_stateless/03001_insert_threads_deduplication.sql diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.reference b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference index 0791b98cc09..2559e5c49e7 100644 --- a/tests/queries/0_stateless/03001_insert_threads_deduplication.reference +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference @@ -1,10 +1,2 @@ -This bug has been there forever. Present in 22.2 -- When using multiple threads the insert produces 3 parts causing undesired deduplication. -- When using a single thread the insert produces 1 part without deduplication. -3 -6 -This bug has been introduced in CH 24.2+. See https://github.com/ClickHouse/ClickHouse/pull/59448 -- When using remote function and multiple threads the insert produces 3 parts causing undesired deduplication. -- When using remote function and a single thread the insert produces 1 part without deduplication. 3 6 diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.sh b/tests/queries/0_stateless/03001_insert_threads_deduplication.sh deleted file mode 100755 index 154e578a7a8..00000000000 --- a/tests/queries/0_stateless/03001_insert_threads_deduplication.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/bash - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -echo " -DROP TABLE IF EXISTS landing SYNC; -CREATE TABLE landing -( - timestamp DateTime64(3), - status String, - id String -) -ENGINE = MergeTree() -ORDER BY timestamp; - -SYSTEM STOP MERGES landing; -- Stopping merges to force 3 parts - -INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; -INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; -INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; - -DROP TABLE IF EXISTS ds SYNC; - -CREATE TABLE ds -( - timestamp DateTime64(3), - status String, - id String -) -ENGINE = MergeTree() -ORDER BY timestamp -SETTINGS non_replicated_deduplication_window=1000; - -SELECT 'This bug has been there forever. Present in 22.2'; -SELECT '- When using multiple threads the insert produces 3 parts causing undesired deduplication.'; -SELECT '- When using a single thread the insert produces 1 part without deduplication.'; - -INSERT INTO ds SELECT * FROM landing -SETTINGS insert_deduplicate=1, insert_deduplication_token='token1', - max_insert_threads=5; - -SELECT count() FROM ds; - -INSERT INTO ds SELECT * FROM landing -SETTINGS insert_deduplicate=1, insert_deduplication_token='token2', - max_insert_threads=1; - -SELECT count() FROM ds; -" | $CLICKHOUSE_CLIENT -n - -echo " -CREATE TABLE ds_remote -( - timestamp DateTime64(3), - status String, - id String -) -ENGINE = MergeTree() -ORDER BY timestamp -SETTINGS non_replicated_deduplication_window=1000; - -SELECT 'This bug has been introduced in CH 24.2+. 
See https://github.com/ClickHouse/ClickHouse/pull/59448'; -SELECT '- When using remote function and multiple threads the insert produces 3 parts causing undesired deduplication.'; -SELECT '- When using remote function and a single thread the insert produces 1 part without deduplication.'; - -INSERT INTO ds_remote SELECT * FROM remote('localhost:$CLICKHOUSE_PORT_TCP', $CLICKHOUSE_DATABASE, landing) -SETTINGS insert_deduplicate=1, insert_deduplication_token='token1', - max_insert_threads=5; - -SELECT count() FROM ds_remote; - -INSERT INTO ds_remote SELECT * FROM remote('localhost:$CLICKHOUSE_PORT_TCP', $CLICKHOUSE_DATABASE, landing) -SETTINGS insert_deduplicate=1, insert_deduplication_token='token2', - max_insert_threads=1; - -SELECT count() FROM ds_remote; -" | $CLICKHOUSE_LOCAL -n - -echo " -DROP TABLE IF EXISTS landing SYNC; -DROP TABLE IF EXISTS ds SYNC; -" | $CLICKHOUSE_CLIENT -n diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.sql b/tests/queries/0_stateless/03001_insert_threads_deduplication.sql new file mode 100644 index 00000000000..5b5cb1d6845 --- /dev/null +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS landing SYNC; +DROP TABLE IF EXISTS ds SYNC; + +CREATE TABLE landing +( + timestamp DateTime64(3), + status String, + id String +) +ENGINE = MergeTree() +ORDER BY timestamp; + +SYSTEM STOP MERGES landing; -- Stopping merges to force 3 parts + +INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; +INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; +INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; + +CREATE TABLE ds +( + timestamp DateTime64(3), + status String, + id String +) +ENGINE = MergeTree() +ORDER BY timestamp +SETTINGS non_replicated_deduplication_window=1000; + +INSERT INTO ds SELECT * FROM landing +SETTINGS insert_deduplicate=1, insert_deduplication_token='token1', + max_insert_threads=5; + +SELECT count() FROM ds; + +INSERT INTO ds SELECT * FROM landing +SETTINGS insert_deduplicate=1, insert_deduplication_token='token2', + max_insert_threads=1; + +SELECT count() FROM ds; + +DROP TABLE IF EXISTS landing SYNC; +DROP TABLE IF EXISTS ds SYNC; From 081ed8de2aaea770efc9670b43d7da07f746158f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 4 Mar 2024 16:18:53 +0100 Subject: [PATCH 194/356] Use 64-bit capabilities if available This will fix the following warning in dmesg: capability: warning: `clickhouse-serv' uses 32-bit capabilities (legacy support in use) P.S. I'm not even sure that the fallback code is useful, since _LINUX_CAPABILITY_VERSION_3 had been added long time ago, in Linux 2.6.26 (Released 13 July 2008) Signed-off-by: Azat Khuzhin --- src/Common/hasLinuxCapability.cpp | 39 +++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/Common/hasLinuxCapability.cpp b/src/Common/hasLinuxCapability.cpp index bf236eb5c56..6a4570a498c 100644 --- a/src/Common/hasLinuxCapability.cpp +++ b/src/Common/hasLinuxCapability.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include @@ -16,25 +18,48 @@ namespace ErrorCodes extern const int NETLINK_ERROR; } -static __user_cap_data_struct getCapabilities() +struct Capabilities +{ + UInt64 effective; + UInt64 permitted; + UInt64 inheritable; +}; + +static Capabilities getCapabilities() { /// See man getcap. 
__user_cap_header_struct request{}; - request.version = _LINUX_CAPABILITY_VERSION_1; /// It's enough to check just single CAP_NET_ADMIN capability we are interested. + request.version = _LINUX_CAPABILITY_VERSION_3; request.pid = getpid(); - __user_cap_data_struct response{}; + Capabilities ret{}; + __user_cap_data_struct response[2] = {}; /// Avoid dependency on 'libcap'. - if (0 != syscall(SYS_capget, &request, &response)) - throw ErrnoException(ErrorCodes::NETLINK_ERROR, "Cannot do 'capget' syscall"); + if (0 == syscall(SYS_capget, &request, response)) + { + ret.effective = static_cast(response[1].effective) << 32 | response[0].effective; + ret.permitted = static_cast(response[1].permitted) << 32 | response[0].permitted; + ret.inheritable = static_cast(response[1].inheritable) << 32 | response[0].inheritable; + return ret; + } - return response; + /// Does not supports V3, fallback to V1. + /// It's enough to check just single CAP_NET_ADMIN capability we are interested. + if (errno == EINVAL && 0 == syscall(SYS_capget, &request, response)) + { + ret.effective = response[0].effective; + ret.permitted = response[0].permitted; + ret.inheritable = response[0].inheritable; + return ret; + } + + throw ErrnoException(ErrorCodes::NETLINK_ERROR, "Cannot do 'capget' syscall"); } bool hasLinuxCapability(int cap) { - static __user_cap_data_struct capabilities = getCapabilities(); + static Capabilities capabilities = getCapabilities(); return (1 << cap) & capabilities.effective; } From fb8241c6519029882371b14d8ba0b1a61108c18d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 4 Mar 2024 16:36:46 +0100 Subject: [PATCH 195/356] Better --- src/Server/InterserverIOHTTPHandler.cpp | 33 +++++++++++++------------ 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index d2e0ed93667..28045380cd7 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -91,24 +91,35 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe used_output.out = std::make_shared( response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); + auto finalize_output = [&] + { + try + { + used_output.out->finalize(); + } + catch (...) + { + tryLogCurrentException(log, "Failed to finalize response write buffer"); + } + }; + auto write_response = [&](const std::string & message) { - auto & out = *used_output.out; if (response.sent()) { - out.finalize(); + finalize_output(); return; } try { - writeString(message, out); - out.finalize(); + writeString(message, *used_output.out); + finalize_output(); } catch (...) { tryLogCurrentException(log); - out.finalize(); + finalize_output(); } }; @@ -117,17 +128,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe if (auto [message, success] = checkAuthentication(request); success) { processQuery(request, response, used_output); - - try - { - used_output.out->finalize(); - } - catch (...) 
- { - tryLogCurrentException(log, "Failed to finalize response write buffer"); - return; - } - + finalize_output(); LOG_DEBUG(log, "Done processing query"); } else From 70acf7a930f5991aebccfccaf36bf4106300e588 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 4 Mar 2024 16:16:58 +0000 Subject: [PATCH 196/356] Support files without format extension in Filesystem database --- src/Databases/DatabaseFilesystem.cpp | 15 +-------------- ...database_filesystem_format_detection.reference | 2 ++ .../03003_database_filesystem_format_detection.sh | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/03003_database_filesystem_format_detection.reference create mode 100755 tests/queries/0_stateless/03003_database_filesystem_format_detection.sh diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index bc6714a6471..5af1e1ae0d2 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -146,20 +146,7 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont if (!checkTableFilePath(table_path, context_, throw_on_error)) return {}; - String format; - if (throw_on_error) - { - format = FormatFactory::instance().getFormatFromFileName(table_path); - } - else - { - auto format_maybe = FormatFactory::instance().tryGetFormatFromFileName(table_path); - if (!format_maybe) - return {}; - format = *format_maybe; - } - - auto ast_function_ptr = makeASTFunction("file", std::make_shared(table_path), std::make_shared(format)); + auto ast_function_ptr = makeASTFunction("file", std::make_shared(table_path)); auto table_function = TableFunctionFactory::instance().get(ast_function_ptr, context_); if (!table_function) diff --git a/tests/queries/0_stateless/03003_database_filesystem_format_detection.reference b/tests/queries/0_stateless/03003_database_filesystem_format_detection.reference new file mode 100644 index 00000000000..87d15e83593 --- /dev/null +++ b/tests/queries/0_stateless/03003_database_filesystem_format_detection.reference @@ -0,0 +1,2 @@ +a Nullable(Int64) +42 diff --git a/tests/queries/0_stateless/03003_database_filesystem_format_detection.sh b/tests/queries/0_stateless/03003_database_filesystem_format_detection.sh new file mode 100755 index 00000000000..8b476a1f82e --- /dev/null +++ b/tests/queries/0_stateless/03003_database_filesystem_format_detection.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.data + +echo '{"a" : 42}' > $DATA_FILE +$CLICKHOUSE_LOCAL -q "desc table \`$DATA_FILE\`" +$CLICKHOUSE_LOCAL -q "select * from \`$DATA_FILE\`" + +rm $DATA_FILE + From 0149b8893ad99d03bbc1ea6dd512c9b08648cfbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 4 Mar 2024 17:42:06 +0100 Subject: [PATCH 197/356] Include multiline logs in fuzzer fatal.log report --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index b4376fe2409..7a0d2939cd3 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -386,7 +386,7 @@ if [ -f core.zst ]; then CORE_LINK='core.zst' fi -rg --text -F '' server.log > fatal.log ||: +sed -n '//,/^$/p' s.log | rg "(^[^202])|" server.log > fatal.log ||: FATAL_LINK='' if [ -s fatal.log ]; then FATAL_LINK='fatal.log' From cc1c3f2da3038e78bc5f2fca3477c71968bae06f Mon Sep 17 00:00:00 2001 From: Brokenice0415 Date: Tue, 5 Mar 2024 00:54:25 +0800 Subject: [PATCH 198/356] add a keeper setting leadership_expiry --- docs/ru/operations/clickhouse-keeper.md | 2 ++ docs/zh/operations/clickhouse-keeper.md | 2 ++ src/Coordination/CoordinationSettings.cpp | 2 ++ src/Coordination/CoordinationSettings.h | 1 + src/Coordination/KeeperServer.cpp | 15 +++++++++++++++ .../test_keeper_four_word_command/test.py | 1 + 6 files changed, 23 insertions(+) diff --git a/docs/ru/operations/clickhouse-keeper.md b/docs/ru/operations/clickhouse-keeper.md index 9f1301d817d..e1d21dd537c 100644 --- a/docs/ru/operations/clickhouse-keeper.md +++ b/docs/ru/operations/clickhouse-keeper.md @@ -38,6 +38,7 @@ ClickHouse Keeper может иÑпользоватьÑÑ ÐºÐ°Ðº равноце - `dead_session_check_period_ms` — чаÑтота, Ñ ÐºÐ¾Ñ‚Ð¾Ñ€Ð¾Ð¹ ClickHouse Keeper проверÑет мертвые ÑеÑÑии и удалÑет их, в миллиÑекундах (по умолчанию: 500). - `election_timeout_lower_bound_ms` — времÑ, поÑле которого поÑледователь может инициировать перевыбор лидера, еÑли не получил от него контрольный Ñигнал (по умолчанию: 1000). - `election_timeout_upper_bound_ms` — времÑ, поÑле которого поÑледователь должен инициировать перевыбор лидера, еÑли не получил от него контрольный Ñигнал (по умолчанию: 2000). +- `leadership_expiry_ms` — ЕÑли лидер не получает ответа от доÑтаточного количеÑтва поÑледователей в течение Ñтого промежутка времени, он добровольно отказываетÑÑ Ð¾Ñ‚ Ñвоего руководÑтва. При наÑтройке 0 автоматичеÑки уÑтанавливаетÑÑ 20 - кратное значение `heart_beat_interval_ms`, а при наÑтройке меньше 0 лидер не отказываетÑÑ Ð¾Ñ‚ лидерÑтва (по умолчанию 0). - `force_sync` — вызывать `fsync` при каждой запиÑи в журнал координации (по умолчанию: true). - `four_letter_word_white_list` — ÑпиÑок разрешенных 4-Ñ… буквенных команд (по умолчанию: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro"). - `fresh_log_gap` — минимальное отÑтавание от лидера в количеÑтве запиÑей журнала поÑле которого поÑледователь Ñчитает ÑÐµÐ±Ñ Ð°ÐºÑ‚ÑƒÐ°Ð»ÑŒÐ½Ñ‹Ð¼ (по умолчанию: 200). 
@@ -209,6 +210,7 @@ dead_session_check_period_ms=500 heart_beat_interval_ms=500 election_timeout_lower_bound_ms=1000 election_timeout_upper_bound_ms=2000 +leadership_expiry_ms=0 reserved_log_items=1000000000000000 snapshot_distance=10000 auto_forwarding=true diff --git a/docs/zh/operations/clickhouse-keeper.md b/docs/zh/operations/clickhouse-keeper.md index 6d8a570aa12..e4412be2e30 100644 --- a/docs/zh/operations/clickhouse-keeper.md +++ b/docs/zh/operations/clickhouse-keeper.md @@ -45,6 +45,7 @@ ClickHouse Keeper 完全å¯ä»¥ä½œä¸ºZooKeeper的独立替代å“或者作为Click - `heart_beat_interval_ms` — ClickHouse Keeperçš„leaderå‘é€å¿ƒè·³é¢‘率(毫秒)(默认为500)。 - `election_timeout_lower_bound_ms` — 如果follower在此间隔内没有收到leader的心跳,那么它å¯ä»¥å¯åŠ¨leader选举(默认为1000). - `election_timeout_upper_bound_ms` — 如果follower在此间隔内没有收到leader的心跳,那么它必须å¯åŠ¨leader选举(默认为2000)。 +- `leadership_expiry_ms` — 如果leader在此间隔内没有收到足够的follower回å¤ï¼Œé‚£ä¹ˆä»–会主动放弃领导æƒã€‚当被设置为0时会自动设置为`heart_beat_interval_ms`çš„20å€ï¼Œå½“被设置å°äºŽ0æ—¶leaderä¸ä¼šä¸»åŠ¨æ”¾å¼ƒé¢†å¯¼æƒï¼ˆé»˜è®¤ä¸º0)。 - `rotate_log_storage_interval` — å•ä¸ªæ–‡ä»¶ä¸­å­˜å‚¨çš„日志记录数é‡(默认100000æ¡)。 - `reserved_log_items` — 在压缩之å‰éœ€è¦å­˜å‚¨å¤šå°‘å调日志记录(默认100000)。 - `snapshot_distance` — ClickHouse Keeper创建新快照的频率(以日志记录的数é‡ä¸ºå•ä½)(默认100000)。 @@ -214,6 +215,7 @@ dead_session_check_period_ms=500 heart_beat_interval_ms=500 election_timeout_lower_bound_ms=1000 election_timeout_upper_bound_ms=2000 +leadership_expiry_ms=0 reserved_log_items=1000000000000000 snapshot_distance=10000 auto_forwarding=true diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index ea1acf02450..05f691ca76b 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -114,6 +114,8 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const write_int(static_cast(coordination_settings->election_timeout_lower_bound_ms)); writeText("election_timeout_upper_bound_ms=", buf); write_int(static_cast(coordination_settings->election_timeout_upper_bound_ms)); + writeText("leadership_expiry_ms=", buf); + write_int(static_cast(coordination_settings->leadership_expiry_ms)); writeText("reserved_log_items=", buf); write_int(coordination_settings->reserved_log_items); diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index ed0490681b7..3c8f1d271d0 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -26,6 +26,7 @@ struct Settings; M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \ M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \ M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \ + M(Milliseconds, leadership_expiry_ms, 0, "How long will a leader expire after not getting enough peer responses. 
Set it lower or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \ M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \ M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \ M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \ diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index cf4399461ab..80d61518e19 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -316,6 +316,21 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co } } + params.leadership_expiry_ = getValueOrMaxInt32AndLogWarning( + coordination_settings->leadership_expiry_ms.totalMilliseconds(), "leadership_expiry_ms", log); + + if (params.leadership_expiry_ > 0) + { + if (params.leadership_expiry_ < params.election_timeout_lower_bound_) + { + LOG_WARNING( + log, + "leadership_expiry_ is smaller than election_timeout_lower_bound_ms. " + "Notice that too small leadership_expiry_ may make Raft group " + "sensitive to network status."); + } + } + params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log); params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log); diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 84dd2a2fd93..44b2b50673a 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -266,6 +266,7 @@ def test_cmd_conf(started_cluster): assert result["heart_beat_interval_ms"] == "500" assert result["election_timeout_lower_bound_ms"] == "1000" assert result["election_timeout_upper_bound_ms"] == "2000" + assert result["leadership_expiry_ms"] == "0" assert result["reserved_log_items"] == "100000" assert result["snapshot_distance"] == "75" From a7430004b3cbc9f9f5a7b712dd41e33e5b256126 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 04:33:15 +0100 Subject: [PATCH 199/356] Fix real time query profiler on AArch64 --- base/glibc-compatibility/musl/aarch64/syscall.s | 2 ++ programs/server/Server.cpp | 5 ----- src/Interpreters/TraceCollector.cpp | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/base/glibc-compatibility/musl/aarch64/syscall.s b/base/glibc-compatibility/musl/aarch64/syscall.s index 845986bf787..aadaea04ef5 100644 --- a/base/glibc-compatibility/musl/aarch64/syscall.s +++ b/base/glibc-compatibility/musl/aarch64/syscall.s @@ -2,6 +2,7 @@ .hidden __syscall .type __syscall,%function __syscall: +.cfi_startproc uxtw x8,w0 mov x0,x1 mov x1,x2 @@ -12,3 +13,4 @@ __syscall: mov x6,x7 svc 0 ret +.cfi_endproc diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 93562d6df90..c45291ba52c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1881,7 +1881,6 @@ try { total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); } - } #endif @@ -1896,10 +1895,6 @@ try " when two different stack unwinding methods will interfere with each other."); #endif -#if !defined(__x86_64__) - LOG_INFO(log, "Query Profiler and TraceCollector is only tested on x86_64. 
It also known to not work under qemu-user."); -#endif - if (!hasPHDRCache()) LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created" " (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe)."); diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 1fe11be6090..8e9c397b7a1 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include From 6e579e6bfaa4e486ff3cdd2285992ff95163c212 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 4 Mar 2024 18:15:53 +0100 Subject: [PATCH 200/356] Use awk since it's much simpler --- docker/test/fuzzer/run-fuzzer.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 7a0d2939cd3..9358e88e1e8 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -386,7 +386,8 @@ if [ -f core.zst ]; then CORE_LINK='core.zst' fi -sed -n '//,/^$/p' s.log | rg "(^[^202])|" server.log > fatal.log ||: +# Keep all the lines in the paragraphs containing that either contain or don't start with 20... (year) +sed -n '//,/^$/p' s.log | awk '// || !/^20/' server.log > fatal.log ||: FATAL_LINK='' if [ -s fatal.log ]; then FATAL_LINK='fatal.log' From ff2882c7072ea2f08df67e77236f1890c6fcdc8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Mar 2024 18:16:38 +0100 Subject: [PATCH 201/356] Fix query profiler on AArch64 --- tests/queries/0_stateless/00974_query_profiler.sql | 2 +- tests/queries/0_stateless/01092_memory_profiler.sql | 2 +- tests/queries/0_stateless/01526_max_untracked_memory.sh | 3 +-- tests/queries/0_stateless/01569_query_profiler_big_query_id.sh | 3 +-- tests/queries/0_stateless/02161_addressToLineWithInlines.sql | 2 +- tests/queries/0_stateless/02252_jit_profile_events.sql | 2 +- .../02818_memory_profiler_sample_min_max_allocation_size.sh | 3 +-- 7 files changed, 7 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql index b697bd56800..24e4241b813 100644 --- a/tests/queries/0_stateless/00974_query_profiler.sql +++ b/tests/queries/0_stateless/00974_query_profiler.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64 +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest -- Tag no-fasttest: Not sure why fail even in sequential mode. Disabled for now to make some progress. 
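+-- Note: the no-cpu-aarch64 tag is dropped here because the query profiler is expected to work on
+-- AArch64 once __syscall in glibc-compatibility carries CFI unwind info (see the syscall.s change above).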
SET allow_introspection_functions = 1; diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql index b69d3faf94e..3a04de650ce 100644 --- a/tests/queries/0_stateless/01092_memory_profiler.sql +++ b/tests/queries/0_stateless/01092_memory_profiler.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest, no-cpu-aarch64 +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest SET allow_introspection_functions = 1; diff --git a/tests/queries/0_stateless/01526_max_untracked_memory.sh b/tests/queries/0_stateless/01526_max_untracked_memory.sh index 45fdb314fb2..b2bad637422 100755 --- a/tests/queries/0_stateless/01526_max_untracked_memory.sh +++ b/tests/queries/0_stateless/01526_max_untracked_memory.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64 -# requires TraceCollector, does not available under sanitizers and aarch64 +# Tags: no-tsan, no-asan, no-ubsan, no-msan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh b/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh index e54783e9655..941ab216d0b 100755 --- a/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh +++ b/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -9,4 +9,3 @@ query_id="aggregating_merge_tree_simple_aggregate_function_string_query100_profi ${CLICKHOUSE_CLIENT} --query="select sleep(1)" --query_id="$query_id" --query_profiler_real_time_period_ns=10000000 ${CLICKHOUSE_CLIENT} --query="system flush logs" ${CLICKHOUSE_CLIENT} --query="select count(*) > 1 from system.trace_log where query_id = '$query_id'" - diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index b6b497b4b55..78b414378f1 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug SET allow_introspection_functions = 0; SELECT addressToLineWithInlines(1); -- { serverError 446 } diff --git a/tests/queries/0_stateless/02252_jit_profile_events.sql b/tests/queries/0_stateless/02252_jit_profile_events.sql index fb7f806c46b..487f43737e8 100644 --- a/tests/queries/0_stateless/02252_jit_profile_events.sql +++ b/tests/queries/0_stateless/02252_jit_profile_events.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-parallel, no-cpu-aarch64, no-msan +-- Tags: no-fasttest, no-parallel, no-msan SET compile_expressions = 1; SET min_count_to_compile_expression = 0; diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh index b1fbea26da7..9234c428147 100755 --- a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -# Tags: 
no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings -# requires TraceCollector, does not available under sanitizers and aarch64 +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 70abdf7a414ee57d59df51f6cf5ec435e2830f9e Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 4 Mar 2024 17:32:22 +0000 Subject: [PATCH 202/356] Small improvements in JSON schema inference --- docs/en/interfaces/schema-inference.md | 64 +++++++++++++ src/Core/Settings.h | 3 +- src/Core/SettingsChangesHistory.h | 3 + src/Formats/EscapingRuleUtils.cpp | 9 +- src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Formats/SchemaInferenceUtils.cpp | 91 +++++++++++-------- ...02982_dont_infer_exponent_floats.reference | 1 + .../02982_dont_infer_exponent_floats.sql | 3 + ...erence_ambiguous_paths_as_string.reference | 3 + ...es_inference_ambiguous_paths_as_string.sql | 4 + 11 files changed, 142 insertions(+), 41 deletions(-) create mode 100644 tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.reference create mode 100644 tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.sql diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 39ae69eaef4..f2e9136d1db 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -549,6 +549,48 @@ Result: └───────┴─────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +##### input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects + +Enabling this setting allows to use String type for ambiguous paths during named tuples inference from JSON objects (when `input_format_json_try_infer_named_tuples_from_objects` is enabled) instead of an exception. +It allows to read JSON objects as named Tuples even if there are ambiguous paths. + +Disabled by default. + +**Examples** + +With disabled setting: +```sql +SET input_format_json_try_infer_named_tuples_from_objects = 1; +SET input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = 0; +DESC format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : "Hello"}}}'); +``` +Result: + +```text +Code: 636. DB::Exception: The table structure cannot be extracted from a JSONEachRow format file. Error: +Code: 117. DB::Exception: JSON objects have ambiguous paths: 'a' (with type Int64) and 'a.b'. You can enable setting input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type for path 'a'. (INCORRECT_DATA) (version 24.3.1.1). +You can specify the structure manually. 
(CANNOT_EXTRACT_TABLE_STRUCTURE) +``` + +With enabled setting: +```sql +SET input_format_json_try_infer_named_tuples_from_objects = 1; +SET input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = 1; +DESC format(JSONEachRow, '{"obj" : "a" : 42}, {"obj" : {"a" : {"b" : "Hello"}}}'); +SELECT * FROM format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : "Hello"}}}'); +``` + +Result: +```text +┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┠+│ obj │ Tuple(a Nullable(String)) │ │ │ │ │ │ +└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─obj─────────────────┠+│ ('42') │ +│ ('{"b" : "Hello"}') │ +└─────────────────────┘ +``` + ##### input_format_json_read_objects_as_strings Enabling this setting allows reading nested JSON objects as strings. @@ -1554,6 +1596,28 @@ DESC format(JSONEachRow, $$ └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +#### input_format_try_infer_exponent_floats + +If enabled, ClickHouse will try to infer floats in exponential form for text formats (except JSON where numbers in exponential form are always inferred). + +Disabled by default. + +**Example** + +```sql +SET input_format_try_infer_exponent_floats = 1; +DESC format(CSV, +$$1.1E10 +2.3e-12 +42E00 +$$) +``` +```response +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┠+│ c1 │ Nullable(Float64) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + ## Self describing formats {#self-describing-formats} Self-describing formats contain information about the structure of the data in the data itself, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ae6ea165cc9..3f71223c910 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1018,6 +1018,7 @@ class IColumn; M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \ M(Bool, input_format_json_read_arrays_as_strings, true, "Allow to parse JSON arrays as strings in JSON input formats", 0) \ M(Bool, input_format_json_try_infer_named_tuples_from_objects, true, "Try to infer named tuples from JSON objects in JSON input formats", 0) \ + M(Bool, input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects, false, "Use String type instead of an exception in case of ambiguous paths in JSON objects during named tuples inference", 0) \ M(Bool, input_format_json_infer_incomplete_types_as_strings, true, "Use type String for keys that contains only Nulls or empty objects/arrays during schema inference in JSON input formats", 0) \ M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \ M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, "Ignore unknown keys in json object for named tuples", 0) \ @@ -1025,7 +1026,7 @@ class IColumn; M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference 
in text formats", 0) \ - M(Bool, input_format_try_infer_exponent_floats, false, "Try to infer floats in exponential notation while schema inference in text formats", 0) \ + M(Bool, input_format_try_infer_exponent_floats, false, "Try to infer floats in exponential notation while schema inference in text formats (except JSON, where exponent numbers are always inferred)", 0) \ M(Bool, output_format_markdown_escape_special_characters, false, "Escape special characters in Markdown", 0) \ M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \ M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index face1def4b4..f473d677ecd 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,9 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.3", { + {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, + }}, {"24.2", { {"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 16f8a341e03..577988871f3 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -450,8 +450,10 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo break; case FormatSettings::EscapingRule::JSON: result += fmt::format( - ", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_bools_as_strings={}, read_objects_as_strings={}, read_numbers_as_strings={}, " - "read_arrays_as_strings={}, try_infer_objects_as_tuples={}, infer_incomplete_types_as_strings={}, try_infer_objects={}", + ", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_bools_as_strings={}, read_objects_as_strings={}, " + "read_numbers_as_strings={}, " + "read_arrays_as_strings={}, try_infer_objects_as_tuples={}, infer_incomplete_types_as_strings={}, try_infer_objects={}, " + "use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects={}", settings.json.try_infer_numbers_from_strings, settings.json.read_bools_as_numbers, settings.json.read_bools_as_strings, @@ -460,7 +462,8 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo settings.json.read_arrays_as_strings, settings.json.try_infer_objects_as_tuples, settings.json.infer_incomplete_types_as_strings, - settings.json.allow_object_type); + settings.json.allow_object_type, + settings.json.use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects); break; default: break; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 
a4a08d762b9..ccead6688a7 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -105,6 +105,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.write_named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects; format_settings.json.skip_null_value_in_named_tuples = settings.output_format_json_skip_null_value_in_named_tuples; format_settings.json.read_named_tuples_as_objects = settings.input_format_json_named_tuples_as_objects; + format_settings.json.use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = settings.input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects; format_settings.json.defaults_for_missing_elements_in_named_tuple = settings.input_format_json_defaults_for_missing_elements_in_named_tuple; format_settings.json.ignore_unknown_keys_in_named_tuple = settings.input_format_json_ignore_unknown_keys_in_named_tuple; format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 01c3632c730..42b21c77cef 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -202,6 +202,7 @@ struct FormatSettings bool quote_decimals = false; bool escape_forward_slashes = true; bool read_named_tuples_as_objects = false; + bool use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = false; bool write_named_tuples_as_objects = false; bool skip_null_value_in_named_tuples = false; bool defaults_for_missing_elements_in_named_tuple = false; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 06b52e7a7a2..998f97fae0d 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -136,7 +136,7 @@ namespace bool empty() const { return paths.empty(); } - DataTypePtr finalize() const + DataTypePtr finalize(bool use_string_type_for_ambiguous_paths = false) const { if (paths.empty()) throw Exception(ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, "Cannot infer named Tuple from JSON object because object is empty"); @@ -167,7 +167,7 @@ namespace current_node->leaf_type = type; } - return root_node.getType(); + return root_node.getType(use_string_type_for_ambiguous_paths); } private: @@ -180,7 +180,7 @@ namespace /// Store path to this node for better exception message in case of ambiguous paths. String path; - DataTypePtr getType() const + DataTypePtr getType(bool use_string_type_for_ambiguous_paths) const { /// Check if we have ambiguous paths. /// For example: @@ -191,7 +191,16 @@ namespace /// And after merge we will have ambiguous paths 'a.b.c' : Int32 and 'a.b' : Nullable(Nothing), /// but it's a valid case and we should ignore path 'a.b'. if (leaf_type && !isNothing(removeNullable(leaf_type)) && !nodes.empty()) - throw Exception(ErrorCodes::INCORRECT_DATA, "JSON objects have ambiguous paths: '{}' with type {} and '{}'", path, leaf_type->getName(), nodes.begin()->second.path); + { + if (use_string_type_for_ambiguous_paths) + return std::make_shared(); + throw Exception( + ErrorCodes::INCORRECT_DATA, + "JSON objects have ambiguous paths: '{}' (with type {}) and '{}'. 
You can enable setting " + "input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type " + "for path '{}'", + path, leaf_type->getName(), nodes.begin()->second.path, path); + } if (nodes.empty()) return leaf_type; @@ -203,7 +212,7 @@ namespace for (const auto & [name, node] : nodes) { node_names.push_back(name); - node_types.push_back(node.getType()); + node_types.push_back(node.getType(use_string_type_for_ambiguous_paths)); } return std::make_shared(std::move(node_types), std::move(node_names)); @@ -866,13 +875,15 @@ namespace return std::make_shared(nested_types); } + template bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings) { - if (settings.try_infer_exponent_floats) + if (is_json || settings.try_infer_exponent_floats) return tryReadFloatText(value, buf); return tryReadFloatTextNoExponent(value, buf); } + template DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings) { if (buf.eof()) @@ -911,7 +922,7 @@ namespace buf.position() = number_start; } - if (tryReadFloat(tmp_float, buf, settings)) + if (tryReadFloat(tmp_float, buf, settings)) { if (read_int && buf.position() == int_end) return std::make_shared(); @@ -945,7 +956,7 @@ namespace peekable_buf.rollbackToCheckpoint(true); } - if (tryReadFloat(tmp_float, peekable_buf, settings)) + if (tryReadFloat(tmp_float, peekable_buf, settings)) { /// Float parsing reads no fewer bytes than integer parsing, /// so position of the buffer is either the same, or further. @@ -957,7 +968,7 @@ namespace return std::make_shared(); } } - else if (tryReadFloat(tmp_float, buf, settings)) + else if (tryReadFloat(tmp_float, buf, settings)) { return std::make_shared(); } @@ -966,6 +977,36 @@ namespace return nullptr; } + template + DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings) + { + ReadBufferFromString buf(field); + + if (settings.try_infer_integers) + { + Int64 tmp_int; + if (tryReadIntText(tmp_int, buf) && buf.eof()) + return std::make_shared(); + + /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. + buf.position() = buf.buffer().begin(); + + /// In case of Int64 overflow, try to infer UInt64 + UInt64 tmp_uint; + if (tryReadIntText(tmp_uint, buf) && buf.eof()) + return std::make_shared(); + } + + /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. 
+ buf.position() = buf.buffer().begin(); + + Float64 tmp; + if (tryReadFloat(tmp, buf, settings) && buf.eof()) + return std::make_shared(); + + return nullptr; + } + template DataTypePtr tryInferString(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info) { @@ -995,7 +1036,7 @@ namespace { if (settings.json.try_infer_numbers_from_strings) { - if (auto number_type = tryInferNumberFromString(field, settings)) + if (auto number_type = tryInferNumberFromStringImpl(field, settings)) { json_info->numbers_parsed_from_json_strings.insert(number_type.get()); return number_type; @@ -1238,7 +1279,7 @@ namespace } /// Number - return tryInferNumber(buf, settings); + return tryInferNumber(buf, settings); } } @@ -1294,7 +1335,7 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F return; } - data_type = json_paths->finalize(); + data_type = json_paths->finalize(settings.json.use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects); transformFinalInferredJSONTypeIfNeededImpl(data_type, settings, json_info, remain_nothing_types); return; } @@ -1377,31 +1418,7 @@ void transformFinalInferredJSONTypeIfNeeded(DataTypePtr & data_type, const Forma DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings) { - ReadBufferFromString buf(field); - - if (settings.try_infer_integers) - { - Int64 tmp_int; - if (tryReadIntText(tmp_int, buf) && buf.eof()) - return std::make_shared(); - - /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. - buf.position() = buf.buffer().begin(); - - /// In case of Int64 overflow, try to infer UInt64 - UInt64 tmp_uint; - if (tryReadIntText(tmp_uint, buf) && buf.eof()) - return std::make_shared(); - } - - /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. 
- buf.position() = buf.buffer().begin(); - - Float64 tmp; - if (tryReadFloat(tmp, buf, settings) && buf.eof()) - return std::make_shared(); - - return nullptr; + return tryInferNumberFromStringImpl(field, settings); } DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings) diff --git a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference index b6d1ff865e5..47e9b86237a 100644 --- a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference +++ b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference @@ -1,2 +1,3 @@ c1 Nullable(String) c1 Nullable(Float64) +x Nullable(Float64) diff --git a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql index 2a281e898f1..4f78855f5ce 100644 --- a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql +++ b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql @@ -1,2 +1,5 @@ DESC format(CSV, '1E20\n1.1E20') settings input_format_try_infer_exponent_floats = 0; DESC format(CSV, '1E20\n1.1E20') settings input_format_try_infer_exponent_floats = 1; +-- This setting should not take affect on JSON formats +DESC format(JSONEachRow, '{"x" : 1.1e20}') settings input_format_try_infer_exponent_floats = 0; + diff --git a/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.reference b/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.reference new file mode 100644 index 00000000000..0318b136ade --- /dev/null +++ b/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.reference @@ -0,0 +1,3 @@ +obj Tuple(\n a Nullable(String)) +('42') +('{"b" : 42}') diff --git a/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.sql b/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.sql new file mode 100644 index 00000000000..4b986c94868 --- /dev/null +++ b/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.sql @@ -0,0 +1,4 @@ +set input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=1; +desc format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : 42}}}'); +select * from format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : 42}}}'); + From d3c42b5a24e7472cd6d613adfba94e99aa4a7401 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 4 Mar 2024 17:36:06 +0000 Subject: [PATCH 203/356] Fix test flakiness + make index_granularity stable, it's randomized in tests --- tests/queries/0_stateless/03000_minmax_index_first.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03000_minmax_index_first.sql b/tests/queries/0_stateless/03000_minmax_index_first.sql index 5dae245a0a0..5d2896c792a 100644 --- a/tests/queries/0_stateless/03000_minmax_index_first.sql +++ b/tests/queries/0_stateless/03000_minmax_index_first.sql @@ -8,8 +8,9 @@ CREATE TABLE skip_table INDEX v_mm v TYPE minmax GRANULARITY 2 ) ENGINE = MergeTree -PRIMARY KEY k; +PRIMARY KEY k +SETTINGS index_granularity = 8192; -INSERT INTO skip_table SELECT number, intDiv(number, 4096) FROM numbers(1000000); +INSERT INTO skip_table SELECT number, intDiv(number, 4096) FROM numbers(100000); SELECT trim(explain) FROM ( EXPLAIN indexes = 1 SELECT * FROM skip_table WHERE v = 125) WHERE 
explain like '%Name%'; From 2d8ca99860eb01baa5694b60b701f5f9d2874778 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 4 Mar 2024 17:48:47 +0000 Subject: [PATCH 204/356] Refactor more system storages. --- src/Storages/StorageMergeTreeIndex.cpp | 79 ++++++++++++++--- src/Storages/StorageMergeTreeIndex.h | 8 +- src/Storages/System/StorageSystemColumns.cpp | 77 ++++++++++++++-- src/Storages/System/StorageSystemColumns.h | 3 +- .../StorageSystemDataSkippingIndices.cpp | 66 +++++++++++++- .../System/StorageSystemDataSkippingIndices.h | 3 +- .../System/StorageSystemDetachedParts.cpp | 48 +++++++++- .../System/StorageSystemDetachedParts.h | 17 ++-- .../StorageSystemDroppedTablesParts.cpp | 4 +- .../System/StorageSystemDroppedTablesParts.h | 6 +- .../System/StorageSystemPartsBase.cpp | 60 +++++++++++-- src/Storages/System/StorageSystemPartsBase.h | 38 +++++++- src/Storages/System/StorageSystemReplicas.cpp | 88 ++++++++++++++----- src/Storages/System/StorageSystemReplicas.h | 7 +- 14 files changed, 421 insertions(+), 83 deletions(-) diff --git a/src/Storages/StorageMergeTreeIndex.cpp b/src/Storages/StorageMergeTreeIndex.cpp index d875611bb50..5f89849e920 100644 --- a/src/Storages/StorageMergeTreeIndex.cpp +++ b/src/Storages/StorageMergeTreeIndex.cpp @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -216,7 +219,6 @@ StorageMergeTreeIndex::StorageMergeTreeIndex( : IStorage(table_id_) , source_table(source_table_) , with_marks(with_marks_) - , log(&Poco::Logger::get("StorageMergeTreeIndex")) { const auto * merge_tree = dynamic_cast(source_table.get()); if (!merge_tree) @@ -230,7 +232,47 @@ StorageMergeTreeIndex::StorageMergeTreeIndex( setInMemoryMetadata(storage_metadata); } -Pipe StorageMergeTreeIndex::read( +class ReadFromMergeTreeIndex : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromMergeTreeIndex"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromMergeTreeIndex( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , log(&Poco::Logger::get("StorageMergeTreeIndex")) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::shared_ptr storage; + Poco::Logger * log; + const ActionsDAG::Node * predicate = nullptr; +}; + +void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageMergeTreeIndex::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -265,21 +307,32 @@ Pipe StorageMergeTreeIndex::read( context->checkAccess(AccessType::SELECT, source_table->getStorageID(), columns_from_storage); - auto header = storage_snapshot->getSampleBlockForColumns(column_names); - auto filtered_parts = getFilteredDataParts(query_info, context); + auto sample_block = storage_snapshot->getSampleBlockForColumns(column_names); - LOG_DEBUG(log, "Reading index{}from {} parts of table {}", - with_marks ? 
" with marks " : " ", - filtered_parts.size(), - source_table->getStorageID().getNameForLogs()); + auto this_ptr = std::static_pointer_cast(shared_from_this()); - return Pipe(std::make_shared(std::move(header), key_sample_block, std::move(filtered_parts), context, with_marks)); + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(sample_block), std::move(this_ptr)); + + query_plan.addStep(std::move(reading)); } -MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(SelectQueryInfo & query_info, const ContextPtr & context) const +void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - const auto * select_query = query_info.query->as(); - if (!select_query || !select_query->where()) + auto filtered_parts = storage->getFilteredDataParts(predicate, context); + + LOG_DEBUG(log, "Reading index{}from {} parts of table {}", + storage->with_marks ? " with marks " : " ", + filtered_parts.size(), + storage->source_table->getStorageID().getNameForLogs()); + + pipeline.init(Pipe(std::make_shared(getOutputStream().header, storage->key_sample_block, std::move(filtered_parts), context, storage->with_marks))); +} + +MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const +{ + if (!predicate) return data_parts; auto all_part_names = ColumnString::create(); @@ -287,7 +340,7 @@ MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(Selec all_part_names->insert(part->name); Block filtered_block{{std::move(all_part_names), std::make_shared(), part_name_column.name}}; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return {}; diff --git a/src/Storages/StorageMergeTreeIndex.h b/src/Storages/StorageMergeTreeIndex.h index b610d391655..a1fb61d5a56 100644 --- a/src/Storages/StorageMergeTreeIndex.h +++ b/src/Storages/StorageMergeTreeIndex.h @@ -21,7 +21,8 @@ public: const ColumnsDescription & columns, bool with_marks_); - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -33,14 +34,15 @@ public: String getName() const override { return "MergeTreeIndex"; } private: - MergeTreeData::DataPartsVector getFilteredDataParts(SelectQueryInfo & query_info, const ContextPtr & context) const; + friend class ReadFromMergeTreeIndex; + + MergeTreeData::DataPartsVector getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const; StoragePtr source_table; bool with_marks; MergeTreeData::DataPartsVector data_parts; Block key_sample_block; - Poco::Logger * log; }; } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index ad65f6f5476..6bc1208a6a9 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -16,6 +16,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -291,8 +294,51 @@ private: std::chrono::milliseconds lock_acquire_timeout; }; +class ReadFromSystemColumns : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemColumns"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings 
&) override; -Pipe StorageSystemColumns::read( + ReadFromSystemColumns( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::shared_ptr storage; + std::vector columns_mask; + const size_t max_block_size; + const ActionsDAG::Node * predicate = nullptr; +}; + +void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageSystemColumns::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -306,9 +352,22 @@ Pipe StorageSystemColumns::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(header), std::move(this_ptr), std::move(columns_mask), max_block_size); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ Block block_to_filter; Storages storages; Pipes pipes; + auto header = getOutputStream().header; { /// Add `database` column. @@ -338,12 +397,13 @@ Pipe StorageSystemColumns::read( block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared(), "database")); /// Filter block with `database` column. - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); if (!block_to_filter.rows()) { - pipes.emplace_back(std::make_shared(header)); - return Pipe::unitePipes(std::move(pipes)); + pipes.emplace_back(std::make_shared(std::move(header))); + pipeline.init(Pipe::unitePipes(std::move(pipes))); + return; } ColumnPtr & database_column = block_to_filter.getByName("database").column; @@ -384,12 +444,13 @@ Pipe StorageSystemColumns::read( } /// Filter block with `database` and `table` columns. 
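+        /// Note: the filter now comes from the ActionsDAG predicate pushed down in
+        /// ReadFromSystemColumns::applyFilters(), instead of re-evaluating the query AST here.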
- VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); if (!block_to_filter.rows()) { - pipes.emplace_back(std::make_shared(header)); - return Pipe::unitePipes(std::move(pipes)); + pipes.emplace_back(std::make_shared(std::move(header))); + pipeline.init(Pipe::unitePipes(std::move(pipes))); + return; } ColumnPtr filtered_database_column = block_to_filter.getByName("database").column; @@ -400,7 +461,7 @@ Pipe StorageSystemColumns::read( std::move(filtered_database_column), std::move(filtered_table_column), std::move(storages), context)); - return Pipe::unitePipes(std::move(pipes)); + pipeline.init(Pipe::unitePipes(std::move(pipes))); } } diff --git a/src/Storages/System/StorageSystemColumns.h b/src/Storages/System/StorageSystemColumns.h index 7b4b5dd8fb3..22b2541d93f 100644 --- a/src/Storages/System/StorageSystemColumns.h +++ b/src/Storages/System/StorageSystemColumns.h @@ -17,7 +17,8 @@ public: std::string getName() const override { return "SystemColumns"; } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 0c4eb197efd..2fa74ef23e6 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -12,7 +12,10 @@ #include #include #include +#include +#include #include +#include namespace DB @@ -176,7 +179,51 @@ private: DatabaseTablesIteratorPtr tables_it; }; -Pipe StorageSystemDataSkippingIndices::read( +class ReadFromSystemDataSkippingIndices : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemDataSkippingIndices"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemDataSkippingIndices( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::shared_ptr storage; + std::vector columns_mask; + const size_t max_block_size; + const ActionsDAG::Node * predicate = nullptr; +}; + +void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageSystemDataSkippingIndices::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -190,6 +237,17 @@ Pipe StorageSystemDataSkippingIndices::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + 
std::move(context), std::move(header), std::move(this_ptr), std::move(columns_mask), max_block_size); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemDataSkippingIndices::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ MutableColumnPtr column = ColumnString::create(); const auto databases = DatabaseCatalog::instance().getDatabases(); @@ -207,11 +265,11 @@ Pipe StorageSystemDataSkippingIndices::read( /// Condition on "database" in a query acts like an index. Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); ColumnPtr & filtered_databases = block.getByPosition(0).column; - return Pipe(std::make_shared( - std::move(columns_mask), std::move(header), max_block_size, std::move(filtered_databases), context)); + pipeline.init(Pipe(std::make_shared( + std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases), context))); } } diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.h b/src/Storages/System/StorageSystemDataSkippingIndices.h index 8a1e8c159b4..8bf1da98368 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.h +++ b/src/Storages/System/StorageSystemDataSkippingIndices.h @@ -14,7 +14,8 @@ public: std::string getName() const override { return "SystemDataSkippingIndices"; } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 3dae43976f7..913983952c3 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include @@ -285,7 +287,34 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i setInMemoryMetadata(storage_metadata); } -Pipe StorageSystemDetachedParts::read( +class ReadFromSystemDetachedParts : public ReadFromSystemPartsBase +{ +public: + ReadFromSystemDetachedParts( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + size_t max_block_size_, + size_t num_streams_) + : ReadFromSystemPartsBase(column_names_, query_info_, storage_snapshot_, context_, sample_block, std::move(storage_), std::move(columns_mask_), false) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + {} + + std::string getName() const override { return "ReadFromSystemDetachedParts"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + +private: + const size_t max_block_size; + const size_t num_streams; +}; + +void StorageSystemDetachedParts::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -299,17 +328,28 @@ Pipe StorageSystemDetachedParts::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); - auto state = std::make_shared(StoragesInfoStream(query_info, context)); + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = 
std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(header), std::move(this_ptr), std::move(columns_mask), max_block_size, num_streams); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemDetachedParts::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto state = std::make_shared(StoragesInfoStream(predicate, context)); Pipe pipe; for (size_t i = 0; i < num_streams; ++i) { - auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size); + auto source = std::make_shared(getOutputStream().header, state, columns_mask, max_block_size); pipe.addSource(std::move(source)); } - return pipe; + pipeline.init(std::move(pipe)); } } diff --git a/src/Storages/System/StorageSystemDetachedParts.h b/src/Storages/System/StorageSystemDetachedParts.h index 20ac69f0eea..52b964e3b3c 100644 --- a/src/Storages/System/StorageSystemDetachedParts.h +++ b/src/Storages/System/StorageSystemDetachedParts.h @@ -20,14 +20,15 @@ public: bool isSystemStorage() const override { return true; } protected: - Pipe read( - const Names & /* column_names */, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t /*max_block_size*/, - size_t /*num_streams*/) override; + void read( + QueryPlan & query_plan, + const Names & /* column_names */, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t /*max_block_size*/, + size_t /*num_streams*/) override; }; } diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index bbe99c688c9..20baeee1d3b 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -10,7 +10,7 @@ namespace DB { -StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context) +StoragesDroppedInfoStream::StoragesDroppedInfoStream(const ActionsDAG::Node * predicate, ContextPtr context) : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. @@ -73,7 +73,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & que if (block_to_filter.rows()) { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. 
- VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); rows = block_to_filter.rows(); } diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.h b/src/Storages/System/StorageSystemDroppedTablesParts.h index f548697a6a9..a44abea7285 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.h +++ b/src/Storages/System/StorageSystemDroppedTablesParts.h @@ -9,7 +9,7 @@ namespace DB class StoragesDroppedInfoStream : public StoragesInfoStreamBase { public: - StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context); + StoragesDroppedInfoStream(const ActionsDAG::Node * predicate, ContextPtr context); protected: bool tryLockTable(StoragesInfo &) override { @@ -30,9 +30,9 @@ public: std::string getName() const override { return "SystemDroppedTablesParts"; } protected: - std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) override + std::unique_ptr getStoragesInfoStream(const ActionsDAG::Node * predicate, ContextPtr context) override { - return std::make_unique(query_info, context); + return std::make_unique(predicate, context); } }; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 48dab8c4777..16e2fd080fb 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -2,6 +2,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -79,7 +82,7 @@ StoragesInfo::getProjectionParts(MergeTreeData::DataPartStateVector & state, boo return data->getProjectionPartsVectorForInternalUsage({State::Active}, &state); } -StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) +StoragesInfoStream::StoragesInfoStream(const ActionsDAG::Node * predicate, ContextPtr context) : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. @@ -111,7 +114,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte std::move(database_column_mut), std::make_shared(), "database")); /// Filter block_to_filter with column 'database'. - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); rows = block_to_filter.rows(); /// Block contains new columns, update database_column. @@ -190,7 +193,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte if (rows) { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. 
- VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); rows = block_to_filter.rows(); } @@ -201,7 +204,36 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte } -Pipe StorageSystemPartsBase::read( +ReadFromSystemPartsBase::ReadFromSystemPartsBase( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + bool has_state_column_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) + , has_state_column(has_state_column_) +{ +} + +void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageSystemPartsBase::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -212,20 +244,32 @@ Pipe StorageSystemPartsBase::read( { bool has_state_column = hasStateColumn(column_names, storage_snapshot); - auto stream = getStoragesInfoStream(query_info, context); - /// Create the result. Block sample = storage_snapshot->metadata->getSampleBlock(); auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample, column_names); + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(header), std::move(this_ptr), std::move(columns_mask), has_state_column); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemPartsBase::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto stream = storage->getStoragesInfoStream(predicate, context); + auto header = getOutputStream().header; + MutableColumns res_columns = header.cloneEmptyColumns(); if (has_state_column) res_columns.push_back(ColumnString::create()); while (StoragesInfo info = stream->next()) { - processNextStorage(context, res_columns, columns_mask, info, has_state_column); + storage->processNextStorage(context, res_columns, columns_mask, info, has_state_column); } if (has_state_column) @@ -234,7 +278,7 @@ Pipe StorageSystemPartsBase::read( UInt64 num_rows = res_columns.at(0)->size(); Chunk chunk(std::move(res_columns), num_rows); - return Pipe(std::make_shared(std::move(header), std::move(chunk))); + pipeline.init(Pipe(std::make_shared(std::move(header), std::move(chunk)))); } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 1127a8906e5..2b4055373d1 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -114,7 +115,7 @@ protected: class StoragesInfoStream : public StoragesInfoStreamBase { public: - StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context); + StoragesInfoStream(const ActionsDAG::Node * predicate, ContextPtr context); }; /** Implements system table 'parts' which allows to get information about data parts for 
tables of MergeTree family. @@ -122,7 +123,8 @@ public: class StorageSystemPartsBase : public IStorage { public: - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -139,17 +141,45 @@ private: static bool hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot); protected: + friend class ReadFromSystemPartsBase; + const FormatSettings format_settings = {}; StorageSystemPartsBase(const StorageID & table_id_, ColumnsDescription && columns); - virtual std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) + virtual std::unique_ptr getStoragesInfoStream(const ActionsDAG::Node * predicate, ContextPtr context) { - return std::make_unique(query_info, context); + return std::make_unique(predicate, context); } virtual void processNextStorage(ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) = 0; }; +class ReadFromSystemPartsBase : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemPartsBase"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemPartsBase( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + bool has_state_column_); + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +protected: + std::shared_ptr storage; + std::vector columns_mask; + const bool has_state_column; + const ActionsDAG::Node * predicate = nullptr; +}; + + } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index eeb3db342b4..26dead8cb01 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -182,13 +185,6 @@ public: , requests_with_zk_fields(max_threads) {} - Pipe read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context); - -private: StatusRequestsPool requests_without_zk_fields; StatusRequestsPool requests_with_zk_fields; }; @@ -241,8 +237,51 @@ StorageSystemReplicas::StorageSystemReplicas(const StorageID & table_id_) StorageSystemReplicas::~StorageSystemReplicas() = default; +class ReadFromSystemReplicas : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemReplicas"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; -Pipe StorageSystemReplicas::read( + ReadFromSystemReplicas( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::map> replicated_tables_, + bool with_zk_fields_, + std::shared_ptr impl_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , replicated_tables(std::move(replicated_tables_)) + , with_zk_fields(with_zk_fields_) + , impl(std::move(impl_)) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + 
std::map> replicated_tables; + const bool with_zk_fields; + std::shared_ptr impl; + const ActionsDAG::Node * predicate = nullptr; +}; + +void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageSystemReplicas::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -250,15 +289,6 @@ Pipe StorageSystemReplicas::read( QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, const size_t /*num_streams*/) -{ - return impl->read(column_names, storage_snapshot, query_info, context); -} - -Pipe StorageSystemReplicasImpl::read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context) { storage_snapshot->check(column_names); @@ -305,6 +335,18 @@ Pipe StorageSystemReplicasImpl::read( } } + auto header = storage_snapshot->metadata->getSampleBlock(); + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(header), std::move(replicated_tables), with_zk_fields, impl); // /*std::move(this_ptr),*/ std::move(columns_mask), max_block_size); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto header = getOutputStream().header; + MutableColumnPtr col_database_mut = ColumnString::create(); MutableColumnPtr col_table_mut = ColumnString::create(); MutableColumnPtr col_engine_mut = ColumnString::create(); @@ -332,10 +374,14 @@ Pipe StorageSystemReplicasImpl::read( { col_engine, std::make_shared(), "engine" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) - return {}; + { + auto source = std::make_shared(std::move(header)); + pipeline.init(Pipe(std::move(source))); + return; + } col_database = filtered_block.getByName("database").column; col_table = filtered_block.getByName("table").column; @@ -347,7 +393,7 @@ Pipe StorageSystemReplicasImpl::read( size_t tables_size = col_database->size(); /// Use separate queues for requests with and without ZooKeeper fields. - StatusRequestsPool & get_status_requests = with_zk_fields ? requests_with_zk_fields : requests_without_zk_fields; + StatusRequestsPool & get_status_requests = with_zk_fields ? impl->requests_with_zk_fields : impl->requests_without_zk_fields; QueryStatusPtr query_status = context ? 
context->getProcessListElement() : nullptr; @@ -435,7 +481,7 @@ Pipe StorageSystemReplicasImpl::read( UInt64 num_rows = fin_columns.at(0)->size(); Chunk chunk(std::move(fin_columns), num_rows); - return Pipe(std::make_shared(storage_snapshot->metadata->getSampleBlock(), std::move(chunk))); + pipeline.init(Pipe(std::make_shared(header, std::move(chunk)))); } diff --git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index f6bdfc937ea..11c5371310f 100644 --- a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB @@ -20,7 +20,8 @@ public: std::string getName() const override { return "SystemReplicas"; } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -32,7 +33,7 @@ public: bool isSystemStorage() const override { return true; } private: - std::unique_ptr impl; + std::shared_ptr impl; }; } From 9a0546168094d38692725f89677077e32bd144b5 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 4 Mar 2024 17:49:33 +0000 Subject: [PATCH 205/356] Better exception message --- docs/en/interfaces/schema-inference.md | 2 +- src/Formats/SchemaInferenceUtils.cpp | 45 ++++++++++++++------------ 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index f2e9136d1db..05fae994cbe 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -568,7 +568,7 @@ Result: ```text Code: 636. DB::Exception: The table structure cannot be extracted from a JSONEachRow format file. Error: -Code: 117. DB::Exception: JSON objects have ambiguous paths: 'a' (with type Int64) and 'a.b'. You can enable setting input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type for path 'a'. (INCORRECT_DATA) (version 24.3.1.1). +Code: 117. DB::Exception: JSON objects have ambiguous data: in some objects path 'a' has type 'Int64' and in some - 'Tuple(b String)'. You can enable setting input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type for path 'a'. (INCORRECT_DATA) (version 24.3.1.1). You can specify the structure manually. (CANNOT_EXTRACT_TABLE_STRUCTURE) ``` diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 998f97fae0d..cb574551d26 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -182,26 +182,6 @@ namespace DataTypePtr getType(bool use_string_type_for_ambiguous_paths) const { - /// Check if we have ambiguous paths. - /// For example: - /// 'a.b.c' : Int32 and 'a.b' : String - /// Also check if leaf type is Nothing, because the next situation is possible: - /// {"a" : {"b" : null}} -> 'a.b' : Nullable(Nothing) - /// {"a" : {"b" : {"c" : 42}}} -> 'a.b.c' : Int32 - /// And after merge we will have ambiguous paths 'a.b.c' : Int32 and 'a.b' : Nullable(Nothing), - /// but it's a valid case and we should ignore path 'a.b'. - if (leaf_type && !isNothing(removeNullable(leaf_type)) && !nodes.empty()) - { - if (use_string_type_for_ambiguous_paths) - return std::make_shared(); - throw Exception( - ErrorCodes::INCORRECT_DATA, - "JSON objects have ambiguous paths: '{}' (with type {}) and '{}'. 
You can enable setting " - "input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type " - "for path '{}'", - path, leaf_type->getName(), nodes.begin()->second.path, path); - } - if (nodes.empty()) return leaf_type; @@ -215,7 +195,30 @@ namespace node_types.push_back(node.getType(use_string_type_for_ambiguous_paths)); } - return std::make_shared(std::move(node_types), std::move(node_names)); + auto tuple_type = std::make_shared(std::move(node_types), std::move(node_names)); + + /// Check if we have ambiguous paths. + /// For example: + /// 'a.b.c' : Int32 and 'a.b' : String + /// Also check if leaf type is Nothing, because the next situation is possible: + /// {"a" : {"b" : null}} -> 'a.b' : Nullable(Nothing) + /// {"a" : {"b" : {"c" : 42}}} -> 'a.b.c' : Int32 + /// And after merge we will have ambiguous paths 'a.b.c' : Int32 and 'a.b' : Nullable(Nothing), + /// but it's a valid case and we should ignore path 'a.b'. + if (leaf_type && !isNothing(removeNullable(leaf_type)) && !nodes.empty()) + { + if (use_string_type_for_ambiguous_paths) + return std::make_shared(); + + throw Exception( + ErrorCodes::INCORRECT_DATA, + "JSON objects have ambiguous data: in some objects path '{}' has type '{}' and in some - '{}'. You can enable setting " + "input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type " + "for path '{}'", + path, leaf_type->getName(), tuple_type->getName(), path); + } + + return tuple_type; } }; From 47d8f039e56de7299f741613d57fc972e71157d6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 4 Mar 2024 18:01:12 +0000 Subject: [PATCH 206/356] Cleanup VirtualColumnUtils --- src/Storages/StorageMerge.cpp | 25 ++- src/Storages/VirtualColumnUtils.cpp | 229 ---------------------------- src/Storages/VirtualColumnUtils.h | 17 --- 3 files changed, 20 insertions(+), 251 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b827670bd4c..eb45a8fac66 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -63,10 +63,12 @@ #include #include +namespace DB +{ + namespace { -using namespace DB; bool columnIsPhysical(ColumnDefaultKind kind) { return kind == ColumnDefaultKind::Default || kind == ColumnDefaultKind::Materialized; @@ -82,10 +84,23 @@ bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs) return false; } +/// Adds to the select query section `WITH value AS column_name` +/// +/// For example: +/// - `WITH 9000 as _port`. 
+void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value) +{ + auto & select = ast->as(); + if (!select.with()) + select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared()); + + auto literal = std::make_shared(value); + literal->alias = column_name; + literal->prefer_alias_to_column_name = true; + select.with()->children.push_back(literal); } -namespace DB -{ +} namespace ErrorCodes { @@ -928,8 +943,8 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ if (!is_storage_merge_engine) { - VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", current_storage_id.table_name); - VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_database", current_storage_id.database_name); + rewriteEntityInAst(modified_query_info.query, "_table", current_storage_id.table_name); + rewriteEntityInAst(modified_query_info.query, "_database", current_storage_id.database_name); } } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 07ac61c110d..8457a018625 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -55,189 +55,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -namespace -{ - -/// Verifying that the function depends only on the specified columns -bool isValidFunction(const ASTPtr & expression, const std::function & is_constant) -{ - const auto * function = expression->as(); - if (function && functionIsInOrGlobalInOperator(function->name)) - { - // Second argument of IN can be a scalar subquery - return isValidFunction(function->arguments->children[0], is_constant); - } - else - return is_constant(expression); -} - -/// Extract all subfunctions of the main conjunction, but depending only on the specified columns -bool extractFunctions(const ASTPtr & expression, const std::function & is_constant, ASTs & result) -{ - const auto * function = expression->as(); - - if (function) - { - if (function->name == "and" || function->name == "indexHint") - { - bool ret = true; - for (const auto & child : function->arguments->children) - ret &= extractFunctions(child, is_constant, result); - return ret; - } - else if (function->name == "or") - { - bool ret = false; - ASTs or_args; - for (const auto & child : function->arguments->children) - ret |= extractFunctions(child, is_constant, or_args); - - if (!or_args.empty()) - { - /// In case of there are less number of arguments for which - /// is_constant() == true, we need to add always-true - /// implicitly to avoid breaking AND invariant. 
- /// - /// Consider the following: - /// - /// ((value = 10) OR (_table = 'v2')) AND ((_table = 'v1') OR (value = 20)) - /// - /// Without implicit always-true: - /// - /// (_table = 'v2') AND (_table = 'v1') - /// - /// With: - /// - /// (_table = 'v2' OR 1) AND (_table = 'v1' OR 1) -> (_table = 'v2') OR (_table = 'v1') - /// - if (or_args.size() != function->arguments->children.size()) - or_args.push_back(std::make_shared(Field(1))); - result.push_back(makeASTForLogicalOr(std::move(or_args))); - } - return ret; - } - } - - if (isValidFunction(expression, is_constant)) - { - result.push_back(expression->clone()); - return true; - } - else - return false; -} - -/// Construct a conjunction from given functions -ASTPtr buildWhereExpression(ASTs && functions) -{ - if (functions.empty()) - return nullptr; - if (functions.size() == 1) - return functions[0]; - return makeASTForLogicalAnd(std::move(functions)); -} - -} - namespace VirtualColumnUtils { -void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value, const String & func) -{ - auto & select = ast->as(); - if (!select.with()) - select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared()); - - if (func.empty()) - { - auto literal = std::make_shared(value); - literal->alias = column_name; - literal->prefer_alias_to_column_name = true; - select.with()->children.push_back(literal); - } - else - { - auto literal = std::make_shared(value); - literal->prefer_alias_to_column_name = true; - - auto function = makeASTFunction(func, literal); - function->alias = column_name; - function->prefer_alias_to_column_name = true; - select.with()->children.push_back(function); - } -} - -bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block block, ASTPtr & expression_ast) -{ - if (block.rows() == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot prepare filter with empty block"); - - /// Take the first row of the input block to build a constant block - auto columns = block.getColumns(); - Columns const_columns(columns.size()); - for (size_t i = 0; i < columns.size(); ++i) - { - if (isColumnConst(*columns[i])) - const_columns[i] = columns[i]->cloneResized(1); - else - const_columns[i] = ColumnConst::create(columns[i]->cloneResized(1), 1); - } - - block.setColumns(const_columns); - - bool unmodified = true; - const auto & select = query->as(); - if (!select.where() && !select.prewhere()) - return unmodified; - - // Provide input columns as constant columns to check if an expression is - // constant and depends on the columns from provided block (the last is - // required to allow skipping some conditions for handling OR). 
- std::function is_constant = [&block, &context](const ASTPtr & expr) - { - auto actions = std::make_shared(block.getColumnsWithTypeAndName()); - PreparedSetsPtr prepared_sets = std::make_shared(); - const NamesAndTypesList source_columns; - const NamesAndTypesList aggregation_keys; - const ColumnNumbersList grouping_set_keys; - - ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, source_columns, std::move(actions), prepared_sets, true, true, true, - { aggregation_keys, grouping_set_keys, GroupByKind::NONE }); - - ActionsVisitor(visitor_data).visit(expr); - actions = visitor_data.getActions(); - auto expr_column_name = expr->getColumnName(); - - const auto * expr_const_node = actions->tryFindInOutputs(expr_column_name); - if (!expr_const_node) - return false; - auto filter_actions = ActionsDAG::buildFilterActionsDAG({expr_const_node}); - const auto & nodes = filter_actions->getNodes(); - bool has_dependent_columns = std::any_of(nodes.begin(), nodes.end(), [&](const auto & node) - { - return block.has(node.result_name); - }); - if (!has_dependent_columns) - return false; - - auto expression_actions = std::make_shared(actions); - auto block_with_constants = block; - expression_actions->execute(block_with_constants, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); - return block_with_constants.has(expr_column_name) && isColumnConst(*block_with_constants.getByName(expr_column_name).column); - }; - - /// Create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. - ASTs functions; - if (select.where()) - unmodified &= extractFunctions(select.where(), is_constant, functions); - if (select.prewhere()) - unmodified &= extractFunctions(select.prewhere(), is_constant, functions); - - expression_ast = buildWhereExpression(std::move(functions)); - return unmodified; -} - static void makeSets(const ExpressionActionsPtr & actions, const ContextPtr & context) { for (const auto & node : actions->getNodes()) @@ -294,55 +114,6 @@ void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) } } -void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr context, ASTPtr expression_ast) -{ - if (block.rows() == 0) - return; - - if (!expression_ast) - prepareFilterBlockWithQuery(query, context, block, expression_ast); - - if (!expression_ast) - return; - - /// Let's analyze and calculate the prepared expression. - auto syntax_result = TreeRewriter(context).analyze(expression_ast, block.getNamesAndTypesList()); - ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); - ExpressionActionsPtr actions = analyzer.getActions(false /* add alises */, true /* project result */, CompileExpressions::yes); - - makeSets(actions, context); - - Block block_with_filter = block; - actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); - - /// Filter the block. 
- String filter_column_name = expression_ast->getColumnName(); - ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullIfNeeded(); - if (filter_column->getDataType() != TypeIndex::UInt8) - return; - - ConstantFilterDescription constant_filter(*filter_column); - - if (constant_filter.always_true) - { - return; - } - - if (constant_filter.always_false) - { - block = block.cloneEmpty(); - return; - } - - FilterDescription filter(*filter_column); - - for (size_t i = 0; i < block.columns(); ++i) - { - ColumnPtr & column = block.safeGetByPosition(i).column; - column = column->filter(*filter.data, -1); - } -} - NamesAndTypesList getPathFileAndSizeVirtualsForStorage(NamesAndTypesList storage_columns) { auto default_virtuals = NamesAndTypesList{ diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 7a9b2605339..3e7299b4a63 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -17,23 +17,6 @@ class NamesAndTypesList; namespace VirtualColumnUtils { -/// Adds to the select query section `WITH value AS column_name`, and uses func -/// to wrap the value (if any) -/// -/// For example: -/// - `WITH 9000 as _port`. -/// - `WITH toUInt16(9000) as _port`. -void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value, const String & func = ""); - -/// Prepare `expression_ast` to filter block. Returns true if `expression_ast` is not trimmed, that is, -/// `block` provides all needed columns for `expression_ast`, else return false. -bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block block, ASTPtr & expression_ast); - -/// Leave in the block only the rows that fit under the WHERE clause and the PREWHERE clause of the query. -/// Only elements of the outer conjunction are considered, depending only on the columns present in the block. -/// If `expression_ast` is passed, use it to filter block. -void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr context, ASTPtr expression_ast = {}); - /// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate. /// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs). void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context); From d209508753a522782b24b61f4a399e0042d1a0ae Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 4 Mar 2024 18:26:42 +0000 Subject: [PATCH 207/356] Fixing style. 
--- src/Storages/VirtualColumnUtils.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 8457a018625..386dcd17733 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -50,11 +50,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace VirtualColumnUtils { From b86fcf0e7ddde02a1aeb07d0fa99504fbf677430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 4 Mar 2024 19:53:18 +0100 Subject: [PATCH 208/356] Add missing compression clone() in ASTQueryWithOutput --- src/Parsers/ASTQueryWithOutput.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Parsers/ASTQueryWithOutput.cpp b/src/Parsers/ASTQueryWithOutput.cpp index 4bf1e6cb231..b9b07eb7070 100644 --- a/src/Parsers/ASTQueryWithOutput.cpp +++ b/src/Parsers/ASTQueryWithOutput.cpp @@ -23,6 +23,16 @@ void ASTQueryWithOutput::cloneOutputOptions(ASTQueryWithOutput & cloned) const cloned.settings_ast = settings_ast->clone(); cloned.children.push_back(cloned.settings_ast); } + if (compression) + { + cloned.compression = compression->clone(); + cloned.children.push_back(cloned.compression); + } + if (compression_level) + { + cloned.compression_level = compression_level->clone(); + cloned.children.push_back(cloned.compression_level); + } } void ASTQueryWithOutput::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const From 37891660be107c41f9883d0d9d3fa8428d49e316 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 4 Mar 2024 20:01:03 +0100 Subject: [PATCH 209/356] More missing resets --- src/Parsers/ASTQueryWithOutput.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ASTQueryWithOutput.cpp b/src/Parsers/ASTQueryWithOutput.cpp index b9b07eb7070..3890ed2347a 100644 --- a/src/Parsers/ASTQueryWithOutput.cpp +++ b/src/Parsers/ASTQueryWithOutput.cpp @@ -74,9 +74,12 @@ bool ASTQueryWithOutput::resetOutputASTIfExist(IAST & ast) /// FIXME: try to prettify this cast using `as<>()` if (auto * ast_with_output = dynamic_cast(&ast)) { - ast_with_output->format.reset(); ast_with_output->out_file.reset(); + ast_with_output->format.reset(); ast_with_output->settings_ast.reset(); + ast_with_output->compression.reset(); + ast_with_output->compression_level.reset(); + ast_with_output->children.clear(); return true; } From bd2b0b4338923328198ed6e27f872324646c5a0c Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 4 Mar 2024 20:01:26 +0100 Subject: [PATCH 210/356] Export only str and bool for build configs --- tests/ci/ci_config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index dd175177858..b9c332e8e18 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -264,6 +264,8 @@ class BuildConfig: def process(field_name: str, field: Union[bool, str]) -> str: if isinstance(field, bool): field = str(field).lower() + elif not isinstance(field, str): + field = "" if export: return f"export BUILD_{field_name.upper()}={repr(field)}" return f"BUILD_{field_name.upper()}={field}" From 680bb3392e080f167ce01e0ebf8f380a43b1de24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 4 Mar 2024 20:01:27 +0100 Subject: [PATCH 211/356] More missing trash --- src/Parsers/ASTInsertQuery.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTInsertQuery.h b/src/Parsers/ASTInsertQuery.h index 6a4ce078f79..b0f444ed755 100644 --- a/src/Parsers/ASTInsertQuery.h +++ b/src/Parsers/ASTInsertQuery.h @@ -59,11 +59,13 @@ public: if (database) { res->database = database->clone(); res->children.push_back(res->database); } if (table) { res->table = table->clone(); res->children.push_back(res->table); } if (columns) { res->columns = columns->clone(); res->children.push_back(res->columns); } - if (select) { res->select = select->clone(); res->children.push_back(res->select); } - if (watch) { res->watch = watch->clone(); res->children.push_back(res->watch); } if (table_function) { res->table_function = table_function->clone(); res->children.push_back(res->table_function); } if (partition_by) { res->partition_by = partition_by->clone(); res->children.push_back(res->partition_by); } if (settings_ast) { res->settings_ast = settings_ast->clone(); res->children.push_back(res->settings_ast); } + if (select) { res->select = select->clone(); res->children.push_back(res->select); } + if (watch) { res->watch = watch->clone(); res->children.push_back(res->watch); } + if (infile) { res->infile = infile->clone(); res->children.push_back(res->infile); } + if (compression) { res->compression = compression->clone(); res->children.push_back(res->compression); } return res; } From a6caace5ecb299184b9f07e3c3e4dc38a210b924 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 4 Mar 2024 20:03:45 +0100 Subject: [PATCH 212/356] Add a new runner type --- tests/ci/lambda_shared_package/lambda_shared/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py index 9e6c5dde298..043a0310d11 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -20,11 +20,12 @@ RUNNER_TYPE_LABELS = [ "style-checker", "style-checker-aarch64", # private runners - "private-style-checker", "private-builder", + "private-clickpipes", "private-func-tester", "private-fuzzer-unit-tester", "private-stress-tester", + "private-style-checker", ] From bf91fe8bae6680d0bf68b3917a253c2316f4142b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 4 Mar 2024 20:06:46 +0100 Subject: [PATCH 213/356] fix --- tests/integration/test_replicated_database/test.py | 2 +- .../integration/test_replicated_database_cluster_groups/test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index b47f86a843d..4fa42ff6f1a 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -509,7 +509,7 @@ def test_alters_from_different_replicas(started_cluster): settings = {"distributed_ddl_task_timeout": 5} assert ( - "There are 1 unfinished hosts (0 of them are currently executing the task" + "is not finished on 1 of 3 hosts" in competing_node.query_and_get_error( "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", settings=settings, diff --git a/tests/integration/test_replicated_database_cluster_groups/test.py b/tests/integration/test_replicated_database_cluster_groups/test.py index 647626d8014..b501c2573b2 100644 --- a/tests/integration/test_replicated_database_cluster_groups/test.py +++ b/tests/integration/test_replicated_database_cluster_groups/test.py @@ -96,7 +96,7 @@ def test_cluster_groups(started_cluster): main_node_2.stop_clickhouse() settings = {"distributed_ddl_task_timeout": 5} assert ( - "There are 1 unfinished hosts (0 of them are currently executing the task)" + "is not finished on 1 of 2 hosts" in main_node_1.query_and_get_error( "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", settings=settings, From 8be4a115ac5f5cbe7bb7dac46a03c83e4aca8bee Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 4 Mar 2024 19:22:18 +0000 Subject: [PATCH 214/356] Automatic style fix --- tests/integration/test_replicated_database/test.py | 9 +++------ .../test_replicated_database_cluster_groups/test.py | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 4fa42ff6f1a..bd80ac25468 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -508,12 +508,9 @@ def test_alters_from_different_replicas(started_cluster): dummy_node.stop_clickhouse(kill=True) settings = {"distributed_ddl_task_timeout": 5} - assert ( - "is not finished on 1 of 3 hosts" - in competing_node.query_and_get_error( - "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", - settings=settings, - ) + 
assert "is not finished on 1 of 3 hosts" in competing_node.query_and_get_error( + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", + settings=settings, ) settings = { "distributed_ddl_task_timeout": 5, diff --git a/tests/integration/test_replicated_database_cluster_groups/test.py b/tests/integration/test_replicated_database_cluster_groups/test.py index b501c2573b2..91361c1850b 100644 --- a/tests/integration/test_replicated_database_cluster_groups/test.py +++ b/tests/integration/test_replicated_database_cluster_groups/test.py @@ -95,12 +95,9 @@ def test_cluster_groups(started_cluster): # Exception main_node_2.stop_clickhouse() settings = {"distributed_ddl_task_timeout": 5} - assert ( - "is not finished on 1 of 2 hosts" - in main_node_1.query_and_get_error( - "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", - settings=settings, - ) + assert "is not finished on 1 of 2 hosts" in main_node_1.query_and_get_error( + "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", + settings=settings, ) # 3. After start both groups are synced From dc7ea2f90a77eb3ab1ca092537738a1ed11c1cc5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 4 Mar 2024 08:47:28 +0100 Subject: [PATCH 215/356] Add a test for PRQL panics Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/03003_prql_panic.reference | 1 + tests/queries/0_stateless/03003_prql_panic.sh | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/03003_prql_panic.reference create mode 100755 tests/queries/0_stateless/03003_prql_panic.sh diff --git a/tests/queries/0_stateless/03003_prql_panic.reference b/tests/queries/0_stateless/03003_prql_panic.reference new file mode 100644 index 00000000000..8e0782c44f2 --- /dev/null +++ b/tests/queries/0_stateless/03003_prql_panic.reference @@ -0,0 +1 @@ +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03003_prql_panic.sh b/tests/queries/0_stateless/03003_prql_panic.sh new file mode 100755 index 00000000000..63c01372686 --- /dev/null +++ b/tests/queries/0_stateless/03003_prql_panic.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Before [1] this causes a panic, but it will be fixed soon, so do not check +# for panic, but just for SYNTAX_ERROR. 
+# +# [1]: https://github.com/PRQL/prql/pull/4285 +$CLICKHOUSE_CLIENT --dialect prql -q "SELECT id FROM distributed_test_table GROUP BY x -> concat(concat(materialize(toNullable(NULL)))) LIMIT 3" |& grep -o -m1 SYNTAX_ERROR From 24505ba9b34c8857c2617974323a4f3302ce6d7c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Mon, 4 Mar 2024 19:36:38 +0000 Subject: [PATCH 216/356] Move userspace page cache settings to the correct section of SettingsChangeHistory.h --- src/Core/SettingsChangesHistory.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index a7f96679bbe..2952e27bb39 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,8 +85,11 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { - {"24.2", { - {"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, + {"24.3", {{"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, + {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, + {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, + }}, + {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, @@ -114,9 +117,6 @@ static std::map sett {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, - {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, - {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, }}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, From 51873f0dc9d45dc6993ffdd8379727761c168cb2 Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Mon, 4 Mar 2024 11:48:12 -0800 Subject: [PATCH 217/356] Just triggering build, no changes From d5825ec80d19c853b7f2ee03cdb296e17abf0e59 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 4 Mar 2024 18:34:41 +0100 Subject: [PATCH 218/356] Faster (almost 2x) mutexes (was slower due to ThreadFuzzer) Recently I noticed that DB::injection() pops up in perf top, so let's optimize it slightly: - Add -fomit-frame-pointer -momit-leaf-frame-pointer explicitly -- almost 0 effect - Add ALWAYS_INLINE for 
ThreadFuzzer::isStarted() (just in case) - Disable ThreadFuzzer if non of env variables had been set, this is needed to avoid extra checks in DB::injection() - Add ALWAYS_INLINE for ThreadFuzzer::injection() And here are some results for ThreadFuzzer test: - before: elapsed 6.27368 / test time 654 ms - after: elapsed 3.14167 / test time 325 ms - disabled: elapsed 2.46666 / test time 263 ms *But note, it is still slower then with ThreadFuzzer disabled.* Note, that this numbers for AMD 5975WX, for server with 2x Xeon Silver 4216 2.10: - before: elapsed 12.109 / test time 1325 ms - after: elapsed 10.506 / test time 1101 ms - disabled: elapsed 8.41043 / test time 917 ms P.S. I've also tried with the same glibc version as server had - zero changes. Refs: https://gist.github.com/azat/51a5fcc3a40af9f678906a3a6e14e079 Signed-off-by: Azat Khuzhin --- src/CMakeLists.txt | 2 + src/Common/ThreadFuzzer.cpp | 49 +++++++++++++++++++----- src/Common/ThreadFuzzer.h | 3 +- src/Common/tests/gtest_thread_fuzzer.cpp | 36 +++++++++++++++++ 4 files changed, 79 insertions(+), 11 deletions(-) create mode 100644 src/Common/tests/gtest_thread_fuzzer.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dff70e06ce4..73aa409e995 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -174,6 +174,8 @@ endif () add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) +set_source_files_properties(Common/ThreadFuzzer.cpp PROPERTIES COMPILE_FLAGS "-fomit-frame-pointer -momit-leaf-frame-pointer") + add_library (clickhouse_malloc OBJECT Common/malloc.cpp) set_source_files_properties(Common/malloc.cpp PROPERTIES COMPILE_FLAGS "-fno-builtin") diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 0868613d880..1d944f4a458 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -51,7 +51,11 @@ ThreadFuzzer::ThreadFuzzer() { initConfiguration(); if (!isEffective()) + { + /// It has no effect - disable it + stop(); return; + } setup(); } @@ -172,6 +176,8 @@ void ThreadFuzzer::stop() void ThreadFuzzer::start() { + if (!instance().isEffective()) + return; started.store(true, std::memory_order_relaxed); } @@ -180,11 +186,11 @@ bool ThreadFuzzer::isStarted() return started.load(std::memory_order_relaxed); } -static void injection( +static void injectionImpl( double yield_probability, double migrate_probability, double sleep_probability, - double sleep_time_us [[maybe_unused]]) + double sleep_time_us) { DENY_ALLOCATIONS_IN_SCOPE; if (!ThreadFuzzer::isStarted()) @@ -222,6 +228,19 @@ static void injection( } } +static ALWAYS_INLINE void injection( + double yield_probability, + double migrate_probability, + double sleep_probability, + double sleep_time_us) +{ + DENY_ALLOCATIONS_IN_SCOPE; + if (!ThreadFuzzer::isStarted()) + return; + + injectionImpl(yield_probability, migrate_probability, sleep_probability, sleep_time_us); +} + void ThreadFuzzer::maybeInjectSleep() { auto & fuzzer = ThreadFuzzer::instance(); @@ -286,13 +305,13 @@ void ThreadFuzzer::setup() const #if THREAD_FUZZER_WRAP_PTHREAD #define INJECTION_BEFORE(NAME) \ - injection( \ + injectionImpl( \ NAME##_before_yield_probability.load(std::memory_order_relaxed), \ NAME##_before_migrate_probability.load(std::memory_order_relaxed), \ NAME##_before_sleep_probability.load(std::memory_order_relaxed), \ NAME##_before_sleep_time_us.load(std::memory_order_relaxed)); #define INJECTION_AFTER(NAME) \ - injection( \ + injectionImpl( \ 
NAME##_after_yield_probability.load(std::memory_order_relaxed), \ NAME##_after_migrate_probability.load(std::memory_order_relaxed), \ NAME##_after_sleep_probability.load(std::memory_order_relaxed), \ @@ -383,13 +402,16 @@ static void * getFunctionAddress(const char * name) static constinit RET(*real_##NAME)(__VA_ARGS__) = nullptr; \ extern "C" RET NAME(__VA_ARGS__) \ { \ - INJECTION_BEFORE(NAME); \ + bool thread_fuzzer_enabled = ThreadFuzzer::isStarted(); \ + if (thread_fuzzer_enabled) \ + INJECTION_BEFORE(NAME); \ if (unlikely(!real_##NAME)) { \ real_##NAME = \ reinterpret_cast(getFunctionAddress(#NAME)); \ } \ auto && ret{real_##NAME(arg)}; \ - INJECTION_AFTER(NAME); \ + if (thread_fuzzer_enabled) \ + INJECTION_AFTER(NAME); \ return ret; \ } FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER_USING_DLSYM) @@ -399,10 +421,17 @@ FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER_USING_DLSYM) extern "C" RET __##NAME(__VA_ARGS__); \ extern "C" RET NAME(__VA_ARGS__) \ { \ - INJECTION_BEFORE(NAME); \ - auto && ret{__##NAME(arg)}; \ - INJECTION_AFTER(NAME); \ - return ret; \ + if (!ThreadFuzzer::isStarted()) \ + { \ + return __##NAME(arg); \ + } \ + else \ + { \ + INJECTION_BEFORE(NAME); \ + auto && ret{__##NAME(arg)}; \ + INJECTION_AFTER(NAME); \ + return ret; \ + } \ } FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER_USING_INTERNAL_SYMBOLS) #undef MAKE_WRAPPER_USING_INTERNAL_SYMBOLS diff --git a/src/Common/ThreadFuzzer.h b/src/Common/ThreadFuzzer.h index 9dd55fe7995..1cff27a7588 100644 --- a/src/Common/ThreadFuzzer.h +++ b/src/Common/ThreadFuzzer.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB { @@ -56,7 +57,7 @@ public: static void stop(); static void start(); - static bool isStarted(); + static bool ALWAYS_INLINE isStarted(); static void maybeInjectSleep(); static void maybeInjectMemoryLimitException(); diff --git a/src/Common/tests/gtest_thread_fuzzer.cpp b/src/Common/tests/gtest_thread_fuzzer.cpp new file mode 100644 index 00000000000..ac2ccebebc6 --- /dev/null +++ b/src/Common/tests/gtest_thread_fuzzer.cpp @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include +#include + +TEST(ThreadFuzzer, mutex) +{ + /// Initialize ThreadFuzzer::started + DB::ThreadFuzzer::instance().isEffective(); + + std::mutex mutex; + std::atomic elapsed_ns = 0; + + auto func = [&]() + { + Stopwatch watch; + for (size_t i = 0; i < 1e6; ++i) + { + mutex.lock(); + mutex.unlock(); + } + elapsed_ns += watch.elapsedNanoseconds(); + }; + + std::vector> threads(10); + + for (auto & thread : threads) + thread.emplace(func); + + for (auto & thread : threads) + thread->join(); + + std::cout << "elapsed: " << elapsed_ns/1e9 << "\n"; +} From e715ff622d50b3c1dda8c4e90bcb97c431b6a81b Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 5 Mar 2024 00:52:13 +0100 Subject: [PATCH 219/356] Better --- docker/test/fuzzer/run-fuzzer.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 1a1888861af..0234ead69d0 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -247,8 +247,11 @@ quit fuzzer_pid=$! echo "Fuzzer pid is $fuzzer_pid" + # The fuzzer_pid belongs to the timeout process. + actual_fuzzer_pid=$(ps -o pid= --ppid "$fuzzer_pid") + echo "Attaching gdb to the fuzzer itself" - gdb -batch -command script.gdb -p $fuzzer_pid & + gdb -batch -command script.gdb -p $actual_fuzzer_pid & # Wait for the fuzzer to complete. # Note that the 'wait || ...' 
thing is required so that the script doesn't From 4aba9eb6e527b4fb84339e09fe8e7e4b2fb63205 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 5 Mar 2024 04:44:20 +0300 Subject: [PATCH 220/356] Update 03003_prql_panic.sh --- tests/queries/0_stateless/03003_prql_panic.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03003_prql_panic.sh b/tests/queries/0_stateless/03003_prql_panic.sh index 63c01372686..01d7beed99b 100755 --- a/tests/queries/0_stateless/03003_prql_panic.sh +++ b/tests/queries/0_stateless/03003_prql_panic.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Tags: no-fasttest +# Requires Rust, which is not built for Fast Test. CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From c735caf44f7dfcc245d6c77ca111dee686e914a8 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 5 Mar 2024 11:54:13 +0800 Subject: [PATCH 221/356] Fix typo --- docker/test/upgrade/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 0354c14bef5..580ddb65597 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -81,7 +81,7 @@ remove_keeper_config "create_if_not_exists" "[01]" sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ | sed "s|azure|azure_blob_storage|" \ > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp -sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml #todo: remove these after 24.3 released. sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ From b5fb12c6ec580f3bf7a165eb32456c0d196a374c Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 5 Mar 2024 11:54:58 +0800 Subject: [PATCH 222/356] Fix typo --- docker/test/upgrade/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 580ddb65597..ec30cbf128e 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -128,7 +128,7 @@ sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-serv sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ | sed "s|azure|azure_blob_storage|" \ > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp -sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml #todo: remove these after 24.3 released. 
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ From 9a3312f12d9a7c25e6eab6b32fb6198973c0e68b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 5 Mar 2024 11:55:32 +0800 Subject: [PATCH 223/356] Remove debug --- docker/test/upgrade/run.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index ec30cbf128e..12151aa6a3c 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -105,8 +105,6 @@ rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml -cat /etc/clickhouse-server/config.d/storage_conf.xml - start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log From 18b17523e403998fdaaedbfb08736de3a20b38a5 Mon Sep 17 00:00:00 2001 From: lzydmxy <13126752315@163.com> Date: Tue, 5 Mar 2024 15:32:17 +0800 Subject: [PATCH 224/356] Move connection drain from prepare to work --- src/Processors/Sources/RemoteSource.cpp | 18 ++++++++++++++++-- src/Processors/Sources/RemoteSource.h | 3 +++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index dfa311a7079..c09657ece35 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -72,19 +72,33 @@ ISource::Status RemoteSource::prepare() if (is_async_state) return Status::Async; + if (executor_finished) + return Status::Finished; + Status status = ISource::prepare(); /// To avoid resetting the connection (because of "unfinished" query) in the /// RemoteQueryExecutor it should be finished explicitly. if (status == Status::Finished) { - query_executor->finish(); is_async_state = false; - return status; + need_drain = true; + return Status::Ready; } return status; } +void RemoteSource::work() +{ + if (need_drain) + { + query_executor->finish(); + executor_finished = true; + return; + } + ISource::work(); +} + std::optional RemoteSource::tryGenerate() { /// onCancel() will do the cancel if the query was sent. 
diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index dbfa0156331..80d84068e39 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -23,6 +23,7 @@ public: ~RemoteSource() override; Status prepare() override; + void work() override; String getName() const override { return "Remote"; } void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit.swap(counter); } @@ -40,6 +41,8 @@ protected: private: bool was_query_sent = false; + bool need_drain = false; + bool executor_finished = false; bool add_aggregation_info = false; RemoteQueryExecutorPtr query_executor; RowsBeforeLimitCounterPtr rows_before_limit; From 4251cc3a117682329a437db0a5e92f876d59821f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 5 Mar 2024 17:05:40 +0800 Subject: [PATCH 225/356] fix building --- src/Functions/multiIf.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 5bf9f2af420..f5cb2375c53 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } namespace @@ -419,8 +420,13 @@ private: calculateInserts(instructions, rows, inserts); res_data.resize_exact(rows); - if (res_null_map) + if constexpr (nullable_result) + { + if (res_null_map) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid result null_map while result type is nullable"); + res_null_map->resize_exact(rows); + } std::vector data_cols(instructions.size(), nullptr); std::vector null_map_cols(instructions.size(), nullptr); From 7426baa065cc053bb64b2e31d4c240e3e0d8a02d Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 5 Mar 2024 10:20:14 +0100 Subject: [PATCH 226/356] Update test --- .../00849_multiple_comma_join_2.sql | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql index 51bf5a2ede1..6530f691087 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql @@ -62,49 +62,49 @@ SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explai --- EXPLAIN QUERY TREE SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.a = t2.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.a = t2.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.b = t2.b SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.b = t2.b) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR 
explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.b = t2.b AND t1.b = t3.b SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.b = t2.b AND t1.b = t3.b) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t1.a = t3.a AND t1.a = t4.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t1.a = t3.a AND t1.a = t4.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.b = t2.b AND t1.b = t3.b AND t1.b = t4.b SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.b = t2.b AND t1.b = t3.b AND t1.b = t4.b) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t2.a = t1.a AND t2.a = t3.a AND t2.a = t4.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t2.a = t1.a AND t2.a = t3.a AND t2.a = t4.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t3.a = t1.a AND t3.a = t2.a AND t3.a = t4.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t3.a = t1.a AND t3.a = t2.a AND t3.a = t4.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t4.a = t1.a AND t4.a = t2.a AND t4.a = t3.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t4.a = t1.a AND t4.a = t2.a AND t4.a = t3.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3.a = t4.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3.a = t4.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1 CROSS JOIN t2 CROSS JOIN t3 CROSS JOIN t4 SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1 CROSS JOIN t2 CROSS JOIN t3 CROSS JOIN t4) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 CROSS JOIN t3 SETTINGS allow_experimental_analyzer = 
1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 CROSS JOIN t3) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 USING a CROSS JOIN t3 SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 USING a CROSS JOIN t3) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 ON t1.a = t2.a CROSS JOIN t3 SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 ON t1.a = t2.a CROSS JOIN t3) SETTINGS allow_experimental_analyzer = 1; INSERT INTO t1 values (1,1), (2,2), (3,3), (4,4); INSERT INTO t2 values (1,1), (1, Null); From a6f5323c38d8420765332e3e778399de3fc69acf Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 5 Mar 2024 10:09:17 +0000 Subject: [PATCH 227/356] CI: Fix pending status for build report in backports #do_not_test --- tests/ci/build_report_check.py | 2 +- tests/ci/ci_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 94e429ad77b..48640f15ac0 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -51,7 +51,7 @@ def main(): builds_for_check = CI_CONFIG.get_builds_for_report( build_check_name, release=pr_info.is_release(), - backport=pr_info.head_ref.startswith("backport"), + backport=pr_info.head_ref.startswith("backport/"), ) required_builds = len(builds_for_check) missing_builds = 0 diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b9c332e8e18..b9ee5670066 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -586,7 +586,7 @@ class CIConfig: Build.PACKAGE_TSAN, Build.PACKAGE_DEBUG, ] - if release and report_name == JobNames.BUILD_CHECK_SPECIAL: + if (release or backport) and report_name == JobNames.BUILD_CHECK_SPECIAL: return [ Build.BINARY_DARWIN, Build.BINARY_DARWIN_AARCH64, From edf2dc91687786742687624af40d1ac93c92f16b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 5 Mar 2024 10:20:57 +0000 Subject: [PATCH 228/356] Update version_date.tsv and changelogs after v23.8.10.43-lts --- docs/changelogs/v23.8.10.43-lts.md | 39 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 40 insertions(+) create mode 100644 docs/changelogs/v23.8.10.43-lts.md diff --git a/docs/changelogs/v23.8.10.43-lts.md b/docs/changelogs/v23.8.10.43-lts.md new file mode 100644 index 00000000000..0093467d129 --- /dev/null +++ b/docs/changelogs/v23.8.10.43-lts.md @@ -0,0 +1,39 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.8.10.43-lts (a278225bba9) FIXME as compared to v23.8.9.54-lts (192a1d231fa) + +#### Improvement +* Backported in [#58819](https://github.com/ClickHouse/ClickHouse/issues/58819): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Backported in [#60286](https://github.com/ClickHouse/ClickHouse/issues/60286): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#59879](https://github.com/ClickHouse/ClickHouse/issues/59879): If you want to run initdb scripts every time when ClickHouse container is starting you should initialize the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)).
+* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)).
+* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)).
+* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Fix rare race in external sort/aggregation with temporary data in cache [#58013](https://github.com/ClickHouse/ClickHouse/pull/58013) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix 02720_row_policy_column_with_dots [#59453](https://github.com/ClickHouse/ClickHouse/pull/59453) ([Duc Canh Le](https://github.com/canhld94)).
+* Pin python dependencies in stateless tests [#59663](https://github.com/ClickHouse/ClickHouse/pull/59663) ([Raúl Marín](https://github.com/Algunenano)).
+* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 572ceddf590..53bf705637d 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -25,6 +25,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.10.43-lts 2024-03-05 v23.8.9.54-lts 2024-01-05 v23.8.8.20-lts 2023-11-25 v23.8.7.24-lts 2023-11-17 From 738c481cce72716fb8e6cd2295ee22040a2a14a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 5 Mar 2024 10:36:42 +0000 Subject: [PATCH 229/356] Fix fuzzer report --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 9358e88e1e8..ccf450c94f2 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -387,7 +387,7 @@ if [ -f core.zst ]; then fi # Keep all the lines in the paragraphs containing that either contain or don't start with 20... (year) -sed -n '//,/^$/p' s.log | awk '// || !/^20/' server.log > fatal.log ||: +sed -n '//,/^$/p' server.log | awk '// || !/^20/' > fatal.log ||: FATAL_LINK='' if [ -s fatal.log ]; then FATAL_LINK='fatal.log' From ee3edf25894bc902fba96c95a0ad151d7b1a717c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 5 Mar 2024 10:40:45 +0000 Subject: [PATCH 230/356] Fix system parts _state --- src/Storages/System/StorageSystemPartsBase.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 631b316167c..5921fbc8f0f 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -249,6 +249,9 @@ void StorageSystemPartsBase::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample, column_names); + if (has_state_column) + header.insert(ColumnWithTypeAndName(std::make_shared(), "_state")); + auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( @@ -264,17 +267,12 @@ void ReadFromSystemPartsBase::initializePipeline(QueryPipelineBuilder & pipeline auto header = getOutputStream().header; MutableColumns res_columns = header.cloneEmptyColumns(); - if (has_state_column) - res_columns.push_back(ColumnString::create()); while (StoragesInfo info = stream->next()) { storage->processNextStorage(context, res_columns, columns_mask, info, has_state_column); } - if (has_state_column) - header.insert(ColumnWithTypeAndName(std::make_shared(), "_state")); - UInt64 num_rows = res_columns.at(0)->size(); Chunk chunk(std::move(res_columns), num_rows); From 0bd4c6afa0cb470b003216a8d75a85131831873b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 5 Mar 2024 10:49:01 +0100 Subject: [PATCH 231/356] Use while-loop --- src/Storages/StorageS3.cpp | 69 +++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 6892e75f506..b73965b8774 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -258,45 +258,46 @@ private: KeyWithInfoPtr nextAssumeLocked() { - if (buffer_iter != buffer.end()) + do { - auto answer = *buffer_iter; - ++buffer_iter; - - /// If url 
doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key.
-            /// So we get object info lazily here on 'next()' request.
-            if (!answer->info)
+            if (buffer_iter != buffer.end())
             {
-                answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings);
-                if (file_progress_callback)
-                    file_progress_callback(FileProgress(0, answer->info->size));
+                auto answer = *buffer_iter;
+                ++buffer_iter;
+
+                /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key.
+                /// So we get object info lazily here on 'next()' request.
+                if (!answer->info)
+                {
+                    answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings);
+                    if (file_progress_callback)
+                        file_progress_callback(FileProgress(0, answer->info->size));
+                }
+
+                return answer;
             }
 
-            return answer;
-        }
+            if (is_finished)
+                return {};
 
-        if (is_finished)
-            return {};
-
-        try
-        {
-            fillInternalBufferAssumeLocked();
-        }
-        catch (...)
-        {
-            /// In case of exception thrown while listing new batch of files
-            /// iterator may be partially initialized and its further using may lead to UB.
-            /// Iterator is used by several processors from several threads and
-            /// it may take some time for threads to stop processors and they
-            /// may still use this iterator after exception is thrown.
-            /// To avoid this UB, reset the buffer and return defaults for further calls.
-            is_finished = true;
-            buffer.clear();
-            buffer_iter = buffer.begin();
-            throw;
-        }
-
-        return nextAssumeLocked();
+            try
+            {
+                fillInternalBufferAssumeLocked();
+            }
+            catch (...)
+            {
+                /// In case of exception thrown while listing new batch of files
+                /// iterator may be partially initialized and its further using may lead to UB.
+                /// Iterator is used by several processors from several threads and
+                /// it may take some time for threads to stop processors and they
+                /// may still use this iterator after exception is thrown.
+                /// To avoid this UB, reset the buffer and return defaults for further calls.
+                is_finished = true;
+                buffer.clear();
+                buffer_iter = buffer.begin();
+                throw;
+            }
+        } while (true);
     }
 
     void fillInternalBufferAssumeLocked()

From 7dfe8afa76806449aed35faaff1b0e7dbe87849b Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Tue, 5 Mar 2024 11:11:18 +0000
Subject: [PATCH 232/356] Add in-source docs

---
 src/Functions/toMillisecond.cpp | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/Functions/toMillisecond.cpp b/src/Functions/toMillisecond.cpp
index e15b56cc555..aaef517c996 100644
--- a/src/Functions/toMillisecond.cpp
+++ b/src/Functions/toMillisecond.cpp
@@ -1,6 +1,7 @@
-#include
+#include
 #include
 #include
+#include
 
 namespace DB
 {
@@ -9,7 +10,21 @@ using FunctionToMillisecond = FunctionDateOrDateTimeToSomething
-    factory.registerFunction();
+    factory.registerFunction(
+
+
+        FunctionDocumentation{
+            .description=R"(
+Returns the millisecond component (0-999) of a date with time.
+    )",
+            .syntax="toMillisecond(value)",
+            .arguments={{"value", "DateTime or DateTime64"}},
+            .returned_value="The millisecond component (0 - 999) of the given date/time",
+            .examples{
+                {"toMillisecond", "SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))", "456"}},
+            .categories{"Dates and Times"}
+        }
+    );
 
     /// MySQL compatibility alias.
factory.registerAlias("MILLISECOND", "toMillisecond", FunctionFactory::CaseInsensitive); From a80b3ad2f4208a32d7105da202bc6f2ba736510c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:15:31 +0100 Subject: [PATCH 233/356] Fix test --- tests/queries/0_stateless/02722_database_filesystem.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index f3af7abcbb3..0c75c15fc69 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -81,8 +81,6 @@ CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); # FILE_DOESNT_EXIST: unknown file ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "FILE_DOESNT_EXIST" > /dev/null && echo "OK" || echo 'FAIL' ||: -# BAD_ARGUMENTS: Cannot determine the file format by it's extension -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # Clean ${CLICKHOUSE_CLIENT} --query "DROP DATABASE test1;" rm -rd $tmp_dir From d155f266fd25a7a35dccc0d4e3cffce1fbc07304 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:15:48 +0100 Subject: [PATCH 234/356] Fix test reference --- tests/queries/0_stateless/02722_database_filesystem.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02722_database_filesystem.reference b/tests/queries/0_stateless/02722_database_filesystem.reference index 17f84cfc49c..2aa7e74befb 100644 --- a/tests/queries/0_stateless/02722_database_filesystem.reference +++ b/tests/queries/0_stateless/02722_database_filesystem.reference @@ -23,4 +23,3 @@ OK OK OK OK -OK From 23ec4d4c6e386b67953cee01be5c740f47e39c78 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 5 Mar 2024 11:53:19 +0000 Subject: [PATCH 235/356] Update version_date.tsv and changelogs after v23.3.20.27-lts --- docs/changelogs/v23.3.20.27-lts.md | 29 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 30 insertions(+) create mode 100644 docs/changelogs/v23.3.20.27-lts.md diff --git a/docs/changelogs/v23.3.20.27-lts.md b/docs/changelogs/v23.3.20.27-lts.md new file mode 100644 index 00000000000..9f49e47f0bc --- /dev/null +++ b/docs/changelogs/v23.3.20.27-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.3.20.27-lts (cc974ba4f81) FIXME as compared to v23.3.19.32-lts (c4d4ca8ec02) + +#### Improvement +* Backported in [#58818](https://github.com/ClickHouse/ClickHouse/issues/58818): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). 
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#59877](https://github.com/ClickHouse/ClickHouse/issues/59877): If you want to run initdb scripts every time when ClickHouse container is starting you should initialize the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 53bf705637d..e372e407ce1 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -55,6 +55,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.20.27-lts 2024-03-05 v23.3.19.32-lts 2024-01-05 v23.3.18.15-lts 2023-11-25 v23.3.17.13-lts 2023-11-17 From 24437cdd637dd56401990c839191306b8533a41f Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 5 Mar 2024 11:55:04 +0000 Subject: [PATCH 236/356] Polish test --- tests/queries/0_stateless/03000_minmax_index_first.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03000_minmax_index_first.sql b/tests/queries/0_stateless/03000_minmax_index_first.sql index 5d2896c792a..d978e6ea8fa 100644 --- a/tests/queries/0_stateless/03000_minmax_index_first.sql +++ b/tests/queries/0_stateless/03000_minmax_index_first.sql @@ -14,3 +14,5 @@ SETTINGS index_granularity = 8192; INSERT INTO skip_table SELECT number, intDiv(number, 4096) FROM numbers(100000); SELECT trim(explain) FROM ( EXPLAIN indexes = 1 SELECT * FROM skip_table WHERE v = 125) WHERE explain like '%Name%'; + +DROP TABLE skip_table; From 3058e5995046d986635d17b515e1a84da91e53a9 Mon Sep 17 00:00:00 2001 From: Brokenice0415 Date: Tue, 5 Mar 2024 20:06:39 +0800 Subject: [PATCH 237/356] update notices --- src/Coordination/CoordinationSettings.h | 2 +- src/Coordination/KeeperServer.cpp | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 3c8f1d271d0..a32552616ee 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -26,7 +26,7 @@ struct Settings; M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \ M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \ M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \ - M(Milliseconds, leadership_expiry_ms, 0, "How long will a leader expire after not getting enough peer responses. Set it lower or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \ + M(Milliseconds, leadership_expiry_ms, 0, "Duration after which a leader will expire if it fails to receive responses from peers. 
Set it lower or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \ M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \ M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \ M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \ diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 80d61518e19..57dc9596038 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -319,16 +319,13 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co params.leadership_expiry_ = getValueOrMaxInt32AndLogWarning( coordination_settings->leadership_expiry_ms.totalMilliseconds(), "leadership_expiry_ms", log); - if (params.leadership_expiry_ > 0) + if (params.leadership_expiry_ > 0 && params.leadership_expiry_ <= params.election_timeout_lower_bound_) { - if (params.leadership_expiry_ < params.election_timeout_lower_bound_) - { - LOG_WARNING( - log, - "leadership_expiry_ is smaller than election_timeout_lower_bound_ms. " - "Notice that too small leadership_expiry_ may make Raft group " - "sensitive to network status."); - } + LOG_INFO( + log, + "leadership_expiry_ is smaller than or equal to election_timeout_lower_bound_ms, which can avoid multiple leaders. " + "Notice that too small leadership_expiry_ may make Raft group sensitive to network status. " + ); } params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log); From bbe8f5b3c45d96aaf963d972b36115d2a659bf43 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 7 Dec 2023 17:29:47 +0100 Subject: [PATCH 238/356] adjust a test --- ...922_deduplication_with_zero_copy.reference | 2 -- .../02922_deduplication_with_zero_copy.sh | 21 +++++++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.reference b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.reference index e394fe46b11..29376f82128 100644 --- a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.reference +++ b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.reference @@ -1,8 +1,6 @@ zero_copy: -all_0_0_0 tables: table -alter_partition_version async_blocks block_numbers blocks columns leader_election log lost_part_count metadata mutations nonincrement_block_numbers part_moves_shard pinned_part_uuids quorum replicas table_shared_id temp zero_copy_hdfs zero_copy_s3 zero_copy: tables: diff --git a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh index 77950d923f0..381311c5033 100755 --- a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh +++ b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh @@ -3,7 +3,7 @@ set -e -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CURDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh @@ -27,6 +27,7 @@ create table r2 (n int) allow_remote_fs_zero_copy_replication=1; " + function get_shared_locks() { table_shared_id="$1" @@ -42,11 +43,12 @@ function get_shared_locks() done } + function filter_temporary_locks() { while read -r lock do - owner=$($CLICKHOUSE_KEEPER_CLIENT -q "get_stat ${lock}" | grep 'ephemeralOwner' | sed 's/.*= //') + owner="$($CLICKHOUSE_KEEPER_CLIENT -q "get_stat ${lock}" | grep 'ephemeralOwner' | sed 's/.*= //')" if [[ "${owner}" -eq "0" ]] then echo "${lock}" @@ -63,10 +65,11 @@ function insert_duplicates() { wait $CLICKHOUSE_CLIENT -nm -q " +system sync replica r1; system sync replica r2; " - count=$($CLICKHOUSE_CLIENT -q "select count() from r2;") + count="$($CLICKHOUSE_CLIENT -q "select count() from r2;")" [[ "${count}" -eq "1" ]] } @@ -89,8 +92,8 @@ system sync replica r2; " done - persistent_locks="$(get_shared_locks ${table_shared_id} | filter_temporary_locks)" - num=$(echo "${persistent_locks}" | wc -w) + persistent_locks="$(get_shared_locks "${table_shared_id}" | filter_temporary_locks)" + num="$(echo "${persistent_locks}" | wc -w)" if [[ "${num}" -ne "2" ]] then @@ -101,21 +104,23 @@ system sync replica r2; } + export -f query_with_retry export -f filter_temporary_locks export -f insert_duplicates export -f get_shared_locks export -f loop -table_shared_id=$($CLICKHOUSE_KEEPER_CLIENT -q "get /test/02922/${CLICKHOUSE_DATABASE}/table/table_shared_id") +table_shared_id="$($CLICKHOUSE_KEEPER_CLIENT -q "get /test/02922/${CLICKHOUSE_DATABASE}/table/table_shared_id")" exit_code=0 -timeout 40 bash -c loop "${table_shared_id}" || exit_code="${?}" +timeout 40 bash -c "loop '${table_shared_id}'" || exit_code="${?}" if [[ "${exit_code}" -ne "124" ]] then echo "timeout expected, but loop exited with code: ${exit_code}." echo "the error is found if loop ends with 0." 
+ echo "table_shared_id=${table_shared_id}" exit 1 fi @@ -125,11 +130,9 @@ function list_keeper_nodes() { echo "zero_copy:" $CLICKHOUSE_KEEPER_CLIENT -q "ls /clickhouse/zero_copy/zero_copy_s3" | grep -o "${table_shared_id}" | \ sed "s/${table_shared_id}//g" || : - $CLICKHOUSE_KEEPER_CLIENT -q "ls /clickhouse/zero_copy/zero_copy_s3/${table_shared_id}" 2>/dev/null || : echo "tables:" $CLICKHOUSE_KEEPER_CLIENT -q "ls /test/02922/${CLICKHOUSE_DATABASE}" | grep -o "table" || : - $CLICKHOUSE_KEEPER_CLIENT -q "ls /test/02922/${CLICKHOUSE_DATABASE}/table" 2>/dev/null || : } list_keeper_nodes "${table_shared_id}" From 82cc3f4339e856b229c7d9b988f3d4fcad0397be Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 26 Feb 2024 14:52:05 +0000 Subject: [PATCH 239/356] CI: make workflow yml abstract #no_merge_commit #no_ci_cache #job_style_check #job_package_debug #job_ClickHouse_build_check #job_Stateless_tests_debug #job_binary_release --- .github/workflows/pull_request.yml | 855 ++------------------- .github/workflows/reusable_build_stage.yml | 32 + .github/workflows/reusable_test_stage.yml | 25 + tests/ci/ci.py | 28 +- tests/ci/ci_config.py | 227 ++++-- tests/ci/test_ci_config.py | 15 +- 6 files changed, 318 insertions(+), 864 deletions(-) create mode 100644 .github/workflows/reusable_build_stage.yml create mode 100644 .github/workflows/reusable_test_stage.yml diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 8783f959ec6..c065219f980 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -13,9 +13,7 @@ on: # yamllint disable-line rule:truthy - opened branches: - master -########################################################################################## -##################################### SMALL CHECKS ####################################### -########################################################################################## + jobs: RunConfig: runs-on: [self-hosted, style-checker-aarch64] @@ -70,13 +68,13 @@ jobs: python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check' BuildDockers: needs: [RunConfig] - if: ${{ !failure() && !cancelled() }} + if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }} uses: ./.github/workflows/reusable_docker.yml with: data: ${{ needs.RunConfig.outputs.data }} StyleCheck: needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Style check')}} uses: ./.github/workflows/reusable_test.yml with: test_name: Style check @@ -89,19 +87,9 @@ jobs: ROBOT_CLICKHOUSE_SSH_KEY< Dict[str, Dict[str, Any]]: + """ + populates GH Actions' workflow with real jobs + "Builds_1": [{"job_name": NAME, "runner_type": RUNER_TYPE}] + "Tests_1": [{"job_name": NAME, "runner_type": RUNER_TYPE}] + ... 
+ """ + result = {} # type: Dict[str, Any] + stages_to_do = [] + for job in jobs_data["jobs_to_do"]: + stage_type = CI_CONFIG.get_job_ci_stage(job) + if stage_type == CIStages.NA: + continue + if stage_type not in result: + result[stage_type] = [] + stages_to_do.append(stage_type) + result[stage_type].append( + {"job_name": job, "runner_type": CI_CONFIG.get_runner_type(job)} + ) + result["stages_to_do"] = stages_to_do + return result + + def _create_gh_status( commit: Any, job: str, batch: int, num_batches: int, job_status: CommitStatusData ) -> None: @@ -1733,6 +1756,7 @@ def main() -> int: result["build"] = build_digest result["docs"] = docs_digest result["ci_flags"] = ci_flags + result["stages_data"] = _generate_ci_stage_config(jobs_data) result["jobs_data"] = jobs_data result["docker_data"] = docker_data ### CONFIGURE action: end diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b9ee5670066..308a9098c29 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +from copy import deepcopy import logging from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from dataclasses import dataclass, field @@ -10,6 +11,24 @@ from ci_utils import WithIter from integration_test_images import IMAGES +class CIStages: + NA = "UNKNOWN" + BUILDS_1 = "Builds_1" + BUILDS_2 = "Builds_2" + TESTS_1 = "Tests_1" + TESTS_2 = "Tests_2" + + +class Runners(metaclass=WithIter): + BUILDER = "builder" + STYLE_CHECKER = "style-checker" + STYLE_CHECKER_ARM = "style-checker-aarch64" + FUNC_TESTER = "func-tester" + FUNC_TESTER_ARM = "func-tester-aarch64" + STRESS_TESTER = "stress-tester" + FUZZER_UNIT_TESTER = "fuzzer-unit-tester" + + class Labels(metaclass=WithIter): """ Label names or commit tokens in normalized form @@ -207,6 +226,45 @@ class JobConfig: random_bucket: str = "" +builds_job_config = JobConfig( + required_on_release_branch=True, + digest=DigestConfig( + include_paths=[ + "./src", + "./contrib/*-cmake", + "./contrib/consistent-hashing", + "./contrib/murmurhash", + "./contrib/libfarmhash", + "./contrib/pdqsort", + "./contrib/cityhash102", + "./contrib/sparse-checkout", + "./contrib/libmetrohash", + "./contrib/update-submodules.sh", + "./contrib/CMakeLists.txt", + "./CMakeLists.txt", + "./PreLoad.cmake", + "./cmake", + "./base", + "./programs", + "./packages", + "./docker/packager/packager", + "./rust", + # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact + # when there are changes in performance test scripts. 
+ # Due to the current design of the perf test we need to rebuild CH when the performance test changes, + # otherwise the changes will not be visible in the PerformanceTest job in CI + "./tests/performance", + ], + exclude_files=[".md"], + docker=["clickhouse/binary-builder"], + git_submodules=True, + ), + run_command="build_check.py $BUILD_NAME", +) +fuzzer_build_job_config = deepcopy(builds_job_config) +fuzzer_build_job_config.run_by_label = Labels.libFuzzer + + @dataclass class BuildConfig: name: str @@ -222,43 +280,7 @@ class BuildConfig: sparse_checkout: bool = False comment: str = "" static_binary_name: str = "" - job_config: JobConfig = field( - default_factory=lambda: JobConfig( - required_on_release_branch=True, - digest=DigestConfig( - include_paths=[ - "./src", - "./contrib/*-cmake", - "./contrib/consistent-hashing", - "./contrib/murmurhash", - "./contrib/libfarmhash", - "./contrib/pdqsort", - "./contrib/cityhash102", - "./contrib/sparse-checkout", - "./contrib/libmetrohash", - "./contrib/update-submodules.sh", - "./contrib/CMakeLists.txt", - "./CMakeLists.txt", - "./PreLoad.cmake", - "./cmake", - "./base", - "./programs", - "./packages", - "./docker/packager/packager", - "./rust", - # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact - # when there are changes in performance test scripts. - # Due to the current design of the perf test we need to rebuild CH when the performance test changes, - # otherwise the changes will not be visible in the PerformanceTest job in CI - "./tests/performance", - ], - exclude_files=[".md"], - docker=["clickhouse/binary-builder"], - git_submodules=True, - ), - run_command="build_check.py $BUILD_NAME", - ) - ) + job_config: JobConfig = field(default_factory=lambda: deepcopy(builds_job_config)) def export_env(self, export: bool = False) -> str: def process(field_name: str, field: Union[bool, str]) -> str: @@ -466,6 +488,19 @@ sql_test_params = { "run_command": "sqltest.py", "timeout": 10800, } +clickbench_test_params = { + "digest": DigestConfig( + include_paths=[ + "tests/ci/clickbench.py", + ], + docker=["clickhouse/clickbench"], + ), + "run_command": 'clickbench.py "$CHECK_NAME"', +} +install_test_params = { + "digest": install_check_digest, + "run_command": 'install_check.py "$CHECK_NAME"', +} @dataclass @@ -487,6 +522,37 @@ class CIConfig: return config return None + def get_job_ci_stage(self, job_name: str) -> str: + if job_name in [ + JobNames.STYLE_CHECK, + JobNames.FAST_TEST, + JobNames.JEPSEN_KEEPER, + JobNames.BUILD_CHECK, + JobNames.BUILD_CHECK_SPECIAL, + ]: + # FIXME: we can't currently handle Jepsen in the Stage as it's job has concurrency directive + # BUILD_CHECK and BUILD_CHECK_SPECIAL runs not in stage because we need them even if Builds stage failed + return CIStages.NA + stage_type = None + if self.is_build_job(job_name): + stage_type = CIStages.BUILDS_1 + if job_name in CI_CONFIG.get_builds_for_report( + JobNames.BUILD_CHECK_SPECIAL + ): + # special builds go to Build_2 stage to not delay Builds_1/Test_1 + stage_type = CIStages.BUILDS_2 + elif self.is_docs_job(job_name): + stage_type = CIStages.TESTS_1 + elif job_name == JobNames.BUILD_CHECK_SPECIAL: + stage_type = CIStages.TESTS_2 + elif self.is_test_job(job_name): + stage_type = CIStages.TESTS_1 + if job_name == JobNames.LIBFUZZER_TEST: + # since fuzzers build in Builds_2, test must be in Tests_2 + stage_type = CIStages.TESTS_2 + assert stage_type, f"BUG [{job_name}]" + return stage_type + def get_job_config(self, check_name: str) -> 
JobConfig: res = None for config in ( @@ -500,6 +566,63 @@ class CIConfig: break return res # type: ignore + def get_runner_type(self, check_name: str) -> str: + result = None + if self.is_build_job(check_name) or check_name == JobNames.FAST_TEST: + result = Runners.BUILDER + elif any( + words in check_name.lower() + for words in [ + "install packages", + "compatibility check", + "docker", + "build check", + "jepsen", + "style check", + ] + ): + result = Runners.STYLE_CHECKER + elif check_name == JobNames.DOCS_CHECK: + # docs job is demanding + result = Runners.FUNC_TESTER_ARM + elif any( + words in check_name.lower() + for words in [ + "stateless", + "stateful", + "clickbench", + "sqllogic test", + "libfuzzer", + "bugfix validation", + ] + ): + result = Runners.FUNC_TESTER + elif any( + words in check_name.lower() + for words in ["stress", "upgrade", "integration", "performance comparison"] + ): + result = Runners.STRESS_TESTER + elif any( + words in check_name.lower() + for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] + ): + result = Runners.FUZZER_UNIT_TESTER + + assert result, f"BUG, no runner for [{check_name}]" + + if ("aarch" in check_name or "arm" in check_name) and "aarch" not in result: + if result == Runners.STRESS_TESTER: + # FIXME: no arm stress tester group atm + result = Runners.FUNC_TESTER_ARM + elif result == Runners.BUILDER: + # crosscompile - no arm required + pass + else: + # switch to aarch64 runnner + result += "-aarch64" + + return result + @staticmethod def normalize_string(input_string: str) -> str: lowercase_string = input_string.lower() @@ -600,11 +723,7 @@ class CIConfig: @classmethod def is_test_job(cls, job: str) -> bool: - return ( - not cls.is_build_job(job) - and not cls.is_build_job(job) - and job != JobNames.STYLE_CHECK - ) + return not cls.is_build_job(job) and job != JobNames.STYLE_CHECK @classmethod def is_docs_job(cls, job: str) -> bool: @@ -845,7 +964,7 @@ CI_CONFIG = CIConfig( name=Build.FUZZERS, compiler="clang-17", package_type="fuzzers", - job_config=JobConfig(run_by_label=Labels.libFuzzer), + job_config=fuzzer_build_job_config, ), }, builds_report_config={ @@ -889,6 +1008,7 @@ CI_CONFIG = CIConfig( include_paths=["**/*.md", "./docs", "tests/ci/docs_check.py"], docker=["clickhouse/docs-builder"], ), + run_command="docs_check.py", ), ), JobNames.FAST_TEST: TestConfig( @@ -918,10 +1038,10 @@ CI_CONFIG = CIConfig( }, test_configs={ JobNames.INSTALL_TEST_AMD: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(digest=install_check_digest) + Build.PACKAGE_RELEASE, job_config=JobConfig(**install_test_params) # type: ignore ), JobNames.INSTALL_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(digest=install_check_digest) + Build.PACKAGE_AARCH64, job_config=JobConfig(**install_test_params) # type: ignore ), JobNames.STATEFUL_TEST_ASAN: TestConfig( Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore @@ -1139,9 +1259,20 @@ CI_CONFIG = CIConfig( JobNames.SQLTEST: TestConfig( Build.PACKAGE_RELEASE, job_config=JobConfig(**sql_test_params) # type: ignore ), - JobNames.CLCIKBENCH_TEST: TestConfig(Build.PACKAGE_RELEASE), - JobNames.CLCIKBENCH_TEST_ARM: TestConfig(Build.PACKAGE_AARCH64), - JobNames.LIBFUZZER_TEST: TestConfig(Build.FUZZERS, job_config=JobConfig(run_by_label=Labels.libFuzzer)), # type: ignore + JobNames.CLCIKBENCH_TEST: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**clickbench_test_params) # type: ignore + ), + JobNames.CLCIKBENCH_TEST_ARM: TestConfig( + 
Build.PACKAGE_AARCH64, job_config=JobConfig(**clickbench_test_params) # type: ignore + ), + JobNames.LIBFUZZER_TEST: TestConfig( + Build.FUZZERS, + job_config=JobConfig( + run_by_label=Labels.libFuzzer, + timeout=10800, + run_command='libfuzzer_test_check.py "$CHECK_NAME" 10800', + ), + ), # type: ignore }, ) CI_CONFIG.validate() diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 49d49d9c328..04c90105276 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -1,15 +1,12 @@ #!/usr/bin/env python3 import unittest +from ci_config import JobNames, CI_CONFIG, Runners class TestCIConfig(unittest.TestCase): - def test_no_errors_in_ci_config(self): - raised = None - try: - from ci_config import ( # pylint: disable=import-outside-toplevel - CI_CONFIG as _, - ) - except Exception as exc: - raised = exc - self.assertIsNone(raised, f"CI_CONFIG import raised error {raised}") + def test_runner_config(self): + """check runner is provided w/o exception""" + for job in JobNames: + runner = CI_CONFIG.get_runner_type(job) + self.assertIn(runner, Runners) From 6e085c713f03d722c0881241bbd3bf81802d3298 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 1 Feb 2024 19:01:04 +0100 Subject: [PATCH 240/356] generate key for clickhouse_remove_objects_capability --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 8 ++++---- src/Disks/ObjectStorages/S3/DiskS3Utils.cpp | 13 +++++++------ src/Disks/ObjectStorages/S3/DiskS3Utils.h | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index f4df579de73..898c7eb98fa 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -96,10 +96,10 @@ S3::URI getS3URI(const Poco::Util::AbstractConfiguration & config, const std::st } void checkS3Capabilities( - S3ObjectStorage & storage, const S3Capabilities s3_capabilities, const String & name, const String & key_with_trailing_slash) + S3ObjectStorage & storage, const S3Capabilities s3_capabilities, const String & name) { /// If `support_batch_delete` is turned on (default), check and possibly switch it off. - if (s3_capabilities.support_batch_delete && !checkBatchRemove(storage, key_with_trailing_slash)) + if (s3_capabilities.support_batch_delete && !checkBatchRemove(storage)) { LOG_WARNING( getLogger("S3ObjectStorage"), @@ -134,7 +134,7 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) /// NOTE: should we still perform this check for clickhouse-disks? if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name, uri.key); + checkS3Capabilities(*object_storage, s3_capabilities, name); return object_storage; }); @@ -170,7 +170,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) /// NOTE: should we still perform this check for clickhouse-disks? 
if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name, uri.key); + checkS3Capabilities(*object_storage, s3_capabilities, name); return object_storage; }); diff --git a/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp b/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp index bb7b53b2d22..529df59ca17 100644 --- a/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp +++ b/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp @@ -79,13 +79,14 @@ static String getServerUUID() return toString(server_uuid); } -bool checkBatchRemove(S3ObjectStorage & storage, const String & key_with_trailing_slash) +bool checkBatchRemove(S3ObjectStorage & storage) { - /// NOTE: key_with_trailing_slash is the disk prefix, it is required - /// because access is done via S3ObjectStorage not via IDisk interface - /// (since we don't have disk yet). - const String path = fmt::format("{}clickhouse_remove_objects_capability_{}", key_with_trailing_slash, getServerUUID()); - StoredObject object(path); + /// NOTE: Here we are going to write and later drop some key. + /// We are using generateObjectKeyForPath() which returns random object key. + /// That generated key is placed in a right directory where we should have write access. + const String path = fmt::format("clickhouse_remove_objects_capability_{}", getServerUUID()); + const auto key = storage.generateObjectKeyForPath(path); + StoredObject object(key.serialize(), path); try { auto file = storage.writeObject(object, WriteMode::Rewrite); diff --git a/src/Disks/ObjectStorages/S3/DiskS3Utils.h b/src/Disks/ObjectStorages/S3/DiskS3Utils.h index 29e39d4bc1b..ddd204d449d 100644 --- a/src/Disks/ObjectStorages/S3/DiskS3Utils.h +++ b/src/Disks/ObjectStorages/S3/DiskS3Utils.h @@ -18,7 +18,7 @@ ObjectStorageKeysGeneratorPtr getKeyGenerator( const String & config_prefix); class S3ObjectStorage; -bool checkBatchRemove(S3ObjectStorage & storage, const std::string & key_with_trailing_slash); +bool checkBatchRemove(S3ObjectStorage & storage); } From 248c93ed2dd46ae73cc086a2d14ce62be1f61d94 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 5 Mar 2024 14:37:16 +0100 Subject: [PATCH 241/356] Change error code --- src/Interpreters/executeQuery.cpp | 3 ++- tests/queries/0_stateless/03003_analyzer_setting.sql | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 18f0cd6601f..f318f363eda 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -104,6 +104,7 @@ namespace ErrorCodes extern const int QUERY_WAS_CANCELLED; extern const int INCORRECT_DATA; extern const int SYNTAX_ERROR; + extern const int INCORRECT_QUERY; } namespace FailPoints @@ -669,7 +670,7 @@ void validateAnalyzerSettings(ASTPtr ast, bool context_value) if (auto * value = set_query->changes.tryGet("allow_experimental_analyzer")) { if (top_level != value->safeGet()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Setting 'allow_experimental_analyzer' is changed in the subquery. Top level value: {}", top_level); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Setting 'allow_experimental_analyzer' is changed in the subquery. 
Top level value: {}", top_level); } } diff --git a/tests/queries/0_stateless/03003_analyzer_setting.sql b/tests/queries/0_stateless/03003_analyzer_setting.sql index 43e1bd0d955..2e5cab71277 100644 --- a/tests/queries/0_stateless/03003_analyzer_setting.sql +++ b/tests/queries/0_stateless/03003_analyzer_setting.sql @@ -2,8 +2,8 @@ CREATE TABLE test (dummy Int8) ENGINE = Distributed(test_cluster_two_shards, 'sy SET allow_experimental_analyzer = 0; -SELECT * FROM (SELECT * FROM test SETTINGS allow_experimental_analyzer = 1); -- { serverError LOGICAL_ERROR} +SELECT * FROM (SELECT * FROM test SETTINGS allow_experimental_analyzer = 1); -- { serverError INCORRECT_QUERY } SET allow_experimental_analyzer = 1; -SELECT * FROM (SELECT * FROM test SETTINGS allow_experimental_analyzer = 0); -- { serverError LOGICAL_ERROR} +SELECT * FROM (SELECT * FROM test SETTINGS allow_experimental_analyzer = 0); -- { serverError INCORRECT_QUERY } From b4e90e512115071bc0edd9d25a9b9dcac00e9214 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Tue, 5 Mar 2024 08:49:33 +0100 Subject: [PATCH 242/356] Address PR comments --- src/Interpreters/InterpreterInsertQuery.cpp | 8 ++++-- ...001_insert_threads_deduplication.reference | 2 ++ .../03001_insert_threads_deduplication.sql | 27 +++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index df833803970..5680857ed3d 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -515,9 +515,13 @@ BlockIO InterpreterInsertQuery::execute() : std::min(settings.max_insert_threads, pipeline.getNumStreams()); /// Deduplication when passing insert_deduplication_token breaks if using more than one thread - const String & deduplication_token = settings.insert_deduplication_token; - if (!deduplication_token.empty()) + if (!settings.insert_deduplication_token.toString().empty()) + { + LOG_DEBUG( + getLogger("InsertQuery"), + "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); pre_streams_size = 1; + } if (table->supportsParallelInsert()) sink_streams_size = pre_streams_size; diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.reference b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference index 2559e5c49e7..b6d6006f84c 100644 --- a/tests/queries/0_stateless/03001_insert_threads_deduplication.reference +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference @@ -1,2 +1,4 @@ 3 6 +12 +18 diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.sql b/tests/queries/0_stateless/03001_insert_threads_deduplication.sql index 5b5cb1d6845..093d2b3185d 100644 --- a/tests/queries/0_stateless/03001_insert_threads_deduplication.sql +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.sql @@ -1,4 +1,7 @@ +-- Tags: distributed + DROP TABLE IF EXISTS landing SYNC; +DROP TABLE IF EXISTS landing_dist SYNC; DROP TABLE IF EXISTS ds SYNC; CREATE TABLE landing @@ -10,6 +13,14 @@ CREATE TABLE landing ENGINE = MergeTree() ORDER BY timestamp; +CREATE TABLE landing_dist +( + timestamp DateTime64(3), + status String, + id String +) +ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'landing', rand()); + SYSTEM STOP MERGES landing; -- Stopping merges to force 3 parts INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; @@ -38,5 +49,21 @@ SETTINGS 
insert_deduplicate=1, insert_deduplication_token='token2',
     max_insert_threads=1;
 
 SELECT count() FROM ds;
 
+-- When reading from the distributed table, 6 rows are going to be retrieved
+-- due to using the two shards cluster
+
+INSERT INTO ds SELECT * FROM landing_dist
+SETTINGS insert_deduplicate=1, insert_deduplication_token='token3',
+    max_insert_threads=5;
+
+SELECT count() FROM ds;
+
+INSERT INTO ds SELECT * FROM landing_dist
+SETTINGS insert_deduplicate=1, insert_deduplication_token='token4',
+    max_insert_threads=1;
+
+SELECT count() FROM ds;
+
 DROP TABLE IF EXISTS landing SYNC;
+DROP TABLE IF EXISTS landing_dist SYNC;
 DROP TABLE IF EXISTS ds SYNC;

From b791ced86f3c7a739e63a26a8ecbf52ed376db1d Mon Sep 17 00:00:00 2001
From: Joris Clement <7713214+joclement@users.noreply.github.com>
Date: Tue, 5 Mar 2024 14:49:12 +0100
Subject: [PATCH 243/356] style: trailing whitespace

---
 docs/en/operations/settings/settings-formats.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index f7d9586dd5b..0cac71df983 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -467,7 +467,7 @@ Enabled by default.
 
 Allow to use String type for JSON keys that contain only `Null`/`{}`/`[]` in data sample during schema inference.
 In JSON formats any value can be read as String, and we can avoid errors like `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference
-by using String type for keys with unknown types. 
+by using String type for keys with unknown types.
 
 Example:
 
@@ -1605,7 +1605,7 @@ possible values:
 - `1` — Enabled. Pretty formats will use ANSI escape sequences except for `NoEscapes` formats.
 - `auto` - Enabled if `stdout` is a terminal except for `NoEscapes` formats.
 
-Default value is `auto`. 
+Default value is `auto`.
 
 ### output_format_pretty_grid_charset {#output_format_pretty_grid_charset}

From a80b3ad2f4208a32d7105da202bc6f2ba736510c Mon Sep 17 00:00:00 2001
From: Joris Clement <7713214+joclement@users.noreply.github.com>
Date: Tue, 5 Mar 2024 14:49:25 +0100
Subject: [PATCH 244/356] docs: typo

With https://github.com/ClickHouse/ClickHouse/pull/39423 allowing the
usage of single quotes was disabled by default. Adapt the
documentation accordingly.

---
 docs/en/operations/settings/settings-formats.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index 0cac71df983..477fbf94625 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -891,7 +891,7 @@ Default value: `,`.
 
 If it is set to true, allow strings in single quotes.
 
-Enabled by default.
+Disabled by default.
### format_csv_allow_double_quotes {#format_csv_allow_double_quotes} From 97f955fcde7b1e8c0fe935d5233cd791780b78a0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 5 Mar 2024 14:10:46 +0000 Subject: [PATCH 245/356] Fix sanitizers --- .../System/StorageSystemDetachedParts.cpp | 29 +++++++++++++++---- .../System/StorageSystemPartsBase.cpp | 24 +++++++++++++++ src/Storages/System/StorageSystemPartsBase.h | 26 ----------------- 3 files changed, 48 insertions(+), 31 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 913983952c3..fa74093a5a5 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -287,7 +287,7 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i setInMemoryMetadata(storage_metadata); } -class ReadFromSystemDetachedParts : public ReadFromSystemPartsBase +class ReadFromSystemDetachedParts : public SourceStepWithFilter { public: ReadFromSystemDetachedParts( @@ -296,23 +296,42 @@ public: const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, Block sample_block, - std::shared_ptr storage_, + std::shared_ptr storage_, std::vector columns_mask_, size_t max_block_size_, size_t num_streams_) - : ReadFromSystemPartsBase(column_names_, query_info_, storage_snapshot_, context_, sample_block, std::move(storage_), std::move(columns_mask_), false) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) , max_block_size(max_block_size_) , num_streams(num_streams_) {} std::string getName() const override { return "ReadFromSystemDetachedParts"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; -private: +protected: + std::shared_ptr storage; + std::vector columns_mask; + + const ActionsDAG::Node * predicate = nullptr; const size_t max_block_size; const size_t num_streams; }; +void ReadFromSystemDetachedParts::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + void StorageSystemDetachedParts::read( QueryPlan & query_plan, const Names & column_names, @@ -328,7 +347,7 @@ void StorageSystemDetachedParts::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); - auto this_ptr = std::static_pointer_cast(shared_from_this()); + auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( column_names, query_info, storage_snapshot, diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 5921fbc8f0f..f31dd5a955e 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -203,6 +203,30 @@ StoragesInfoStream::StoragesInfoStream(const ActionsDAG::Node * predicate, Conte storage_uuid_column = block_to_filter.getByName("uuid").column; } +class ReadFromSystemPartsBase : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemPartsBase"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings 
&) override; + + ReadFromSystemPartsBase( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + bool has_state_column_); + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +protected: + std::shared_ptr storage; + std::vector columns_mask; + const bool has_state_column; + const ActionsDAG::Node * predicate = nullptr; +}; ReadFromSystemPartsBase::ReadFromSystemPartsBase( const Names & column_names_, diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 2b4055373d1..b5d4a74e58b 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -156,30 +156,4 @@ protected: processNextStorage(ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) = 0; }; -class ReadFromSystemPartsBase : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromSystemPartsBase"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - ReadFromSystemPartsBase( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::shared_ptr storage_, - std::vector columns_mask_, - bool has_state_column_); - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - -protected: - std::shared_ptr storage; - std::vector columns_mask; - const bool has_state_column; - const ActionsDAG::Node * predicate = nullptr; -}; - - } From 56e0dd0ab0c6adc3e1e2e7a15ba74a96594c0762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 5 Mar 2024 15:30:42 +0100 Subject: [PATCH 246/356] More fixes --- src/Parsers/ASTQueryWithOutput.cpp | 23 +++++++++++++++++------ src/Parsers/ParserQueryWithOutput.cpp | 2 ++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ASTQueryWithOutput.cpp b/src/Parsers/ASTQueryWithOutput.cpp index 3890ed2347a..e47f4dcf29d 100644 --- a/src/Parsers/ASTQueryWithOutput.cpp +++ b/src/Parsers/ASTQueryWithOutput.cpp @@ -74,12 +74,23 @@ bool ASTQueryWithOutput::resetOutputASTIfExist(IAST & ast) /// FIXME: try to prettify this cast using `as<>()` if (auto * ast_with_output = dynamic_cast(&ast)) { - ast_with_output->out_file.reset(); - ast_with_output->format.reset(); - ast_with_output->settings_ast.reset(); - ast_with_output->compression.reset(); - ast_with_output->compression_level.reset(); - ast_with_output->children.clear(); + auto remove_if_exists = [&](ASTPtr & p) + { + if (p) + { + if (auto it = std::find(ast_with_output->children.begin(), ast_with_output->children.end(), p); + it != ast_with_output->children.end()) + ast_with_output->children.erase(it); + p.reset(); + } + }; + + remove_if_exists(ast_with_output->out_file); + remove_if_exists(ast_with_output->format); + remove_if_exists(ast_with_output->settings_ast); + remove_if_exists(ast_with_output->compression); + remove_if_exists(ast_with_output->compression_level); + return true; } diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 7a627ae5f6a..4a0b928608b 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -136,6 +136,7 @@ bool 
ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserStringLiteral compression; if (!compression.parse(pos, query_with_output.compression, expected)) return false; + query_with_output.children.push_back(query_with_output.compression); ParserKeyword s_compression_level("LEVEL"); if (s_compression_level.ignore(pos, expected)) @@ -143,6 +144,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserNumber compression_level; if (!compression_level.parse(pos, query_with_output.compression_level, expected)) return false; + query_with_output.children.push_back(query_with_output.compression_level); } } From 67be42fa678df061ebe83510d42c859853cd4d62 Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri Date: Wed, 24 May 2023 15:02:22 +0800 Subject: [PATCH 247/356] Enable pylint for all tests --- utils/check-style/check-style | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 3a966daea41..620aec2fda2 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -152,8 +152,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | grep -vP $EXCLUDE_DIRS | xargs xmllint --noout --nonet -# FIXME: for now only clickhouse-test -pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test $ROOT_PATH/tests/ci/*.py +pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test $ROOT_PATH/tests/**/*.py find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | From 17f1e442413eed9c4f0e7484bcaa1295b660e2b0 Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri Date: Mon, 29 May 2023 22:34:31 +0800 Subject: [PATCH 248/356] Fix linter issues in ci-runner.py --- tests/integration/ci-runner.py | 49 +++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 08dd9ba276b..acafa16b837 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -11,6 +11,7 @@ import shlex import shutil import string import subprocess +import sys import time import zlib # for crc32 from collections import defaultdict @@ -196,7 +197,7 @@ def clear_ip_tables_and_restart_daemons(): shell=True, ) except subprocess.CalledProcessError as err: - logging.info("docker kill excepted: " + str(err)) + logging.info("docker kill excepted: %s", str(err)) try: logging.info("Removing all docker containers") @@ -205,7 +206,7 @@ def clear_ip_tables_and_restart_daemons(): shell=True, ) except subprocess.CalledProcessError as err: - logging.info("docker rm excepted: " + str(err)) + logging.info("docker rm excepted: %s", str(err)) # don't restart docker if it's disabled if os.environ.get("CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER", "1") == "1": @@ -213,7 +214,7 @@ def clear_ip_tables_and_restart_daemons(): logging.info("Stopping docker daemon") subprocess.check_output("service docker stop", shell=True) except subprocess.CalledProcessError as err: - logging.info("docker stop excepted: " + str(err)) + logging.info("docker stop excepted: %s", str(err)) try: for i in range(200): @@ -228,7 +229,7 @@ def clear_ip_tables_and_restart_daemons(): else: raise Exception("Docker daemon doesn't responding") except subprocess.CalledProcessError as err: - logging.info("Can't reload docker: " + str(err)) + logging.info("Can't reload docker: %s", str(err)) 
iptables_iter = 0 try: @@ -276,13 +277,14 @@ class ClickhouseIntegrationTestsRunner: def base_path(self): return os.path.join(str(self.result_path), "../") - def should_skip_tests(self): + @staticmethod + def should_skip_tests(): return [] def get_image_with_version(self, name): if name in self.image_versions: return name + ":" + self.image_versions[name] - logging.warn( + logging.warning( "Cannot find image %s in params list %s", name, self.image_versions ) if ":" not in name: @@ -292,7 +294,7 @@ class ClickhouseIntegrationTestsRunner: def get_image_version(self, name: str): if name in self.image_versions: return self.image_versions[name] - logging.warn( + logging.warning( "Cannot find image %s in params list %s", name, self.image_versions ) return "latest" @@ -322,13 +324,14 @@ class ClickhouseIntegrationTestsRunner: ) return except subprocess.CalledProcessError as err: - logging.info("docker-compose pull failed: " + str(err)) + logging.info("docker-compose pull failed: %s", str(err)) continue logging.error("Pulling images failed for 5 attempts. Will fail the worker.") # We pass specific retcode to to ci/integration_test_check.py to skip status reporting and restart job - exit(13) + sys.exit(13) - def _can_run_with(self, path, opt): + @staticmethod + def _can_run_with(path, opt): with open(path, "r") as script: for line in script: if opt in line: @@ -358,7 +361,9 @@ class ClickhouseIntegrationTestsRunner: if retcode == 0: logging.info("Installation of %s successfull", full_path) else: - raise Exception("Installation of %s failed", full_path) + raise Exception( + "Installation of {} failed".format(full_path) + ) break else: raise Exception("Package with {} not found".format(package)) @@ -387,10 +392,11 @@ class ClickhouseIntegrationTestsRunner: os.getenv("CLICKHOUSE_TESTS_LIBRARY_BRIDGE_BIN_PATH"), ) - def _compress_logs(self, dir, relpaths, result_path): + @staticmethod + def _compress_logs(directory, relpaths, result_path): retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL "tar --use-compress-program='zstd --threads=0' -cf {} -C {} {}".format( - result_path, dir, " ".join(relpaths) + result_path, directory, " ".join(relpaths) ), shell=True, ) @@ -443,7 +449,8 @@ class ClickhouseIntegrationTestsRunner: return list(sorted(all_tests)) - def _get_parallel_tests_skip_list(self, repo_path): + @staticmethod + def _get_parallel_tests_skip_list(repo_path): skip_list_file_path = "{}/tests/integration/parallel_skip.json".format( repo_path ) @@ -462,7 +469,8 @@ class ClickhouseIntegrationTestsRunner: skip_list_tests = json.load(skip_list_file) return list(sorted(skip_list_tests)) - def group_test_by_file(self, tests): + @staticmethod + def group_test_by_file(tests): result = {} for test in tests: test_file = test.split("::")[0] @@ -471,7 +479,8 @@ class ClickhouseIntegrationTestsRunner: result[test_file].append(test) return result - def _update_counters(self, main_counters, current_counters, broken_tests): + @staticmethod + def _update_counters(main_counters, current_counters, broken_tests): for test in current_counters["PASSED"]: if test not in main_counters["PASSED"]: if test in main_counters["FAILED"]: @@ -525,7 +534,8 @@ class ClickhouseIntegrationTestsRunner: logging.info("Cannot run with custom docker image version :(") return image_cmd - def _find_test_data_dirs(self, repo_path, test_names): + @staticmethod + def _find_test_data_dirs(repo_path, test_names): relpaths = {} for test_name in test_names: if "/" in test_name: @@ -543,7 +553,8 @@ class 
ClickhouseIntegrationTestsRunner: relpaths[relpath] = mtime return relpaths - def _get_test_data_dirs_difference(self, new_snapshot, old_snapshot): + @staticmethod + def _get_test_data_dirs_difference(new_snapshot, old_snapshot): res = set() for path in new_snapshot: if (path not in old_snapshot) or (old_snapshot[path] != new_snapshot[path]): @@ -569,7 +580,7 @@ class ClickhouseIntegrationTestsRunner: broken_tests, ) except Exception as e: - logging.info("Failed to run {}:\n{}".format(str(test_group), str(e))) + logging.info("Failed to run %s:\n%s", str(test_group), str(e)) counters = { "ERROR": [], "PASSED": [], From 774fcdfff65ee404ff9af374365f847e73f9bde9 Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri Date: Tue, 30 May 2023 16:16:38 +0800 Subject: [PATCH 249/356] wip --- tests/integration/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 6e76270c607..3e03a49aaec 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,7 +1,9 @@ -from helpers.cluster import run_and_check +#!/usr/bin/env python3 + import pytest import logging import os +from helpers.cluster import run_and_check from helpers.test_tools import TSV from helpers.network import _NetworkManager From 76cbd783319cc7d924c4afbf4839f8b05585f087 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 26 Feb 2024 13:54:08 +0100 Subject: [PATCH 250/356] Update pylint, mypy, and black in style check image --- docker/test/style/Dockerfile | 2 +- pyproject.toml | 39 ++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 pyproject.toml diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index abc2dba0e9d..6153aec2144 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -18,7 +18,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ yamllint \ locales \ - && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \ + && pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \ && rm -rf /root/.cache/pip diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000000..277b24d4fd7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,39 @@ +[tool.pylint.BASIC] +max-module-lines=2000 +# due to SQL +max-line-length=200 +# Drop/decrease them one day: +max-branches=50 +max-nested-blocks=10 +max-statements=200 + +[tool.pylint.FORMAT] +#ignore-long-lines = (# )??$ + +[tool.pylint.'MESSAGES CONTROL'] + # pytest.mark.parametrize is not callable (not-callable) +disable = '''missing-docstring, + too-few-public-methods, + invalid-name, + too-many-arguments, + keyword-arg-before-vararg, + too-many-locals, + too-many-instance-attributes, + cell-var-from-loop, + fixme, + too-many-public-methods, + wildcard-import, + unused-wildcard-import, + singleton-comparison, + not-callable, + redefined-outer-name, + broad-except, + bare-except, + no-else-return, + global-statement + ''' + +[tool.pylint.SIMILARITIES] +# due to SQL +min-similarity-lines=1000 + From e31078e204d135a6594c6bb356ec4b447a4353c4 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 26 Feb 2024 18:46:15 +0100 Subject: [PATCH 251/356] Fix some issues detected by new pylint --- tests/ci/ast_fuzzer_check.py | 4 +- tests/ci/bugfix_validate_check.py | 38 +++++----- tests/ci/build_download_helper.py | 9 +-- tests/ci/cherry_pick.py | 12 ++-- tests/ci/ci.py | 17 +++-- tests/ci/ci_config.py | 23 +++--- tests/ci/clickhouse_helper.py | 49 ++++++------- tests/ci/functional_test_check.py | 8 +-- tests/ci/libfuzzer_test_check.py | 15 ++-- tests/ci/stress_check.py | 2 +- tests/ci/style_check.py | 2 +- tests/ci/tee_popen.py | 13 ++-- tests/clickhouse-test | 112 ++++++++++++++++-------------- 13 files changed, 153 insertions(+), 151 deletions(-) diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 6e3da7fa816..1ecf805cadc 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -67,14 +67,14 @@ def main(): build_name = get_build_name_for_check(check_name) urls = read_build_urls(build_name, reports_path) if not urls: - raise Exception("No build URLs found") + raise ValueError("No build URLs found") for url in urls: if url.endswith("/clickhouse"): build_url = url break else: - raise Exception("Cannot find the clickhouse binary among build results") + raise ValueError("Cannot find the clickhouse binary among build results") logging.info("Got build url %s", build_url) diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index ae7fce1f102..7aaf18e7765 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -1,37 +1,37 @@ #!/usr/bin/env python3 -from pathlib import Path -import subprocess -import sys -from typing import List, Sequence, Tuple import csv import logging +import subprocess +import sys +from pathlib import Path +from typing import List, Sequence, Tuple -from report import ( - ERROR, - FAILURE, - SKIPPED, - SUCCESS, - FAIL, - OK, - TestResult, - TestResults, - JobReport, -) -from env_helper import TEMP_PATH -from stopwatch import Stopwatch from ci_config import JobNames from ci_utils import normalize_string +from env_helper import TEMP_PATH from functional_test_check import NO_CHANGES_MSG +from report import ( + ERROR, + FAIL, + FAILURE, + OK, + SKIPPED, + SUCCESS, + JobReport, + TestResult, + TestResults, +) +from stopwatch import Stopwatch def post_commit_status_from_file(file_path: Path) -> List[str]: with open(file_path, "r", encoding="utf-8") as f: res = list(csv.reader(f, delimiter="\t")) if len(res) < 1: - raise Exception(f'Can\'t read from "{file_path}"') + raise IndexError(f'Can\'t read from "{file_path}"') if len(res[0]) != 3: - raise Exception(f'Can\'t read from "{file_path}"') + raise IndexError(f'Can\'t read from "{file_path}"') return res[0] diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 0d24cb80021..7ad39a5fff1 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -8,9 +8,8 @@ import time from pathlib import Path from typing import Any, Callable, List, Union -import requests # type: ignore - import get_robot_token as grt # we need an updated ROBOT_TOKEN +import requests # type: ignore from ci_config import CI_CONFIG DOWNLOAD_RETRIES_COUNT = 5 @@ -30,9 +29,10 @@ def get_with_retries( "Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url ) exc = Exception("A placeholder to satisfy typing and avoid nesting") + timeout = kwargs.pop("timeout", 30) for i in range(retries): try: - response = requests.get(url, **kwargs) + response = 
requests.get(url, timeout=timeout, **kwargs) response.raise_for_status() return response except Exception as e: @@ -74,10 +74,11 @@ def get_gh_api( token_is_set = "Authorization" in kwargs.get("headers", {}) exc = Exception("A placeholder to satisfy typing and avoid nesting") try_cnt = 0 + timeout = kwargs.pop("timeout", 30) while try_cnt < retries: try_cnt += 1 try: - response = requests.get(url, **kwargs) + response = requests.get(url, timeout=timeout, **kwargs) response.raise_for_status() return response except requests.HTTPError as e: diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 2c40b2a4099..d92504e30bd 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -456,11 +456,13 @@ class Backport: tomorrow = date.today() + timedelta(days=1) logging.info("Receive PRs suppose to be backported") - query_args = dict( - query=f"type:pr repo:{self._fetch_from} -label:{self.backport_created_label}", - label=",".join(self.labels_to_backport + [self.must_create_backport_label]), - merged=[since_date, tomorrow], - ) + query_args = { + "query": f"type:pr repo:{self._fetch_from} -label:{self.backport_created_label}", + "label": ",".join( + self.labels_to_backport + [self.must_create_backport_label] + ), + "merged": [since_date, tomorrow], + } logging.info("Query to find the backport PRs:\n %s", query_args) self.prs_for_backport = self.gh.get_pulls_from_search(**query_args) logging.info( diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 234eec48463..d6e07d4ff6e 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -397,7 +397,7 @@ class CiCache: status.dump_to_file(record_file) elif record_type == self.RecordType.PENDING: assert isinstance(status, PendingState) - with open(record_file, "w") as json_file: + with open(record_file, "w", encoding="utf-8") as json_file: json.dump(asdict(status), json_file) else: assert False @@ -1005,7 +1005,7 @@ def _mark_success_action( def _print_results(result: Any, outfile: Optional[str], pretty: bool = False) -> None: if outfile: - with open(outfile, "w") as f: + with open(outfile, "w", encoding="utf-8") as f: if isinstance(result, str): print(result, file=f) elif isinstance(result, dict): @@ -1125,8 +1125,7 @@ def _configure_jobs( jobs_to_wait: Dict[str, Dict[str, Any]] = {} randomization_buckets = {} # type: Dict[str, Set[str]] - for job in digests: - digest = digests[job] + for job, digest in digests.items(): job_config = CI_CONFIG.get_job_config(job) num_batches: int = job_config.num_batches batches_to_do: List[int] = [] @@ -1612,11 +1611,11 @@ def main() -> int: indata: Optional[Dict[str, Any]] = None if args.infile: - indata = ( - json.loads(args.infile) - if not os.path.isfile(args.infile) - else json.load(open(args.infile)) - ) + if os.path.isfile(args.infile): + with open(args.infile, encoding="utf-8") as jfd: + indata = json.load(jfd) + else: + indata = json.loads(args.infile) assert indata and isinstance(indata, dict), "Invalid --infile json" result: Dict[str, Any] = {} diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b9ee5670066..f7e056ad066 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -570,8 +570,7 @@ class CIConfig: self.builds_report_config, self.test_configs, ): - for check_name in config: # type: ignore - yield check_name + yield from config # type: ignore def get_builds_for_report( self, report_name: str, release: bool = False, backport: bool = False @@ -698,17 +697,15 @@ CI_CONFIG = CIConfig( job for job in JobNames if not any( - [ - nogo in job - for nogo in ( - "asan", - 
"tsan", - "msan", - "ubsan", - # skip build report jobs as not all builds will be done - "build check", - ) - ] + nogo in job + for nogo in ( + "asan", + "tsan", + "msan", + "ubsan", + # skip build report jobs as not all builds will be done + "build check", + ) ) ] ), diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index f338a1e14c3..3ab41673dec 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -1,13 +1,12 @@ #!/usr/bin/env python3 -from pathlib import Path -from typing import Dict, List, Optional import fileinput import json import logging import time +from pathlib import Path +from typing import Dict, List, Optional import requests # type: ignore - from get_robot_token import get_parameter_from_ssm from pr_info import PRInfo from report import TestResults @@ -72,11 +71,11 @@ class ClickHouseHelper: if args: url = args[0] url = kwargs.get("url", url) - kwargs["timeout"] = kwargs.get("timeout", 100) + timeout = kwargs.pop("timeout", 100) for i in range(5): try: - response = requests.post(*args, **kwargs) + response = requests.post(*args, timeout=timeout, **kwargs) except Exception as e: error = f"Received exception while sending data to {url} on {i} attempt: {e}" logging.warning(error) @@ -148,7 +147,9 @@ class ClickHouseHelper: for i in range(5): response = None try: - response = requests.get(self.url, params=params, headers=self.auth) + response = requests.get( + self.url, params=params, headers=self.auth, timeout=100 + ) response.raise_for_status() return response.text except Exception as ex: @@ -215,24 +216,24 @@ def prepare_tests_results_for_clickhouse( head_ref = pr_info.head_ref head_repo = pr_info.head_name - common_properties = dict( - pull_request_number=pr_info.number, - commit_sha=pr_info.sha, - commit_url=pr_info.commit_html_url, - check_name=check_name, - check_status=check_status, - check_duration_ms=int(float(check_duration) * 1000), - check_start_time=check_start_time, - report_url=report_url, - pull_request_url=pull_request_url, - base_ref=base_ref, - base_repo=base_repo, - head_ref=head_ref, - head_repo=head_repo, - task_url=pr_info.task_url, - instance_type=get_instance_type(), - instance_id=get_instance_id(), - ) + common_properties = { + "pull_request_number": pr_info.number, + "commit_sha": pr_info.sha, + "commit_url": pr_info.commit_html_url, + "check_name": check_name, + "check_status": check_status, + "check_duration_ms": int(float(check_duration) * 1000), + "check_start_time": check_start_time, + "report_url": report_url, + "pull_request_url": pull_request_url, + "base_ref": base_ref, + "base_repo": base_repo, + "head_ref": head_ref, + "head_repo": head_repo, + "task_url": pr_info.task_url, + "instance_type": get_instance_type(), + "instance_id": get_instance_id(), + } # Always publish a total record for all checks. For checks with individual # tests, also publish a record per test. 
diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 1e5807a96c0..4416b7863a6 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -12,10 +12,9 @@ from typing import List, Tuple from build_download_helper import download_all_deb_packages from clickhouse_helper import CiLogsCredentials - -from docker_images_helper import DockerImage, pull_image, get_docker_image +from docker_images_helper import DockerImage, get_docker_image, pull_image from download_release_packages import download_last_release -from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY +from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH from pr_info import PRInfo from report import ERROR, SUCCESS, JobReport, StatusType, TestResults, read_test_results from stopwatch import Stopwatch @@ -54,8 +53,7 @@ def get_image_name(check_name: str) -> str: return "clickhouse/stateless-test" if "stateful" in check_name.lower(): return "clickhouse/stateful-test" - else: - raise Exception(f"Cannot deduce image name based on check name {check_name}") + raise ValueError(f"Cannot deduce image name based on check name {check_name}") def get_run_command( diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 465b9a6b3a7..62dadc6fea9 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -9,19 +9,13 @@ from pathlib import Path from typing import List from build_download_helper import download_fuzzers -from clickhouse_helper import ( - CiLogsCredentials, -) -from docker_images_helper import DockerImage, pull_image, get_docker_image - -from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY +from clickhouse_helper import CiLogsCredentials +from docker_images_helper import DockerImage, get_docker_image, pull_image +from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH from pr_info import PRInfo - from stopwatch import Stopwatch - from tee_popen import TeePopen - NO_CHANGES_MSG = "Nothing to run" @@ -130,7 +124,8 @@ def main(): os.chmod(fuzzers_path / file, 0o777) elif file.endswith("_seed_corpus.zip"): corpus_path = fuzzers_path / (file.removesuffix("_seed_corpus.zip") + ".in") - zipfile.ZipFile(fuzzers_path / file, "r").extractall(corpus_path) + with zipfile.ZipFile(fuzzers_path / file, "r") as zfd: + zfd.extractall(corpus_path) result_path = temp_path / "result_path" result_path.mkdir(parents=True, exist_ok=True) diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 49c1515c69f..0f2c0cdd222 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -95,7 +95,7 @@ def process_results( results_path = result_directory / "test_results.tsv" test_results = read_test_results(results_path, True) if len(test_results) == 0: - raise Exception("Empty results") + raise ValueError("Empty results") except Exception as e: return ( ERROR, diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 0c7160aeea4..d004f3ed215 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -43,7 +43,7 @@ def process_result( results_path = result_directory / "test_results.tsv" test_results = read_test_results(results_path) if len(test_results) == 0: - raise Exception("Empty results") + raise ValueError("Empty results") return state, description, test_results, additional_files except Exception: diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index a50532aea20..13db50df53f 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -1,14 +1,14 @@ #!/usr/bin/env 
python3 -from io import TextIOWrapper -from pathlib import Path -from subprocess import Popen, PIPE, STDOUT -from threading import Thread -from time import sleep -from typing import Optional, Union import logging import os import sys +from io import TextIOWrapper +from pathlib import Path +from subprocess import PIPE, STDOUT, Popen +from threading import Thread +from time import sleep +from typing import Optional, Union # Very simple tee logic implementation. You can specify a shell command, output @@ -98,5 +98,6 @@ class TeePopen: @property def log_file(self) -> TextIOWrapper: if self._log_file is None: + # pylint:disable-next=consider-using-with self._log_file = open(self._log_file_name, "w", encoding="utf-8") return self._log_file diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5afda75aa5c..ce0feadf050 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -5,48 +5,46 @@ # pylint: disable=too-many-lines # pylint: disable=anomalous-backslash-in-string -import enum -from queue import Full -import shutil -import itertools -import sys -import os -import os.path -import glob -import platform -import signal -import re import copy -import traceback -import math +import enum +import glob # Not requests, to avoid requiring extra dependency. import http.client -import urllib.parse +import itertools import json +import math +import multiprocessing +import os +import os.path +import platform +import random +import re +import shutil +import signal +import socket +import string +import subprocess +import sys +import traceback +import urllib.parse # for crc32 import zlib - from argparse import ArgumentParser -from typing import Tuple, Union, Optional, Dict, Set, List -import subprocess -from subprocess import Popen -from subprocess import PIPE +from contextlib import closing from datetime import datetime, timedelta -from time import time, sleep from errno import ESRCH +from queue import Full +from subprocess import PIPE, Popen +from time import sleep, time +from typing import Dict, List, Optional, Set, Tuple, Union try: import termcolor # type: ignore except ImportError: termcolor = None -import random -import string -import multiprocessing -import socket -from contextlib import closing USE_JINJA = True try: @@ -70,7 +68,7 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"] VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$" -def stringhash(s): +def stringhash(s: str) -> int: # default hash() function consistent # only during process invocation https://stackoverflow.com/a/42089311 return zlib.crc32(s.encode("utf-8")) @@ -94,6 +92,10 @@ def trim_for_log(s): return "\n".join(lines) +class TestException(Exception): + pass + + class HTTPError(Exception): def __init__(self, message=None, code=None): self.message = message @@ -250,7 +252,7 @@ def get_db_engine(args, database_name): def get_create_database_settings(args, testcase_args): - create_database_settings = dict() + create_database_settings = {} if testcase_args: create_database_settings["log_comment"] = testcase_args.testcase_basename if args.db_engine == "Ordinary": @@ -1186,7 +1188,7 @@ class TestCase: ) if result_is_different: - diff_proc = Popen( + with Popen( [ "diff", "-U", @@ -1197,23 +1199,23 @@ class TestCase: encoding="latin-1", stdout=PIPE, universal_newlines=True, - ) - if self.show_whitespaces_in_diff: - sed_proc = Popen( - ["sed", "-e", "s/[ \t]\\+$/&$/g"], - stdin=diff_proc.stdout, - stdout=PIPE, - ) - diff_proc.stdout.close() # Allow diff to receive a SIGPIPE if cat exits. 
- diff = sed_proc.communicate()[0].decode("utf-8", errors="ignore") - else: - diff = diff_proc.communicate()[0] + ) as diff_proc: + if self.show_whitespaces_in_diff: + with Popen( + ["sed", "-e", "s/[ \t]\\+$/&$/g"], + stdin=diff_proc.stdout, + stdout=PIPE, + ) as sed_proc: + diff = sed_proc.communicate()[0].decode( + "utf-8", errors="ignore" + ) + else: + diff = diff_proc.communicate()[0] if diff.startswith("Binary files "): diff += "Content of stdout:\n===================\n" - file = open(self.stdout_file, "rb") - diff += str(file.read()) - file.close() + with open(self.stdout_file, "rb") as file: + diff += str(file.read()) diff += "===================" description += f"\n{diff}\n" if debug_log: @@ -1376,6 +1378,7 @@ class TestCase: command = pattern.format(**params) + # pylint:disable-next=consider-using-with; TODO: fix proc = Popen(command, shell=True, env=os.environ, start_new_session=True) while ( @@ -1542,7 +1545,7 @@ class TestCase: ) if len(leftover_tables) != 0: - raise Exception( + raise TestException( f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally." ) @@ -1625,7 +1628,7 @@ class TestSuite: ): return "#" else: - raise Exception(f"Unknown file_extension: {filename}") + raise TestException(f"Unknown file_extension: {filename}") def parse_tags_from_line(line, comment_sign) -> Set[str]: if not line.startswith(comment_sign): @@ -1686,17 +1689,22 @@ class TestSuite: self.suite_tmp_path: str = suite_tmp_path self.suite: str = suite - filter_func = lambda x: True # noqa: ignore E731 - if args.run_by_hash_num is not None and args.run_by_hash_total is not None: if args.run_by_hash_num > args.run_by_hash_total: - raise Exception( + raise TestException( f"Incorrect run by hash, value {args.run_by_hash_num} bigger than total {args.run_by_hash_total}" ) - filter_func = ( - lambda x: stringhash(x) % args.run_by_hash_total == args.run_by_hash_num - ) + def filter_func(x: str) -> bool: + return bool( + stringhash(x) % args.run_by_hash_total == args.run_by_hash_num + ) + + else: + + def filter_func(x: str) -> bool: + _ = x + return True self.all_tests: List[str] = self.get_tests_list( self.tests_in_suite_key_func, filter_func @@ -2418,7 +2426,7 @@ def main(args): pid = get_server_pid() print("Got server pid", pid) print_stacktraces() - raise Exception(msg) + raise TestException(msg) args.build_flags = collect_build_flags(args) args.changed_merge_tree_settings = collect_changed_merge_tree_settings(args) @@ -2622,7 +2630,7 @@ def find_binary(name): if os.access(bin_path, os.X_OK): return bin_path - raise Exception(f"{name} was not found in PATH") + raise TestException(f"{name} was not found in PATH") def find_clickhouse_command(binary, command): From 91cff01ce0bc483334d58d8b40b9fe83bdda319a Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 26 Feb 2024 19:25:02 +0100 Subject: [PATCH 252/356] More fixes in tests/ci --- tests/ci/build_download_helper.py | 6 +++++- tests/ci/commit_status_helper.py | 6 +++--- tests/ci/compatibility_check.py | 6 +++--- tests/ci/docker_images_check.py | 27 ++++++++++++++----------- tests/ci/docker_manifests_merge.py | 26 +++++++++++++----------- tests/ci/docker_server.py | 4 ++-- tests/ci/get_previous_release_tag.py | 9 +++++---- tests/ci/jepsen_check.py | 12 +++++------ tests/ci/release.py | 18 ++++++++--------- tests/ci/report.py | 8 ++++---- tests/ci/sqlancer_check.py | 4 ++-- tests/ci/sqltest.py | 4 ++-- tests/ci/ssh.py | 16 +++++++-------- tests/ci/stress.py | 30 +++++++++++++++++----------- tests/ci/unit_tests_check.py | 2 +- 15 files changed, 97 insertions(+), 81 deletions(-) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 7ad39a5fff1..c7a461e003b 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -8,8 +8,12 @@ import time from pathlib import Path from typing import Any, Callable, List, Union -import get_robot_token as grt # we need an updated ROBOT_TOKEN +# isort: off import requests # type: ignore + +# isort: on + +import get_robot_token as grt # we need an updated ROBOT_TOKEN from ci_config import CI_CONFIG DOWNLOAD_RETRIES_COUNT = 5 diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 9d2742ba5ba..1c2d8b2ade8 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -303,7 +303,7 @@ def post_commit_status_to_file( file_path: Path, description: str, state: str, report_url: str ) -> None: if file_path.exists(): - raise Exception(f'File "{file_path}" already exists!') + raise FileExistsError(f'File "{file_path}" already exists!') with open(file_path, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerow([state, report_url, description]) @@ -329,7 +329,7 @@ class CommitStatusData: @classmethod def load_from_file(cls, file_path: Union[Path, str]): # type: ignore res = {} - with open(file_path, "r") as json_file: + with open(file_path, "r", encoding="utf-8") as json_file: res = json.load(json_file) return CommitStatusData(**cls._filter_dict(res)) @@ -347,7 +347,7 @@ class CommitStatusData: def dump_to_file(self, file_path: Union[Path, str]) -> None: file_path = Path(file_path) or STATUS_FILE_PATH - with open(file_path, "w") as json_file: + with open(file_path, "w", encoding="utf-8") as json_file: json.dump(asdict(self), json_file) def is_ok(self): diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 8009ef24760..5e980660749 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -26,7 +26,7 @@ DOWNLOAD_RETRIES_COUNT = 5 def process_os_check(log_path: Path) -> TestResult: name = log_path.name - with open(log_path, "r") as log: + with open(log_path, "r", encoding="utf-8") as log: line = log.read().split("\n")[0].strip() if line != "OK": return TestResult(name, "FAIL") @@ -35,7 +35,7 @@ def process_os_check(log_path: Path) -> TestResult: def process_glibc_check(log_path: Path, max_glibc_version: str) -> TestResults: test_results = [] # type: TestResults - with open(log_path, "r") as log: + with open(log_path, "r", encoding="utf-8") as log: for line in log: if line.strip(): columns = line.strip().split(" ") @@ -204,7 +204,7 @@ def main(): elif "aarch64" in check_name: max_glibc_version = "2.18" # because of build with newer sysroot? 
else: - raise Exception("Can't determine max glibc version") + raise RuntimeError("Can't determine max glibc version") state, description, test_results, additional_logs = process_result( result_path, diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index ad497a00eba..b04a3975545 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -195,18 +195,21 @@ def main(): ok_cnt = 0 status = SUCCESS # type: StatusType - image_tags = ( - json.loads(args.image_tags) - if not os.path.isfile(args.image_tags) - else json.load(open(args.image_tags)) - ) - missing_images = ( - image_tags - if args.missing_images == "all" - else json.loads(args.missing_images) - if not os.path.isfile(args.missing_images) - else json.load(open(args.missing_images)) - ) + + if os.path.isfile(args.image_tags): + with open(args.image_tags, "r", encoding="utf-8") as jfd: + image_tags = json.load(jfd) + else: + image_tags = json.loads(args.image_tags) + + if args.missing_images == "all": + missing_images = image_tags + elif os.path.isfile(args.missing_images): + with open(args.missing_images, "r", encoding="utf-8") as jfd: + missing_images = json.load(jfd) + else: + missing_images = json.loads(args.missing_images) + images_build_list = get_images_oredered_list() for image in images_build_list: diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index fc00969d5d6..3c122545735 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -135,18 +135,20 @@ def main(): archs = args.suffixes assert len(archs) > 1, "arch suffix input param is invalid" - image_tags = ( - json.loads(args.image_tags) - if not os.path.isfile(args.image_tags) - else json.load(open(args.image_tags)) - ) - missing_images = ( - list(image_tags) - if args.missing_images == "all" - else json.loads(args.missing_images) - if not os.path.isfile(args.missing_images) - else json.load(open(args.missing_images)) - ) + if os.path.isfile(args.image_tags): + with open(args.image_tags, "r", encoding="utf-8") as jfd: + image_tags = json.load(jfd) + else: + image_tags = json.loads(args.image_tags) + + if args.missing_images == "all": + missing_images = image_tags + elif os.path.isfile(args.missing_images): + with open(args.missing_images, "r", encoding="utf-8") as jfd: + missing_images = json.load(jfd) + else: + missing_images = json.loads(args.missing_images) + test_results = [] status = SUCCESS # type: StatusType diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 38d0ea6d86b..fda0dd16d0b 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -363,8 +363,8 @@ def main(): image = DockerImageData(image_path, image_repo, False) args.release_type = auto_release_type(args.version, args.release_type) tags = gen_tags(args.version, args.release_type) - repo_urls = dict() - direct_urls: Dict[str, List[str]] = dict() + repo_urls = {} + direct_urls: Dict[str, List[str]] = {} release_or_pr, _ = get_release_or_pr(pr_info, args.version) for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index c2d279f7fec..b74432425d3 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -import re import logging +import re from typing import List, Optional, Tuple import requests # type: ignore @@ -82,13 +82,14 @@ def get_previous_release(server_version: 
Optional[Version]) -> Optional[ReleaseI CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100}, timeout=10 ) if not response.ok: - raise Exception( - "Cannot load the list of tags from github: " + response.reason + logger.error( + "Cannot load the list of tags from github: %s", response.reason ) + response.raise_for_status() releases_str = set(re.findall(VERSION_PATTERN, response.text)) if len(releases_str) == 0: - raise Exception( + raise ValueError( "Cannot find previous release for " + str(server_version) + " server version" diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index fb7540abda3..01263c3085d 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -46,7 +46,7 @@ FAILED_TESTS_ANCHOR = "# Failed tests" def _parse_jepsen_output(path: Path) -> TestResults: test_results = [] # type: TestResults current_type = "" - with open(path, "r") as f: + with open(path, "r", encoding="utf-8") as f: for line in f: if SUCCESSFUL_TESTS_ANCHOR in line: current_type = "OK" @@ -101,7 +101,7 @@ def prepare_autoscaling_group_and_get_hostnames(count): instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) counter += 1 if counter > 30: - raise Exception("Cannot wait autoscaling group") + raise RuntimeError("Cannot wait autoscaling group") ec2_client = boto3.client("ec2", region_name="us-east-1") return get_instances_addresses(ec2_client, instances) @@ -119,12 +119,12 @@ def clear_autoscaling_group(): instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) counter += 1 if counter > 30: - raise Exception("Cannot wait autoscaling group") + raise RuntimeError("Cannot wait autoscaling group") def save_nodes_to_file(instances: List[Any], temp_path: Path) -> Path: nodes_path = temp_path / "nodes.txt" - with open(nodes_path, "w") as f: + with open(nodes_path, "w", encoding="utf-8") as f: f.write("\n".join(instances)) f.flush() return nodes_path @@ -159,7 +159,7 @@ def main(): ) args = parser.parse_args() - if args.program != "server" and args.program != "keeper": + if args.program not in ("server", "keeper"): logging.warning("Invalid argument '%s'", args.program) sys.exit(0) @@ -220,7 +220,7 @@ def main(): f"{S3_URL}/{S3_BUILDS_BUCKET}/{version}/{sha}/binary_release/clickhouse" ) print(f"Clickhouse version: [{version_full}], sha: [{sha}], url: [{build_url}]") - head = requests.head(build_url) + head = requests.head(build_url, timeout=60) assert head.status_code == 200, f"Clickhouse binary not found: {build_url}" else: build_name = get_build_name_for_check(check_name) diff --git a/tests/ci/release.py b/tests/ci/release.py index 2b3331938e7..679e65560f5 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -54,7 +54,7 @@ class Repo: elif protocol == "origin": self._url = protocol else: - raise Exception(f"protocol must be in {self.VALID}") + raise ValueError(f"protocol must be in {self.VALID}") def __str__(self): return self._repo @@ -144,7 +144,7 @@ class Release: for status in statuses: if status["context"] == RELEASE_READY_STATUS: if not status["state"] == SUCCESS: - raise Exception( + raise ValueError( f"the status {RELEASE_READY_STATUS} is {status['state']}" ", not success" ) @@ -153,7 +153,7 @@ class Release: page += 1 - raise Exception( + raise KeyError( f"the status {RELEASE_READY_STATUS} " f"is not found for commit {self.release_commit}" ) @@ -188,7 +188,7 @@ class Release: raise if check_run_from_master and self._git.branch != "master": - raise Exception("the script must be launched only from master") + raise 
RuntimeError("the script must be launched only from master") self.set_release_info() @@ -229,7 +229,7 @@ class Release: def check_no_tags_after(self): tags_after_commit = self.run(f"git tag --contains={self.release_commit}") if tags_after_commit: - raise Exception( + raise RuntimeError( f"Commit {self.release_commit} belongs to following tags:\n" f"{tags_after_commit}\nChoose another commit" ) @@ -253,7 +253,7 @@ class Release: ) output = self.run(f"git branch --contains={self.release_commit} {branch}") if branch not in output: - raise Exception( + raise RuntimeError( f"commit {self.release_commit} must belong to {branch} " f"for {self.release_type} release" ) @@ -464,9 +464,9 @@ class Release: logging.warning("Rolling back checked out %s for %s", ref, orig_ref) self.run(f"git reset --hard; git checkout -f {orig_ref}") raise - else: - if with_checkout_back and need_rollback: - self.run(rollback_cmd) + # Normal flow when we need to checkout back + if with_checkout_back and need_rollback: + self.run(rollback_cmd) @contextmanager def _create_branch(self, name: str, start_point: str = "") -> Iterator[None]: diff --git a/tests/ci/report.py b/tests/ci/report.py index 282c343eec3..669409d1729 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -22,8 +22,8 @@ from typing import ( from build_download_helper import get_gh_api from ci_config import CI_CONFIG, BuildConfig -from env_helper import REPORT_PATH, TEMP_PATH from ci_utils import normalize_string +from env_helper import REPORT_PATH, TEMP_PATH logger = logging.getLogger(__name__) @@ -296,7 +296,7 @@ class JobReport: def load(cls, from_file=None): # type: ignore res = {} from_file = from_file or JOB_REPORT_FILE - with open(from_file, "r") as json_file: + with open(from_file, "r", encoding="utf-8") as json_file: res = json.load(json_file) # Deserialize the nested lists of TestResult test_results_data = res.get("test_results", []) @@ -316,7 +316,7 @@ class JobReport: raise TypeError("Type not serializable") to_file = to_file or JOB_REPORT_FILE - with open(to_file, "w") as json_file: + with open(to_file, "w", encoding="utf-8") as json_file: json.dump(asdict(self), json_file, default=path_converter, indent=2) @@ -418,7 +418,7 @@ class BuildResult: def load_from_file(cls, file: Union[Path, str]): # type: ignore if not Path(file).exists(): return None - with open(file, "r") as json_file: + with open(file, "r", encoding="utf-8") as json_file: res = json.load(json_file) return BuildResult(**res) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 59d2a3d6275..9d33c480598 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -46,14 +46,14 @@ def main(): build_name = get_build_name_for_check(check_name) urls = read_build_urls(build_name, reports_path) if not urls: - raise Exception("No build URLs found") + raise ValueError("No build URLs found") for url in urls: if url.endswith("/clickhouse"): build_url = url break else: - raise Exception("Cannot find binary clickhouse among build results") + raise ValueError("Cannot find the clickhouse binary among build results") logging.info("Got build url %s", build_url) diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py index 2fe6aabd69c..c8c2adbbd56 100644 --- a/tests/ci/sqltest.py +++ b/tests/ci/sqltest.py @@ -53,14 +53,14 @@ def main(): print(build_name) urls = read_build_urls(build_name, reports_path) if not urls: - raise Exception("No build URLs found") + raise ValueError("No build URLs found") for url in urls: if url.endswith("/clickhouse"): build_url = 
url break else: - raise Exception("Cannot find the clickhouse binary among build results") + raise ValueError("Cannot find the clickhouse binary among build results") logging.info("Got build url %s", build_url) diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py index 275f26fd65f..321826fcf44 100644 --- a/tests/ci/ssh.py +++ b/tests/ci/ssh.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 -import shutil +import logging import os +import shutil +import signal import subprocess import tempfile -import logging -import signal class SSHAgent: @@ -21,7 +21,7 @@ class SSHAgent: def start(self): if shutil.which("ssh-agent") is None: - raise Exception("ssh-agent binary is not available") + raise RuntimeError("ssh-agent binary is not available") self._env_backup["SSH_AUTH_SOCK"] = os.environ.get("SSH_AUTH_SOCK") self._env_backup["SSH_OPTIONS"] = os.environ.get("SSH_OPTIONS") @@ -54,7 +54,7 @@ class SSHAgent: def remove(self, key_pub): if key_pub not in self._keys: - raise Exception(f"Private key not found, public part: {key_pub}") + raise ValueError(f"Private key not found, public part: {key_pub}") if self._keys[key_pub] > 1: self._keys[key_pub] -= 1 @@ -107,7 +107,7 @@ class SSHAgent: if p.returncode: message = stderr.strip() + b"\n" + stdout.strip() - raise Exception(message.strip().decode()) + raise RuntimeError(message.strip().decode()) return stdout @@ -115,9 +115,9 @@ class SSHAgent: class SSHKey: def __init__(self, key_name=None, key_value=None): if key_name is None and key_value is None: - raise Exception("Either key_name or key_value must be specified") + raise ValueError("Either key_name or key_value must be specified") if key_name is not None and key_value is not None: - raise Exception("key_name or key_value must be specified") + raise ValueError("key_name or key_value must be specified") if key_name is not None: self.key = os.getenv(key_name) else: diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 7d582e683e0..7ccc058f79f 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """This script is used in docker images for stress tests and upgrade tests""" -from multiprocessing import cpu_count -from pathlib import Path -from subprocess import Popen, call, check_output, STDOUT, PIPE -from typing import List import argparse import logging import random import time +from multiprocessing import cpu_count +from pathlib import Path +from subprocess import PIPE, STDOUT, Popen, call, check_output +from typing import List def get_options(i: int, upgrade_check: bool) -> str: @@ -90,12 +90,13 @@ def run_func_test( ] pipes = [] for i, path in enumerate(output_paths): - with open(path, "w") as op: + with open(path, "w", encoding="utf-8") as op: full_command = ( f"{cmd} {get_options(i, upgrade_check)} {global_time_limit_option} " f"{skip_tests_option} {upgrade_check_option}" ) logging.info("Run func tests '%s'", full_command) + # pylint:disable-next=consider-using-with pipes.append(Popen(full_command, shell=True, stdout=op, stderr=op)) time.sleep(0.5) return pipes @@ -204,6 +205,7 @@ def prepare_for_hung_check(drop_databases: bool) -> bool: continue command = make_query_command(f"DETACH DATABASE {db}") # we don't wait for drop + # pylint:disable-next=consider-using-with Popen(command, shell=True) break except Exception as ex: @@ -212,7 +214,7 @@ def prepare_for_hung_check(drop_databases: bool) -> bool: ) time.sleep(i) else: - raise Exception( + raise RuntimeError( "Cannot drop databases after stress tests. 
Probably server consumed " "too much memory and cannot execute simple queries" ) @@ -293,7 +295,9 @@ def main(): args = parse_args() if args.drop_databases and not args.hung_check: - raise Exception("--drop-databases only used in hung check (--hung-check)") + raise argparse.ArgumentTypeError( + "--drop-databases only used in hung check (--hung-check)" + ) # FIXME Hung check with ubsan is temporarily disabled due to # https://github.com/ClickHouse/ClickHouse/issues/45372 @@ -359,15 +363,17 @@ def main(): ] ) hung_check_log = args.output_folder / "hung_check.log" # type: Path - tee = Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) - res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT, timeout=600) - if tee.stdin is not None: - tee.stdin.close() + with Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) as tee: + res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT, timeout=600) + if tee.stdin is not None: + tee.stdin.close() if res != 0 and have_long_running_queries and not suppress_hung_check: logging.info("Hung check failed with exit code %d", res) else: hung_check_status = "No queries hung\tOK\t\\N\t\n" - with open(args.output_folder / "test_results.tsv", "w+") as results: + with open( + args.output_folder / "test_results.tsv", "w+", encoding="utf-8" + ) as results: results.write(hung_check_status) hung_check_log.unlink() diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index f64f114d3de..2c2862d926a 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -23,7 +23,7 @@ def get_test_name(line): for element in elements: if "(" not in element and ")" not in element: return element - raise Exception(f"No test name in line '{line}'") + raise ValueError(f"No test name in line '{line}'") def process_results( From 842ca051a9a096f64bc289a4a2364867df401a43 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 26 Feb 2024 21:24:45 +0100 Subject: [PATCH 253/356] Fix more linter issues in ci-runner.py --- tests/integration/ci-runner.py | 134 ++++++++++++++++----------------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index acafa16b837..e7f691d2237 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -15,6 +15,7 @@ import sys import time import zlib # for crc32 from collections import defaultdict +from itertools import chain from integration_test_images import IMAGES @@ -103,7 +104,7 @@ def get_counters(fname): "SKIPPED": set([]), } - with open(fname, "r") as out: + with open(fname, "r", encoding="utf-8") as out: for line in out: line = line.strip() # Example of log: @@ -119,7 +120,7 @@ def get_counters(fname): # [gw0] [ 7%] ERROR test_mysql_protocol/test.py::test_golang_client # ^^^^^^^^^^^^^ if line.strip().startswith("["): - line = re.sub("^\[[^\[\]]*\] \[[^\[\]]*\] ", "", line) + line = re.sub(r"^\[[^\[\]]*\] \[[^\[\]]*\] ", "", line) line_arr = line.split(" ") if len(line_arr) < 2: @@ -161,7 +162,7 @@ def get_counters(fname): def parse_test_times(fname): read = False description_output = [] - with open(fname, "r") as out: + with open(fname, "r", encoding="utf-8") as out: for line in out: if read and "==" in line: break @@ -227,7 +228,7 @@ def clear_ip_tables_and_restart_daemons(): time.sleep(0.5) logging.info("Waiting docker to start, current %s", str(err)) else: - raise Exception("Docker daemon doesn't responding") + raise RuntimeError("Docker daemon doesn't responding") except subprocess.CalledProcessError as err: logging.info("Can't reload docker: %s", str(err)) @@ -306,13 +307,9 @@ class ClickhouseIntegrationTestsRunner: image_cmd = self._get_runner_image_cmd(repo_path) cmd = ( - "cd {repo_path}/tests/integration && " - "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format( - repo_path=repo_path, - runner_opts=self._get_runner_opts(), - image_cmd=image_cmd, - command=r""" echo Pre Pull finished """, - ) + f"cd {repo_path}/tests/integration && " + f"timeout --signal=KILL 1h ./runner {self._get_runner_opts()} {image_cmd} " + "--pre-pull --command ' echo Pre Pull finished ' " ) for i in range(5): @@ -332,7 +329,7 @@ class ClickhouseIntegrationTestsRunner: @staticmethod def _can_run_with(path, opt): - with open(path, "r") as script: + with open(path, "r", encoding="utf-8") as script: for line in script: if opt in line: return True @@ -352,21 +349,23 @@ class ClickhouseIntegrationTestsRunner: logging.info("Package found in %s", full_path) log_name = "install_" + f + ".log" log_path = os.path.join(str(self.path()), log_name) - with open(log_path, "w") as log: - cmd = "dpkg -x {} .".format(full_path) + with open(log_path, "w", encoding="utf-8") as log: + cmd = f"dpkg -x {full_path} ." 
logging.info("Executing installation cmd %s", cmd) - retcode = subprocess.Popen( + with subprocess.Popen( cmd, shell=True, stderr=log, stdout=log - ).wait() - if retcode == 0: - logging.info("Installation of %s successfull", full_path) - else: - raise Exception( - "Installation of {} failed".format(full_path) - ) + ) as proc: + if proc.wait() == 0: + logging.info( + "Installation of %s successfull", full_path + ) + else: + raise RuntimeError( + f"Installation of {full_path} failed" + ) break else: - raise Exception("Package with {} not found".format(package)) + raise FileNotFoundError(f"Package with {package} not found") # logging.info("Unstripping binary") # logging.info( # "Unstring %s", @@ -395,9 +394,8 @@ class ClickhouseIntegrationTestsRunner: @staticmethod def _compress_logs(directory, relpaths, result_path): retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - "tar --use-compress-program='zstd --threads=0' -cf {} -C {} {}".format( - result_path, directory, " ".join(relpaths) - ), + f"tar --use-compress-program='zstd --threads=0' -cf {result_path} -C " + f"{directory} {' '.join(relpaths)}", shell=True, ) # tar return 1 when the files are changed on compressing, we ignore it @@ -451,21 +449,18 @@ class ClickhouseIntegrationTestsRunner: @staticmethod def _get_parallel_tests_skip_list(repo_path): - skip_list_file_path = "{}/tests/integration/parallel_skip.json".format( - repo_path - ) + skip_list_file_path = f"{repo_path}/tests/integration/parallel_skip.json" if ( not os.path.isfile(skip_list_file_path) or os.path.getsize(skip_list_file_path) == 0 ): - raise Exception( - "There is something wrong with getting all tests list: file '{}' is empty or does not exist.".format( - skip_list_file_path - ) + raise ValueError( + "There is something wrong with getting all tests list: " + f"file '{skip_list_file_path}' is empty or does not exist." 
) skip_list_tests = [] - with open(skip_list_file_path, "r") as skip_list_file: + with open(skip_list_file_path, "r", encoding="utf-8") as skip_list_file: skip_list_tests = json.load(skip_list_file) return list(sorted(skip_list_tests)) @@ -520,14 +515,15 @@ class ClickhouseIntegrationTestsRunner: logging.info( "Can run with custom docker image version %s", runner_version ) - image_cmd += " --docker-image-version={} ".format(runner_version) + image_cmd += f" --docker-image-version={runner_version} " else: if self._can_run_with( os.path.join(repo_path, "tests/integration", "runner"), "--docker-compose-images-tags", ): - image_cmd += "--docker-compose-images-tags={} ".format( - self.get_image_with_version(img) + image_cmd += ( + "--docker-compose-images-tags=" + f"{self.get_image_with_version(img)} " ) else: image_cmd = "" @@ -580,7 +576,7 @@ class ClickhouseIntegrationTestsRunner: broken_tests, ) except Exception as e: - logging.info("Failed to run %s:\n%s", str(test_group), str(e)) + logging.info("Failed to run %s:\n%s", test_group, e) counters = { "ERROR": [], "PASSED": [], @@ -641,31 +637,27 @@ class ClickhouseIntegrationTestsRunner: info_path = os.path.join(repo_path, "tests/integration", info_basename) test_cmd = " ".join([shlex.quote(test) for test in sorted(test_names)]) - parallel_cmd = ( - " --parallel {} ".format(num_workers) if num_workers > 0 else "" - ) + parallel_cmd = f" --parallel {num_workers} " if num_workers > 0 else "" # -r -- show extra test summary: # -f -- (f)ailed # -E -- (E)rror # -p -- (p)assed # -s -- (s)kipped - cmd = "cd {}/tests/integration && timeout --signal=KILL 1h ./runner {} {} -t {} {} -- -rfEps --run-id={} --color=no --durations=0 {} | tee {}".format( - repo_path, - self._get_runner_opts(), - image_cmd, - test_cmd, - parallel_cmd, - i, - _get_deselect_option(self.should_skip_tests()), - info_path, + cmd = ( + f"cd {repo_path}/tests/integration && " + f"timeout --signal=KILL 1h ./runner {self._get_runner_opts()} " + f"{image_cmd} -t {test_cmd} {parallel_cmd} -- -rfEps --run-id={i} " + f"--color=no --durations=0 {_get_deselect_option(self.should_skip_tests())} " + f"| tee {info_path}" ) log_basename = test_group_str + "_" + str(i) + ".log" log_path = os.path.join(repo_path, "tests/integration", log_basename) - with open(log_path, "w") as log: + with open(log_path, "w", encoding="utf-8") as log: logging.info("Executing cmd: %s", cmd) # ignore retcode, since it meaningful due to pipe to tee - subprocess.Popen(cmd, shell=True, stderr=log, stdout=log).wait() + with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as proc: + proc.wait() extra_logs_names = [log_basename] log_result_path = os.path.join( @@ -756,11 +748,14 @@ class ClickhouseIntegrationTestsRunner: # want to mark them as error so we filter by '::'. 
for test in tests_in_group: if ( - test not in counters["PASSED"] - and test not in counters["ERROR"] - and test not in counters["SKIPPED"] - and test not in counters["FAILED"] - and test not in counters["BROKEN"] + test + not in chain( + counters["PASSED"], + counters["ERROR"], + counters["SKIPPED"], + counters["FAILED"], + counters["BROKEN"], + ) and "::" in test ): counters["ERROR"].append(test) @@ -825,7 +820,7 @@ class ClickhouseIntegrationTestsRunner: ( c + " (✕" + str(final_retry) + ")", text_state, - "{:.2f}".format(tests_times[c]), + f"{tests_times[c]:.2f}", ) for c in counters[state] ] @@ -847,7 +842,7 @@ class ClickhouseIntegrationTestsRunner: self._install_clickhouse(build_path) logging.info("Pulling images") - runner._pre_pull_images(repo_path) + self._pre_pull_images(repo_path) logging.info( "Dump iptables before run %s", @@ -920,11 +915,15 @@ class ClickhouseIntegrationTestsRunner: logging.info("Shuffling test groups") random.shuffle(items_to_run) - broken_tests = list() + broken_tests = [] if self.use_analyzer: - with open(f"{repo_path}/tests/analyzer_integration_broken_tests.txt") as f: + with open( + f"{repo_path}/tests/analyzer_integration_broken_tests.txt", + "r", + encoding="utf-8", + ) as f: broken_tests = f.read().splitlines() - logging.info(f"Broken tests in the list: {len(broken_tests)}") + logging.info("Broken tests in the list: %s", len(broken_tests)) for group, tests in items_to_run: logging.info("Running test group %s containing %s tests", group, len(tests)) @@ -976,12 +975,12 @@ class ClickhouseIntegrationTestsRunner: else: text_state = state test_result += [ - (c, text_state, "{:.2f}".format(tests_times[c]), tests_log_paths[c]) + (c, text_state, f"{tests_times[c]:.2f}", tests_log_paths[c]) for c in counters[state] ] failed_sum = len(counters["FAILED"]) + len(counters["ERROR"]) - status_text = "fail: {}, passed: {}".format(failed_sum, len(counters["PASSED"])) + status_text = f"fail: {failed_sum}, passed: {len(counters['PASSED'])}" if self.soft_deadline_time < time.time(): status_text = "Timeout, " + status_text @@ -998,10 +997,10 @@ class ClickhouseIntegrationTestsRunner: def write_results(results_file, status_file, results, status): - with open(results_file, "w") as f: + with open(results_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, "w") as f: + with open(status_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerow(status) @@ -1014,7 +1013,8 @@ if __name__ == "__main__": result_path = os.environ.get("CLICKHOUSE_TESTS_RESULT_PATH") params_path = os.environ.get("CLICKHOUSE_TESTS_JSON_PARAMS_PATH") - params = json.loads(open(params_path, "r").read()) + with open(params_path, "r", encoding="utf-8") as jfd: + params = json.loads(jfd.read()) runner = ClickhouseIntegrationTestsRunner(result_path, params) logging.info("Running tests") From 711da9505e990094b970d008e4307d3dee6c2a10 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 27 Feb 2024 16:02:30 +0100 Subject: [PATCH 254/356] Fix liter issues in conftest.py --- tests/integration/conftest.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 3e03a49aaec..a4b18ff523a 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,12 +1,11 @@ #!/usr/bin/env python3 -import pytest import logging import os -from helpers.cluster import run_and_check -from helpers.test_tools import TSV -from helpers.network import _NetworkManager +import pytest # pylint:disable=import-error; for style check +from helpers.cluster import run_and_check +from helpers.network import _NetworkManager # This is a workaround for a problem with logging in pytest [1]. # @@ -34,32 +33,35 @@ def tune_local_port_range(): def cleanup_environment(): try: if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) == 1: - logging.debug(f"Cleaning all iptables rules") + logging.debug("Cleaning all iptables rules") _NetworkManager.clean_all_user_iptables_rules() result = run_and_check(["docker ps | wc -l"], shell=True) if int(result) > 1: if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) != 1: logging.warning( - f"Docker containters({int(result)}) are running before tests run. They can be left from previous pytest run and cause test failures.\n" - "You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with --cleanup-containers argument to enable automatic containers cleanup." + "Docker containters(%s) are running before tests run. " + "They can be left from previous pytest run and cause test failures.\n" + "You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with " + "--cleanup-containers argument to enable automatic containers cleanup.", + int(result), ) else: logging.debug("Trying to kill unstopped containers...") run_and_check( - [f"docker kill $(docker container list --all --quiet)"], + ["docker kill $(docker container list --all --quiet)"], shell=True, nothrow=True, ) run_and_check( - [f"docker rm $docker container list --all --quiet)"], + ["docker rm $docker container list --all --quiet)"], shell=True, nothrow=True, ) logging.debug("Unstopped containers killed") r = run_and_check(["docker-compose", "ps", "--services", "--all"]) - logging.debug(f"Docker ps before start:{r.stdout}") + logging.debug("Docker ps before start:%s", r.stdout) else: - logging.debug(f"No running containers") + logging.debug("No running containers") logging.debug("Pruning Docker networks") run_and_check( @@ -68,8 +70,7 @@ def cleanup_environment(): nothrow=True, ) except Exception as e: - logging.exception(f"cleanup_environment:{str(e)}") - pass + logging.exception("cleanup_environment:%s", e) yield From 770d7104745e7755feed946883ca962184d938a7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 27 Feb 2024 17:39:40 +0100 Subject: [PATCH 255/356] Fix linter issues in sqllogic module --- tests/sqllogic/connection.py | 24 ++++---- tests/sqllogic/exceptions.py | 26 ++------ tests/sqllogic/runner.py | 41 +++++++------ tests/sqllogic/test_parser.py | 75 +++++++++++------------ tests/sqllogic/test_runner.py | 108 ++++++++++++++++------------------ 5 files changed, 124 insertions(+), 150 deletions(-) diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py index 2d5e1f8a9e9..8bbb76d83df 100644 --- a/tests/sqllogic/connection.py +++ b/tests/sqllogic/connection.py @@ -1,18 +1,15 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import datetime -import logging -import pyodbc -import sqlite3 -import traceback import enum +import logging import random +import sqlite3 import string from contextlib import contextmanager +import pyodbc # pylint:disable=import-error; for style check from exceptions import ProgramError - logger = logging.getLogger("connection") logger.setLevel(logging.DEBUG) @@ -22,9 +19,7 @@ class OdbcConnectingArgs: self._kwargs = kwargs def __str__(self): - conn_str = ";".join( - ["{}={}".format(x, y) for x, y in self._kwargs.items() if y] - ) + conn_str = ";".join([f"{x}={y}" for x, y in self._kwargs.items() if y]) return conn_str def update_database(self, database): @@ -49,6 +44,7 @@ class OdbcConnectingArgs: for kv in conn_str.split(";"): if kv: k, v = kv.split("=", 1) + # pylint:disable-next=protected-access args._kwargs[k] = v return args @@ -82,7 +78,7 @@ class KnownDBMS(str, enum.Enum): clickhouse = "ClickHouse" -class ConnectionWrap(object): +class ConnectionWrap: def __init__(self, connection=None, factory=None, factory_kwargs=None): self._factory = factory self._factory_kwargs = factory_kwargs @@ -126,7 +122,7 @@ class ConnectionWrap(object): f"SELECT name FROM system.tables WHERE database='{self.DATABASE_NAME}'" ) elif self.DBMS_NAME == KnownDBMS.sqlite.value: - list_query = f"SELECT name FROM sqlite_master WHERE type='table'" + list_query = "SELECT name FROM sqlite_master WHERE type='table'" else: logger.warning( "unable to drop all tables for unknown database: %s", self.DBMS_NAME @@ -154,7 +150,7 @@ class ConnectionWrap(object): self._use_database(database) logger.info( "currentDatabase : %s", - execute_request(f"SELECT currentDatabase()", self).get_result(), + execute_request("SELECT currentDatabase()", self).get_result(), ) @contextmanager @@ -174,7 +170,7 @@ class ConnectionWrap(object): def __exit__(self, *args): if hasattr(self._connection, "close"): - return self._connection.close() + self._connection.close() def setup_connection(engine, conn_str=None, make_debug_request=True): @@ -263,7 +259,7 @@ class ExecResult: def assert_no_exception(self): if self.has_exception(): raise ProgramError( - f"request doesn't have a result set, it has the exception", + "request doesn't have a result set, it has the exception", parent=self._exception, ) diff --git a/tests/sqllogic/exceptions.py b/tests/sqllogic/exceptions.py index 30c8983d80f..2e4da3fd78b 100644 --- a/tests/sqllogic/exceptions.py +++ b/tests/sqllogic/exceptions.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from enum import Enum - class Error(Exception): def __init__( @@ -45,16 +43,8 @@ class Error(Exception): @property def reason(self): - return ", ".join( - ( - str(x) - for x in [ - super().__str__(), - "details: {}".format(self._details) if self._details else "", - ] - if x - ) - ) + details = f"details: {self._details}" if self._details else "" + 
return ", ".join((str(x) for x in [super().__str__(), details] if x)) def set_details(self, file=None, name=None, pos=None, request=None, details=None): if file is not None: @@ -88,16 +78,8 @@ class ErrorWithParent(Error): @property def reason(self): - return ", ".join( - ( - str(x) - for x in [ - super().reason, - "exception: {}".format(str(self._parent)) if self._parent else "", - ] - if x - ) - ) + exception = f"exception: {self._parent}" if self._parent else "" + return ", ".join((str(x) for x in [super().reason, exception] if x)) class ProgramError(ErrorWithParent): diff --git a/tests/sqllogic/runner.py b/tests/sqllogic/runner.py index 5f4baf8e59b..2e8e098a099 100755 --- a/tests/sqllogic/runner.py +++ b/tests/sqllogic/runner.py @@ -2,20 +2,25 @@ # -*- coding: utf-8 -*- import argparse -import enum -import os -import logging import csv +import enum import json +import logging import multiprocessing +import os from functools import reduce -from deepdiff import DeepDiff -from connection import setup_connection, Engines, default_clickhouse_odbc_conn_str -from test_runner import TestRunner, Status, RequestType +# isort: off +from deepdiff import DeepDiff # pylint:disable=import-error; for style check +# isort: on -LEVEL_NAMES = [x.lower() for x in logging._nameToLevel.keys() if x != logging.NOTSET] +from connection import Engines, default_clickhouse_odbc_conn_str, setup_connection +from test_runner import RequestType, Status, TestRunner + +LEVEL_NAMES = [ # pylint:disable-next=protected-access + l.lower() for l, n in logging._nameToLevel.items() if n != logging.NOTSET +] def setup_logger(args): @@ -41,7 +46,7 @@ def __write_check_status(status_row, out_dir): if len(status_row) > 140: status_row = status_row[0:135] + "..." check_status_path = os.path.join(out_dir, "check_status.tsv") - with open(check_status_path, "a") as stream: + with open(check_status_path, "a", encoding="utf-8") as stream: writer = csv.writer(stream, delimiter="\t", lineterminator="\n") writer.writerow(status_row) @@ -60,7 +65,7 @@ def __write_test_result( ): all_stages = reports.keys() test_results_path = os.path.join(out_dir, "test_results.tsv") - with open(test_results_path, "a") as stream: + with open(test_results_path, "a", encoding="utf-8") as stream: writer = csv.writer(stream, delimiter="\t", lineterminator="\n") for stage in all_stages: report = reports[stage] @@ -182,7 +187,7 @@ def mode_check_statements(parser): input_dir, f"check statements:: not a dir {input_dir}" ) - reports = dict() + reports = {} out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") @@ -242,7 +247,7 @@ def mode_check_complete(parser): input_dir, f"check statements:: not a dir {input_dir}" ) - reports = dict() + reports = {} out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") @@ -286,9 +291,9 @@ def make_actual_report(reports): return {stage: report.get_map() for stage, report in reports.items()} -def write_actual_report(actial, out_dir): - with open(os.path.join(out_dir, "actual_report.json"), "w") as f: - f.write(json.dumps(actial)) +def write_actual_report(actual, out_dir): + with open(os.path.join(out_dir, "actual_report.json"), "w", encoding="utf-8") as f: + f.write(json.dumps(actual)) def read_canonic_report(input_dir): @@ -296,13 +301,15 @@ def read_canonic_report(input_dir): if not os.path.exists(file): return {} - with open(os.path.join(input_dir, "canonic_report.json"), "r") as f: + with open( + os.path.join(input_dir, "canonic_report.json"), "r", encoding="utf-8" + ) as f: data = f.read() return 
json.loads(data) def write_canonic_report(canonic, out_dir): - with open(os.path.join(out_dir, "canonic_report.json"), "w") as f: + with open(os.path.join(out_dir, "canonic_report.json"), "w", encoding="utf-8") as f: f.write(json.dumps(canonic)) @@ -370,7 +377,7 @@ def mode_self_test(parser): if not os.path.isdir(out_dir): raise NotADirectoryError(out_dir, f"self test: not a dir {out_dir}") - reports = dict() + reports = {} out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") diff --git a/tests/sqllogic/test_parser.py b/tests/sqllogic/test_parser.py index cb1144d7dd9..648fa9f6bf6 100755 --- a/tests/sqllogic/test_parser.py +++ b/tests/sqllogic/test_parser.py @@ -2,24 +2,27 @@ # -*- coding: utf-8 -*- import logging -import os - -from itertools import chain from enum import Enum -from hashlib import md5 from functools import reduce +from hashlib import md5 +from itertools import chain + +# isort: off +# pylint:disable=import-error; for style check import sqlglot -from sqlglot.expressions import PrimaryKeyColumnConstraint, ColumnDef +from sqlglot.expressions import ColumnDef, PrimaryKeyColumnConstraint + +# pylint:enable=import-error; for style check +# isort: on from exceptions import ( - Error, - ProgramError, - ErrorWithParent, DataResultDiffer, + Error, + ErrorWithParent, + ProgramError, QueryExecutionError, ) - logger = logging.getLogger("parser") logger.setLevel(logging.DEBUG) @@ -248,6 +251,7 @@ class FileBlockBase: ) block.with_result(result) return block + raise ValueError(f"Unknown block_type {block_type}") def dump_to(self, output): if output is None: @@ -258,9 +262,6 @@ class FileBlockBase: class FileBlockComments(FileBlockBase): - def __init__(self, parser, start, end): - super().__init__(parser, start, end) - def get_block_type(self): return BlockType.comments @@ -469,20 +470,18 @@ class QueryResult: ( str(x) for x in [ - "rows: {}".format(self.rows) if self.rows else "", - "values_count: {}".format(self.values_count) - if self.values_count - else "", - "data_hash: {}".format(self.data_hash) if self.data_hash else "", - "exception: {}".format(self.exception) if self.exception else "", - "hash_threshold: {}".format(self.hash_threshold) + f"rows: {self.rows}" if self.rows else "", + f"values_count: {self.values_count}" if self.values_count else "", + f"data_hash: {self.data_hash}" if self.data_hash else "", + f"exception: {self.exception}" if self.exception else "", + f"hash_threshold: {self.hash_threshold}" if self.hash_threshold else "", ] if x ) ) - return "QueryResult({})".format(params) + return f"QueryResult({params})" def __iter__(self): if self.rows is not None: @@ -491,12 +490,10 @@ class QueryResult: if self.values_count <= self.hash_threshold: return iter(self.rows) if self.data_hash is not None: - return iter( - [["{} values hashing to {}".format(self.values_count, self.data_hash)]] - ) + return iter([[f"{self.values_count} values hashing to {self.data_hash}"]]) if self.exception is not None: - return iter([["exception: {}".format(self.exception)]]) - raise ProgramError("Query result is empty", details="{}".format(self.__str__())) + return iter([[f"exception: {self.exception}"]]) + raise ProgramError("Query result is empty", details=str(self)) @staticmethod def __value_count(rows): @@ -528,7 +525,7 @@ class QueryResult: for row in rows: res_row = [] for c, t in zip(row, types): - logger.debug(f"Builging row. c:{c} t:{t}") + logger.debug("Builging row. 
c:%s t:%s", c, t) if c is None: res_row.append("NULL") continue @@ -541,7 +538,7 @@ class QueryResult: elif t == "I": try: res_row.append(str(int(c))) - except ValueError as ex: + except ValueError: # raise QueryExecutionError( # f"Got non-integer result '{c}' for I type." # ) @@ -549,7 +546,7 @@ class QueryResult: except OverflowError as ex: raise QueryExecutionError( f"Got overflowed result '{c}' for I type." - ) + ) from ex elif t == "R": res_row.append(f"{c:.3f}") @@ -567,6 +564,7 @@ class QueryResult: values = list(chain(*rows)) values.sort() return [values] if values else [] + return [] @staticmethod def __calculate_hash(rows): @@ -595,9 +593,9 @@ class QueryResult: # do not print details to the test file # but print original exception if isinstance(e, ErrorWithParent): - message = "{}, original is: {}".format(e, e.get_parent()) + message = f"{e}, original is: {e.get_parent()}" else: - message = "{}".format(e) + message = str(e) return QueryResult(exception=message) @@ -616,9 +614,8 @@ class QueryResult: "canonic and actual results have different exceptions", details=f"canonic: {canonic.exception}, actual: {actual.exception}", ) - else: - # exceptions are the same - return + # exceptions are the same + return elif canonic.exception is not None: raise DataResultDiffer( "canonic result has exception and actual result doesn't", @@ -639,9 +636,8 @@ class QueryResult: if canonic.values_count != actual.values_count: raise DataResultDiffer( "canonic and actual results have different value count", - details="canonic values count {}, actual {}".format( - canonic.values_count, actual.values_count - ), + details=f"canonic values count {canonic.values_count}, " + f"actual {actual.values_count}", ) if canonic.data_hash != actual.data_hash: raise DataResultDiffer( @@ -653,9 +649,8 @@ class QueryResult: if canonic.values_count != actual.values_count: raise DataResultDiffer( "canonic and actual results have different value count", - details="canonic values count {}, actual {}".format( - canonic.values_count, actual.values_count - ), + details=f"canonic values count {canonic.values_count}, " + f"actual {actual.values_count}", ) if canonic.rows != actual.rows: raise DataResultDiffer( @@ -665,5 +660,5 @@ class QueryResult: raise ProgramError( "Unable to compare results", - details="actual {}, canonic {}".format(actual, canonic), + details=f"actual {actual}, canonic {canonic}", ) diff --git a/tests/sqllogic/test_runner.py b/tests/sqllogic/test_runner.py index baec0dc7924..8f2242a45b9 100644 --- a/tests/sqllogic/test_runner.py +++ b/tests/sqllogic/test_runner.py @@ -1,25 +1,23 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- import enum -import logging -import os -import traceback import io import json +import logging +import os import test_parser +from connection import execute_request from exceptions import ( + DataResultDiffer, Error, ProgramError, - DataResultDiffer, - StatementExecutionError, - StatementSuccess, QueryExecutionError, QuerySuccess, SchemeResultDiffer, + StatementExecutionError, + StatementSuccess, ) -from connection import execute_request - logger = logging.getLogger("parser") logger.setLevel(logging.DEBUG) @@ -55,6 +53,7 @@ class Status(str, enum.Enum): class TestStatus: def __init__(self): + self.name = None self.status = None self.file = None self.position = None @@ -155,7 +154,7 @@ class SimpleStats: self.success += 1 def get_map(self): - result = dict() + result = {} result["success"] = self.success result["fail"] = self.fail return result @@ -187,7 +186,7 @@ class Stats: 
choose.update(status) def get_map(self): - result = dict() + result = {} result["statements"] = self.statements.get_map() result["queries"] = self.queries.get_map() result["total"] = self.total.get_map() @@ -205,7 +204,7 @@ class OneReport: self.test_name = test_name self.test_file = test_file self.stats = Stats() - self.requests = dict() # type: dict(int, TestStatus) + self.requests = {} def update(self, status): if not isinstance(status, TestStatus): @@ -218,11 +217,11 @@ class OneReport: return str(self.get_map()) def get_map(self): - result = dict() + result = {} result["test_name"] = self.test_name result["test_file"] = self.test_file result["stats"] = self.stats.get_map() - result["requests"] = dict() + result["requests"] = {} requests = result["requests"] for pos, status in self.requests.items(): requests[pos] = status.get_map() @@ -233,7 +232,7 @@ class Report: def __init__(self, dbms_name, input_dir=None): self.dbms_name = dbms_name self.stats = Stats() - self.tests = dict() # type: dict(str, OneReport) + self.tests = {} self.input_dir = input_dir self.output_dir = None @@ -256,7 +255,7 @@ class Report: self.output_dir = res_dir def get_map(self): - result = dict() + result = {} result["dbms_name"] = self.dbms_name result["stats"] = self.stats.get_map() result["input_dir"] = self.input_dir @@ -264,7 +263,7 @@ class Report: result["input_dir"] = self.input_dir if self.output_dir is not None: result["output_dir"] = self.output_dir - result["tests"] = dict() + result["tests"] = {} tests = result["tests"] for test_name, one_report in self.tests.items(): tests.update({test_name: one_report.get_map()}) @@ -297,8 +296,8 @@ class Report: def write_report(self, report_dir): report_path = os.path.join(report_dir, "report.json") - logger.info(f"create file {report_path}") - with open(report_path, "w") as stream: + logger.info("create file %s", report_path) + with open(report_path, "w", encoding="utf-8") as stream: stream.write(json.dumps(self.get_map(), indent=4)) @@ -434,38 +433,34 @@ class TestRunner: details=f"expected error: {expected_error}", parent=exec_res.get_exception(), ) - else: - clogger.debug("errors matched") - raise QuerySuccess() - else: - clogger.debug("missed error") - raise QueryExecutionError( - "query is expected to fail with error", - details="expected error: {}".format(expected_error), + clogger.debug("errors matched") + raise QuerySuccess() + clogger.debug("missed error") + raise QueryExecutionError( + "query is expected to fail with error", + details=f"expected error: {expected_error}", + ) + clogger.debug("success is expected") + if exec_res.has_exception(): + clogger.debug("had error") + if self.verify: + clogger.debug("verify mode") + canonic = test_parser.QueryResult.parse_it( + block.get_result(), 10 ) - else: - clogger.debug("success is expected") - if exec_res.has_exception(): - clogger.debug("had error") - if self.verify: - clogger.debug("verify mode") - canonic = test_parser.QueryResult.parse_it( - block.get_result(), 10 - ) - exception = QueryExecutionError( - "query execution failed with an exception", - parent=exec_res.get_exception(), - ) - actual = test_parser.QueryResult.as_exception(exception) - test_parser.QueryResult.assert_eq(canonic, actual) - block.with_result(actual) - raise QuerySuccess() - else: - clogger.debug("completion mode") - raise QueryExecutionError( - "query execution failed with an exception", - parent=exec_res.get_exception(), - ) + exception = QueryExecutionError( + "query execution failed with an exception", + 
parent=exec_res.get_exception(), + ) + actual = test_parser.QueryResult.as_exception(exception) + test_parser.QueryResult.assert_eq(canonic, actual) + block.with_result(actual) + raise QuerySuccess() + clogger.debug("completion mode") + raise QueryExecutionError( + "query execution failed with an exception", + parent=exec_res.get_exception(), + ) canonic_types = block.get_types() clogger.debug("canonic types %s", canonic_types) @@ -476,9 +471,8 @@ class TestRunner: if canonic_columns_count != actual_columns_count: raise SchemeResultDiffer( "canonic and actual columns count differ", - details="expected columns {}, actual columns {}".format( - canonic_columns_count, actual_columns_count - ), + details=f"expected columns {canonic_columns_count}, " + f"actual columns {actual_columns_count}", ) actual = test_parser.QueryResult.make_it( @@ -528,7 +522,7 @@ class TestRunner: self.report = Report(self.dbms_name, self._input_dir) if self.results is None: - self.results = dict() + self.results = {} if self.dbms_name == "ClickHouse" and test_name in [ "test/select5.test", @@ -536,7 +530,7 @@ class TestRunner: "test/evidence/slt_lang_replace.test", "test/evidence/slt_lang_droptrigger.test", ]: - logger.info(f"Let's skip test %s for ClickHouse", test_name) + logger.info("Let's skip test %s for ClickHouse", test_name) return with self.connection.with_one_test_scope(): @@ -565,7 +559,7 @@ class TestRunner: test_name = os.path.relpath(test_file, start=self._input_dir) logger.debug("open file %s", test_name) - with open(test_file, "r") as stream: + with open(test_file, "r", encoding="utf-8") as stream: self.run_one_test(stream, test_name, test_file) def run_all_tests_from_dir(self, input_dir): @@ -582,10 +576,10 @@ class TestRunner: for test_name, stream in self.results.items(): test_file = os.path.join(dir_path, test_name) - logger.info(f"create file {test_file}") + logger.info("create file %s", test_file) result_dir = os.path.dirname(test_file) os.makedirs(result_dir, exist_ok=True) - with open(test_file, "w") as output: + with open(test_file, "w", encoding="utf-8") as output: output.write(stream.getvalue()) def write_report(self, report_dir): From 34bb40583cd0180f79c380b93808ca7ad441bc96 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 28 Feb 2024 22:58:33 +0100 Subject: [PATCH 256/356] Install stubs for python-requests --- docker/test/style/Dockerfile | 1 + tests/ci/.mypy.ini | 2 +- tests/ci/build_download_helper.py | 5 +++-- .../ci/cancel_and_rerun_workflow_lambda/app.py | 2 +- tests/ci/ci_runners_metrics_lambda/app.py | 2 +- tests/ci/clean_lost_instances_lambda/app.py | 2 +- tests/ci/clickhouse_helper.py | 2 +- tests/ci/download_release_packages.py | 2 +- tests/ci/get_previous_release_tag.py | 2 +- tests/ci/jepsen_check.py | 2 +- .../lambda_shared/__init__.py | 2 +- .../lambda_shared/token.py | 2 +- tests/ci/runner_token_rotation_lambda/app.py | 2 +- tests/ci/slack_bot_ci_lambda/app.py | 18 ++++++------------ tests/ci/team_keys_lambda/app.py | 2 +- tests/ci/workflow_approve_rerun_lambda/app.py | 2 +- 16 files changed, 23 insertions(+), 27 deletions(-) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 6153aec2144..122f558bab2 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -19,6 +19,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ yamllint \ locales \ && pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \ + requests types-requests \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \ && rm -rf /root/.cache/pip diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini index bcaac49f190..9bc44025826 100644 --- a/tests/ci/.mypy.ini +++ b/tests/ci/.mypy.ini @@ -14,4 +14,4 @@ warn_unused_ignores = False warn_return_any = True no_implicit_reexport = True strict_equality = True -strict_concatenate = True +extra_checks = True diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index c7a461e003b..66734f58b23 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -9,7 +9,7 @@ from pathlib import Path from typing import Any, Callable, List, Union # isort: off -import requests # type: ignore +import requests # isort: on @@ -90,7 +90,8 @@ def get_gh_api( ratelimit_exceeded = ( e.response.status_code == 403 and b"rate limit exceeded" - in e.response._content # pylint:disable=protected-access + # pylint:disable-next=protected-access + in (e.response._content or b"") ) try_auth = e.response.status_code == 404 if (ratelimit_exceeded or try_auth) and not token_is_set: diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 656198c6985..4b7a931f772 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -9,7 +9,7 @@ from queue import Queue from threading import Thread from typing import Any, Dict, List, Optional, Tuple -import requests # type: ignore +import requests from lambda_shared.pr import CATEGORY_TO_LABEL, check_pr_description from lambda_shared.token import get_cached_access_token diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index 5cb1e45dd14..147ff127275 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -11,7 +11,7 @@ import sys from datetime import datetime from typing import Dict, List -import requests # type: ignore +import requests import boto3 # type: ignore from botocore.exceptions import ClientError # type: ignore diff --git a/tests/ci/clean_lost_instances_lambda/app.py b/tests/ci/clean_lost_instances_lambda/app.py index 
65f6ff78d4a..5ec929a59e0 100644 --- a/tests/ci/clean_lost_instances_lambda/app.py +++ b/tests/ci/clean_lost_instances_lambda/app.py @@ -12,7 +12,7 @@ from datetime import datetime from dataclasses import dataclass from typing import Dict, List -import requests # type: ignore +import requests import boto3 # type: ignore from botocore.exceptions import ClientError # type: ignore diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 3ab41673dec..637c4519d3d 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -6,7 +6,7 @@ import time from pathlib import Path from typing import Dict, List, Optional -import requests # type: ignore +import requests from get_robot_token import get_parameter_from_ssm from pr_info import PRInfo from report import TestResults diff --git a/tests/ci/download_release_packages.py b/tests/ci/download_release_packages.py index 26223de2f8a..550301e8fa2 100755 --- a/tests/ci/download_release_packages.py +++ b/tests/ci/download_release_packages.py @@ -3,7 +3,7 @@ import os import logging -import requests # type: ignore +import requests from requests.adapters import HTTPAdapter # type: ignore from urllib3.util.retry import Retry # type: ignore diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index b74432425d3..bc0cb975ef5 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -4,7 +4,7 @@ import logging import re from typing import List, Optional, Tuple -import requests # type: ignore +import requests CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" CLICKHOUSE_PACKAGE_URL = ( diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 01263c3085d..011ecff635e 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -9,7 +9,7 @@ from pathlib import Path from typing import Any, List import boto3 # type: ignore -import requests # type: ignore +import requests from build_download_helper import ( download_build_with_progress, get_build_name_for_check, diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py index 043a0310d11..8b53f9dcb23 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -8,7 +8,7 @@ from collections import namedtuple from typing import Any, Dict, Iterable, List, Optional import boto3 # type: ignore -import requests # type: ignore +import requests RUNNER_TYPE_LABELS = [ "builder", diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index 6d5653f6a58..f9860f6ad2a 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -6,7 +6,7 @@ from typing import Tuple import boto3 # type: ignore import jwt -import requests # type: ignore +import requests from . 
import cached_value_is_valid diff --git a/tests/ci/runner_token_rotation_lambda/app.py b/tests/ci/runner_token_rotation_lambda/app.py index 6544eee9581..03d11809d07 100644 --- a/tests/ci/runner_token_rotation_lambda/app.py +++ b/tests/ci/runner_token_rotation_lambda/app.py @@ -4,7 +4,7 @@ import argparse import sys import boto3 # type: ignore -import requests # type: ignore +import requests from lambda_shared.token import get_cached_access_token, get_access_token_by_key_app diff --git a/tests/ci/slack_bot_ci_lambda/app.py b/tests/ci/slack_bot_ci_lambda/app.py index 45e14138335..9e39f937bf2 100755 --- a/tests/ci/slack_bot_ci_lambda/app.py +++ b/tests/ci/slack_bot_ci_lambda/app.py @@ -17,12 +17,12 @@ It's deployed to slack-bot-ci-lambda in CI/CD account See also: https://aretestsgreenyet.com/ """ -import os -import json import base64 +import json +import os import random -import requests # type: ignore +import requests DRY_RUN_MARK = "" @@ -139,13 +139,11 @@ def get_play_url(query): def run_clickhouse_query(query): - url = "https://play.clickhouse.com/?user=play&query=" + requests.utils.quote(query) + url = "https://play.clickhouse.com/?user=play&query=" + requests.compat.quote(query) res = requests.get(url) if res.status_code != 200: print("Failed to execute query: ", res.status_code, res.content) - raise Exception( - "Failed to execute query: {}: {}".format(res.status_code, res.content) - ) + res.raise_for_status() lines = res.text.strip().splitlines() return [x.split("\t") for x in lines] @@ -283,11 +281,7 @@ def send_to_slack_impl(message): res = requests.post(SLACK_URL, json.dumps(payload)) if res.status_code != 200: print("Failed to send a message to Slack: ", res.status_code, res.content) - raise Exception( - "Failed to send a message to Slack: {}: {}".format( - res.status_code, res.content - ) - ) + res.raise_for_status() def send_to_slack(message): diff --git a/tests/ci/team_keys_lambda/app.py b/tests/ci/team_keys_lambda/app.py index f562fbe101d..8d3a3502861 100644 --- a/tests/ci/team_keys_lambda/app.py +++ b/tests/ci/team_keys_lambda/app.py @@ -7,7 +7,7 @@ from datetime import datetime from queue import Queue from threading import Thread -import requests # type: ignore +import requests import boto3 # type: ignore diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 5e68f2d4b53..c72deea7c7e 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -6,7 +6,7 @@ import time from collections import namedtuple from urllib.parse import quote -import requests # type: ignore +import requests from lambda_shared.pr import TRUSTED_CONTRIBUTORS from lambda_shared.token import get_cached_access_token From d2f92483f298491f6380a2754e3810998764628f Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 29 Feb 2024 02:21:39 +0100 Subject: [PATCH 257/356] Add even more python code to pylint check, fix lambdas --- pyproject.toml | 3 +- tests/ci/autoscale_runners_lambda/app.py | 4 +- .../cancel_and_rerun_workflow_lambda/app.py | 17 ++++--- tests/ci/ci_runners_metrics_lambda/app.py | 17 ++----- tests/ci/clean_lost_instances_lambda/app.py | 10 ++--- tests/ci/runner_token_rotation_lambda/app.py | 7 +-- tests/ci/slack_bot_ci_lambda/app.py | 45 +++++++++---------- tests/ci/team_keys_lambda/app.py | 9 ++-- tests/ci/terminate_runner_lambda/app.py | 8 ++-- tests/ci/workflow_approve_rerun_lambda/app.py | 14 +++--- utils/check-style/check-style | 10 ++++- 11 files changed, 75 insertions(+), 69 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 277b24d4fd7..3d05abd9ec2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,8 @@ max-statements=200 [tool.pylint.'MESSAGES CONTROL'] # pytest.mark.parametrize is not callable (not-callable) -disable = '''missing-docstring, +disable = ''' + missing-docstring, too-few-public-methods, invalid-name, too-many-arguments, diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index 26a05ab0af4..1aa2e424320 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -51,7 +51,7 @@ class Queue: label: str -def get_scales(runner_type: str) -> Tuple[int, int]: +def get_scales() -> Tuple[int, int]: "returns the multipliers for scaling down and up ASG by types" # Scaling down is quicker on the lack of running jobs than scaling up on # queue @@ -95,7 +95,7 @@ def set_capacity( continue raise ValueError("Queue status is not in ['in_progress', 'queued']") - scale_down, scale_up = get_scales(runner_type) + scale_down, scale_up = get_scales() # With lyfecycle hooks some instances are actually free because some of # them are in 'Terminating:Wait' state effective_capacity = max( diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 4b7a931f772..625936ec5c8 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -1,16 +1,15 @@ #!/usr/bin/env python3 import json -import re import time from base64 import b64decode from collections import namedtuple from queue import Queue from threading import Thread -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional import requests -from lambda_shared.pr import CATEGORY_TO_LABEL, check_pr_description +from lambda_shared.pr import check_pr_description from lambda_shared.token import get_cached_access_token NEED_RERUN_OR_CANCELL_WORKFLOWS = { @@ -48,16 +47,18 @@ class Worker(Thread): def _exec_get_with_retry(url: str, token: str) -> dict: headers = {"Authorization": f"token {token}"} + e = Exception() for i in range(MAX_RETRY): try: - response = requests.get(url, headers=headers) + response = requests.get(url, headers=headers, timeout=30) response.raise_for_status() return response.json() # type: ignore except Exception as ex: print("Got exception executing request", ex) + e = ex time.sleep(i + 1) - raise Exception("Cannot execute GET request with retries") + raise requests.HTTPError("Cannot execute GET request with retries") from e WorkflowDescription = namedtuple( @@ -215,16 +216,18 @@ def get_workflow_description(workflow_url: str, token: str) -> WorkflowDescripti def _exec_post_with_retry(url: str, token: str, json: Optional[Any] = None) -> Any: 
headers = {"Authorization": f"token {token}"} + e = Exception() for i in range(MAX_RETRY): try: - response = requests.post(url, headers=headers, json=json) + response = requests.post(url, headers=headers, json=json, timeout=30) response.raise_for_status() return response.json() except Exception as ex: print("Got exception executing request", ex) + e = ex time.sleep(i + 1) - raise Exception("Cannot execute POST request with retry") + raise requests.HTTPError("Cannot execute POST request with retry") from e def exec_workflow_url(urls_to_post, token): diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index 147ff127275..47161215a97 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -8,23 +8,14 @@ Lambda function to: import argparse import sys -from datetime import datetime -from typing import Dict, List +from typing import Dict -import requests import boto3 # type: ignore -from botocore.exceptions import ClientError # type: ignore - -from lambda_shared import ( - RUNNER_TYPE_LABELS, - RunnerDescription, - RunnerDescriptions, - list_runners, -) +from lambda_shared import RUNNER_TYPE_LABELS, RunnerDescriptions, list_runners from lambda_shared.token import ( + get_access_token_by_key_app, get_cached_access_token, get_key_and_app_from_aws, - get_access_token_by_key_app, ) UNIVERSAL_LABEL = "universal" @@ -162,7 +153,7 @@ if __name__ == "__main__": if args.private_key: private_key = args.private_key elif args.private_key_path: - with open(args.private_key_path, "r") as key_file: + with open(args.private_key_path, "r", encoding="utf-8") as key_file: private_key = key_file.read() else: print("Attempt to get key and id from AWS secret manager") diff --git a/tests/ci/clean_lost_instances_lambda/app.py b/tests/ci/clean_lost_instances_lambda/app.py index 5ec929a59e0..4accc14f7ae 100644 --- a/tests/ci/clean_lost_instances_lambda/app.py +++ b/tests/ci/clean_lost_instances_lambda/app.py @@ -8,14 +8,13 @@ Lambda function to: import argparse import sys -from datetime import datetime from dataclasses import dataclass +from datetime import datetime from typing import Dict, List -import requests import boto3 # type: ignore +import requests from botocore.exceptions import ClientError # type: ignore - from lambda_shared import ( RUNNER_TYPE_LABELS, RunnerDescription, @@ -23,9 +22,9 @@ from lambda_shared import ( list_runners, ) from lambda_shared.token import ( + get_access_token_by_key_app, get_cached_access_token, get_key_and_app_from_aws, - get_access_token_by_key_app, ) UNIVERSAL_LABEL = "universal" @@ -140,6 +139,7 @@ def delete_runner(access_token: str, runner: RunnerDescription) -> bool: response = requests.delete( f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers, + timeout=30, ) response.raise_for_status() print(f"Response code deleting {runner.name} is {response.status_code}") @@ -325,7 +325,7 @@ if __name__ == "__main__": if args.private_key: private_key = args.private_key elif args.private_key_path: - with open(args.private_key_path, "r") as key_file: + with open(args.private_key_path, "r", encoding="utf-8") as key_file: private_key = key_file.read() else: print("Attempt to get key and id from AWS secret manager") diff --git a/tests/ci/runner_token_rotation_lambda/app.py b/tests/ci/runner_token_rotation_lambda/app.py index 03d11809d07..445704648a9 100644 --- a/tests/ci/runner_token_rotation_lambda/app.py +++ b/tests/ci/runner_token_rotation_lambda/app.py @@ -5,8 
+5,7 @@ import sys import boto3 # type: ignore import requests - -from lambda_shared.token import get_cached_access_token, get_access_token_by_key_app +from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token def get_runner_registration_token(access_token): @@ -17,6 +16,7 @@ def get_runner_registration_token(access_token): response = requests.post( "https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers, + timeout=30, ) response.raise_for_status() data = response.json() @@ -43,6 +43,7 @@ def main(access_token, push_to_ssm, ssm_parameter_name): def handler(event, context): + _, _ = event, context main(get_cached_access_token(), True, "github_runner_registration_token") @@ -85,7 +86,7 @@ if __name__ == "__main__": if args.private_key: private_key = args.private_key else: - with open(args.private_key_path, "r") as key_file: + with open(args.private_key_path, "r", encoding="utf-8") as key_file: private_key = key_file.read() token = get_access_token_by_key_app(private_key, args.app_id) diff --git a/tests/ci/slack_bot_ci_lambda/app.py b/tests/ci/slack_bot_ci_lambda/app.py index 9e39f937bf2..94b71724b1c 100755 --- a/tests/ci/slack_bot_ci_lambda/app.py +++ b/tests/ci/slack_bot_ci_lambda/app.py @@ -140,7 +140,7 @@ def get_play_url(query): def run_clickhouse_query(query): url = "https://play.clickhouse.com/?user=play&query=" + requests.compat.quote(query) - res = requests.get(url) + res = requests.get(url, timeout=30) if res.status_code != 200: print("Failed to execute query: ", res.status_code, res.content) res.raise_for_status() @@ -157,9 +157,9 @@ def split_broken_and_flaky_tests(failed_tests): flaky_tests = [] for name, report, count_prev_str, count_str in failed_tests: count_prev, count = int(count_prev_str), int(count_str) - if (2 <= count and count_prev < 2) or (count_prev == 1 and count == 1): + if (count_prev < 2 <= count) or (count_prev == count == 1): # It failed 2 times or more within extended time window, it's definitely broken. - # 2 <= count_prev means that it was not reported as broken on previous runs + # 2 <= count means that it was not reported as broken on previous runs broken_tests.append([name, report]) elif 0 < count and count_prev == 0: # It failed only once, can be a rare flaky test @@ -170,19 +170,18 @@ def split_broken_and_flaky_tests(failed_tests): def format_failed_tests_list(failed_tests, failure_type): if len(failed_tests) == 1: - res = "There is a new {} test:\n".format(failure_type) + res = f"There is a new {failure_type} test:\n" else: - res = "There are {} new {} tests:\n".format(len(failed_tests), failure_type) + res = f"There are {len(failed_tests)} new {failure_type} tests:\n" for name, report in failed_tests[:MAX_TESTS_TO_REPORT]: cidb_url = get_play_url(ALL_RECENT_FAILURES_QUERY.format(name)) - res += "- *{}* - <{}|Report> - <{}|CI DB> \n".format( - name, report, cidb_url - ) + res += f"- *{name}* - <{report}|Report> - <{cidb_url}|CI DB> \n" if MAX_TESTS_TO_REPORT < len(failed_tests): - res += "- and {} other tests... :this-is-fine-fire:".format( - len(failed_tests) - MAX_TESTS_TO_REPORT + res += ( + f"- and {len(failed_tests) - MAX_TESTS_TO_REPORT} other " + "tests... :this-is-fine-fire:" ) return res @@ -221,19 +220,16 @@ def get_too_many_failures_message_impl(failures_count): if random.random() < REPORT_NO_FAILURES_PROBABILITY: return None return "Wow, there are *no failures* at all... 
0_o" - if curr_failures < MAX_FAILURES: + return_none = ( + curr_failures < MAX_FAILURES + or curr_failures < prev_failures + or (curr_failures - prev_failures) / prev_failures < 0.2 + ) + if return_none: return None if prev_failures < MAX_FAILURES: - return ":alert: *CI is broken: there are {} failures during the last 24 hours*".format( - curr_failures - ) - if curr_failures < prev_failures: - return None - if (curr_failures - prev_failures) / prev_failures < 0.2: - return None - return "CI is broken and it's getting worse: there are {} failures during the last 24 hours".format( - curr_failures - ) + return f":alert: *CI is broken: there are {curr_failures} failures during the last 24 hours*" + return "CI is broken and it's getting worse: there are {curr_failures} failures during the last 24 hours" def get_too_many_failures_message(failures_count): @@ -252,7 +248,7 @@ def get_failed_checks_percentage_message(percentage): return None msg = ":alert: " if p > 1 else "Only " if p < 0.5 else "" - msg += "*{0:.2f}%* of all checks in master have failed yesterday".format(p) + msg += f"*{p:.2f}%* of all checks in master have failed yesterday" return msg @@ -278,7 +274,7 @@ def send_to_slack_impl(message): payload = SLACK_MESSAGE_JSON.copy() payload["text"] = message - res = requests.post(SLACK_URL, json.dumps(payload)) + res = requests.post(SLACK_URL, json.dumps(payload), timeout=30) if res.status_code != 200: print("Failed to send a message to Slack: ", res.status_code, res.content) res.raise_for_status() @@ -297,7 +293,7 @@ def query_and_alert_if_needed(query, get_message_func): if msg is None: return - msg += "\nCI DB query: <{}|link>".format(get_play_url(query)) + msg += f"\nCI DB query: <{get_play_url(query)}|link>" print("Sending message to slack:", msg) send_to_slack(msg) @@ -311,6 +307,7 @@ def check_and_alert(): def handler(event, context): + _, _ = event, context try: check_and_alert() return {"statusCode": 200, "body": "OK"} diff --git a/tests/ci/team_keys_lambda/app.py b/tests/ci/team_keys_lambda/app.py index 8d3a3502861..c056808a9b4 100644 --- a/tests/ci/team_keys_lambda/app.py +++ b/tests/ci/team_keys_lambda/app.py @@ -2,13 +2,12 @@ import argparse import json - from datetime import datetime from queue import Queue from threading import Thread -import requests import boto3 # type: ignore +import requests class Keys(set): @@ -34,7 +33,7 @@ class Worker(Thread): m = self.queue.get() if m == "": break - response = requests.get(f"https://github.com/{m}.keys") + response = requests.get(f"https://github.com/{m}.keys", timeout=30) self.results.add(f"# {m}\n{response.text}\n") self.queue.task_done() @@ -45,7 +44,9 @@ def get_org_team_members(token: str, org: str, team_slug: str) -> set: "Accept": "application/vnd.github.v3+json", } response = requests.get( - f"https://api.github.com/orgs/{org}/teams/{team_slug}/members", headers=headers + f"https://api.github.com/orgs/{org}/teams/{team_slug}/members", + headers=headers, + timeout=30, ) response.raise_for_status() data = response.json() diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index 010f7dd6734..ab3e33d9e11 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -8,8 +8,7 @@ from dataclasses import dataclass from typing import Any, Dict, List import boto3 # type: ignore - -from lambda_shared import RunnerDescriptions, list_runners, cached_value_is_valid +from lambda_shared import RunnerDescriptions, cached_value_is_valid, list_runners from 
lambda_shared.token import get_access_token_by_key_app, get_cached_access_token @@ -134,7 +133,7 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: candidates = instances_by_zone[zone] total_to_kill += num_to_kill if num_to_kill > len(candidates): - raise Exception( + raise RuntimeError( f"Required to kill {num_to_kill}, but have only {len(candidates)}" f" candidates in AV {zone}" ) @@ -196,6 +195,7 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: def handler(event: dict, context: Any) -> Dict[str, List[str]]: + _ = context return main(get_cached_access_token(), event) @@ -226,7 +226,7 @@ if __name__ == "__main__": if args.private_key: private_key = args.private_key else: - with open(args.private_key_path, "r") as key_file: + with open(args.private_key_path, "r", encoding="utf-8") as key_file: private_key = key_file.read() token = get_access_token_by_key_app(private_key, args.app_id) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index c72deea7c7e..8ed47a54ab1 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -90,26 +90,29 @@ def is_trusted_contributor(pr_user_login, pr_user_orgs): def _exec_get_with_retry(url, token): headers = {"Authorization": f"token {token}"} + e = Exception() for i in range(MAX_RETRY): try: - response = requests.get(url, headers=headers) + response = requests.get(url, headers=headers, timeout=30) response.raise_for_status() return response.json() except Exception as ex: print("Got exception executing request", ex) + e = ex time.sleep(i + 1) - raise Exception("Cannot execute GET request with retries") + raise requests.HTTPError("Cannot execute GET request with retries") from e def _exec_post_with_retry(url, token, data=None): headers = {"Authorization": f"token {token}"} + e = Exception() for i in range(MAX_RETRY): try: if data: - response = requests.post(url, headers=headers, json=data) + response = requests.post(url, headers=headers, json=data, timeout=30) else: - response = requests.post(url, headers=headers) + response = requests.post(url, headers=headers, timeout=30) if response.status_code == 403: data = response.json() if ( @@ -123,9 +126,10 @@ def _exec_post_with_retry(url, token, data=None): return response.json() except Exception as ex: print("Got exception executing request", ex) + e = ex time.sleep(i + 1) - raise Exception("Cannot execute POST request with retry") + raise requests.HTTPError("Cannot execute POST request with retry") from e def _get_pull_requests_from(repo_url, owner, branch, token): diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 620aec2fda2..d1fb2d81a28 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -152,7 +152,15 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | grep -vP $EXCLUDE_DIRS | xargs xmllint --noout --nonet -pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test $ROOT_PATH/tests/**/*.py +function xargs-pylint { + # $1 is number maximum arguments per pylint process + sort | awk '$2=="text/x-script.python" {print $1}' | \ + xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/.pylintrc" --persistent=no --score=n +} + +find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50 +# Beware, there lambdas are checked. 
All of them contain `app`, and it causes brain-cucumber-zalgo +find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1 find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | From 859044221f0dc490d1464b8fe8adc887e8c842ac Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 29 Feb 2024 12:23:04 +0100 Subject: [PATCH 258/356] Fix tests/integration/runner linter issues --- tests/integration/runner | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/tests/integration/runner b/tests/integration/runner index b1193b5b471..f1d5198f545 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -39,9 +39,7 @@ def check_args_and_update_paths(args): else: CLICKHOUSE_ROOT = args.clickhouse_root else: - logging.info( - "ClickHouse root is not set. Will use %s" % (DEFAULT_CLICKHOUSE_ROOT) - ) + logging.info("ClickHouse root is not set. Will use %s", DEFAULT_CLICKHOUSE_ROOT) CLICKHOUSE_ROOT = DEFAULT_CLICKHOUSE_ROOT if not os.path.isabs(args.binary): @@ -74,9 +72,7 @@ def check_args_and_update_paths(args): args.base_configs_dir = os.path.abspath( os.path.join(CLICKHOUSE_ROOT, CONFIG_DIR_IN_REPO) ) - logging.info( - "Base configs dir is not set. Will use %s" % (args.base_configs_dir) - ) + logging.info("Base configs dir is not set. Will use %s", args.base_configs_dir) if args.cases_dir: if not os.path.isabs(args.cases_dir): @@ -87,7 +83,7 @@ def check_args_and_update_paths(args): args.cases_dir = os.path.abspath( os.path.join(CLICKHOUSE_ROOT, INTEGRATION_DIR_IN_REPO) ) - logging.info("Cases dir is not set. Will use %s" % (args.cases_dir)) + logging.info("Cases dir is not set. Will use %s", args.cases_dir) if args.utils_dir: if not os.path.isabs(args.utils_dir): @@ -98,12 +94,13 @@ def check_args_and_update_paths(args): args.utils_dir = os.path.abspath( os.path.join(CLICKHOUSE_ROOT, UTILS_DIR_IN_REPO) ) - logging.info("utils dir is not set. Will use %s" % (args.utils_dir)) + logging.info("utils dir is not set. 
Will use %s", args.utils_dir) logging.info( - "base_configs_dir: {}, binary: {}, cases_dir: {} ".format( - args.base_configs_dir, args.binary, args.cases_dir - ) + "base_configs_dir: %s, binary: %s, cases_dir: %s ", + args.base_configs_dir, + args.binary, + args.cases_dir, ) for path in [ @@ -115,7 +112,7 @@ def check_args_and_update_paths(args): CLICKHOUSE_ROOT, ]: if not os.path.exists(path): - raise Exception("Path {} doesn't exist".format(path)) + raise FileNotFoundError(f"Path {path} doesn't exist") if args.dockerd_volume: if not os.path.isabs(args.dockerd_volume): @@ -126,21 +123,22 @@ def check_args_and_update_paths(args): if (not os.path.exists(os.path.join(args.base_configs_dir, "config.xml"))) and ( not os.path.exists(os.path.join(args.base_configs_dir, "config.yaml")) ): - raise Exception( - "No config.xml or config.yaml in {}".format(args.base_configs_dir) + raise FileNotFoundError( + f"No config.xml or config.yaml in {args.base_configs_dir}" ) if (not os.path.exists(os.path.join(args.base_configs_dir, "users.xml"))) and ( not os.path.exists(os.path.join(args.base_configs_dir, "users.yaml")) ): - raise Exception( - "No users.xml or users.yaml in {}".format(args.base_configs_dir) + raise FileNotFoundError( + f"No users.xml or users.yaml in {args.base_configs_dir}" ) def docker_kill_handler_handler(signum, frame): + _, _ = signum, frame subprocess.check_call( - "docker ps --all --quiet --filter name={name}".format(name=CONTAINER_NAME), + f"docker ps --all --quiet --filter name={CONTAINER_NAME}", shell=True, ) raise KeyboardInterrupt("Killed by Ctrl+C") @@ -318,7 +316,7 @@ if __name__ == "__main__": parallel_args = "" if args.parallel: parallel_args += "--dist=loadfile" - parallel_args += " -n {}".format(args.parallel) + parallel_args += f" -n {args.parallel}".format() rand_args = "" # if not args.no_random: @@ -326,7 +324,7 @@ if __name__ == "__main__": net = "" if args.network: - net = "--net={}".format(args.network) + net = f"--net={args.network}" elif not args.disable_net_host: net = "--net=host" @@ -350,9 +348,7 @@ if __name__ == "__main__": dockerd_internal_volume = "--tmpfs /var/lib/docker -e DOCKER_RAMDISK=true" elif args.dockerd_volume: dockerd_internal_volume = ( - "--mount type=bind,source={},target=/var/lib/docker".format( - args.dockerd_volume - ) + f"--mount type=bind,source={args.dockerd_volume},target=/var/lib/docker" ) else: try: From 5524f5901f4c22e7e97e1120410e7f47577e07e1 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 5 Mar 2024 16:18:03 +0100 Subject: [PATCH 259/356] Cleanup code --- src/Analyzer/ConstantNode.cpp | 3 +- src/Analyzer/ConstantNode.h | 2 + src/Planner/PlannerActionsVisitor.cpp | 69 +++++++++++++++++++++++++-- src/Planner/PlannerContext.cpp | 8 +--- src/Planner/PlannerContext.h | 13 +++-- 5 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index e3b87edbdc6..b6940271b1e 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -1,9 +1,10 @@ #include +#include + #include #include #include -#include "Analyzer/FunctionNode.h" #include #include diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 18090c56630..45f85cec4a3 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -75,8 +75,10 @@ public: return constant_value->getType(); } + /// Check if convertation to AST requires wrapping with _CAST function. 
bool requiresCastCall() const; + /// Check if constant is a result of _CAST function constant folding. bool receivedFromInitiatorServer() const; void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index e5610dd6fe7..52971539dd7 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -44,7 +44,12 @@ namespace ErrorCodes namespace { -String calculateActionNodeNameForConstant(const ConstantNode & constant_node) +/* Calculates Action node name for ConstantNode. + * + * If converting to AST will add a '_CAST' function call, + * the result action name will also include it. + */ +String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_node) { WriteBufferFromOwnString buffer; if (constant_node.requiresCastCall()) @@ -104,16 +109,43 @@ public: case QueryTreeNodeType::CONSTANT: { const auto & constant_node = node->as(); + /* To ensure that headers match during distributed query we need to simulate action node naming on + * secondary servers. If we don't do that headers will mismatch due to constant folding. + * + * +--------+ + * -----------------| Server |---------------- + * / +--------+ \ + * / \ + * v v + * +-----------+ +-----------+ + * | Initiator | ------ | Secondary |------ + * +-----------+ / +-----------+ \ + * | / \ + * | / \ + * v / \ + * +---------------+ v v + * | Wrap in _CAST | +----------------------------+ +----------------------+ + * | if needed | | Constant folded from _CAST | | Constant folded from | + * +---------------+ +----------------------------+ | another expression | + * | +----------------------+ + * v | + * +----------------------------+ v + * | Name ConstantNode the same | +--------------------------+ + * | as on initiator server | | Generate action name for | + * | (wrap in _CAST if needed) | | original expression | + * +----------------------------+ +--------------------------+ + */ if (planner_context.isASTLevelOptimizationAllowed()) { - result = calculateActionNodeNameForConstant(constant_node); + result = calculateActionNodeNameWithCastIfNeeded(constant_node); } else { + // Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed. if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) { if (constant_node.receivedFromInitiatorServer()) - result = calculateActionNodeNameForConstant(constant_node); + result = calculateActionNodeNameWithCastIfNeeded(constant_node); else result = calculateActionNodeName(constant_node.getSourceExpression()); } @@ -560,16 +592,43 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi auto constant_node_name = [&]() { + /* To ensure that headers match during distributed query we need to simulate action node naming on + * secondary servers. If we don't do that headers will mismatch due to constant folding. 
+ * + * +--------+ + * -----------------| Server |---------------- + * / +--------+ \ + * / \ + * v v + * +-----------+ +-----------+ + * | Initiator | ------ | Secondary |------ + * +-----------+ / +-----------+ \ + * | / \ + * | / \ + * v / \ + * +---------------+ v v + * | Wrap in _CAST | +----------------------------+ +----------------------+ + * | if needed | | Constant folded from _CAST | | Constant folded from | + * +---------------+ +----------------------------+ | another expression | + * | +----------------------+ + * v | + * +----------------------------+ v + * | Name ConstantNode the same | +--------------------------+ + * | as on initiator server | | Generate action name for | + * | (wrap in _CAST if needed) | | original expression | + * +----------------------------+ +--------------------------+ + */ if (planner_context->isASTLevelOptimizationAllowed()) { - return calculateActionNodeNameForConstant(constant_node); + return calculateActionNodeNameWithCastIfNeeded(constant_node); } else { + // Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed. if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) { if (constant_node.receivedFromInitiatorServer()) - return calculateActionNodeNameForConstant(constant_node); + return calculateActionNodeNameWithCastIfNeeded(constant_node); else return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); } diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 57db84d5031..f33255f0a44 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -4,7 +4,6 @@ #include #include #include -#include "Interpreters/SelectQueryOptions.h" namespace DB { @@ -46,7 +45,7 @@ bool GlobalPlannerContext::hasColumnIdentifier(const ColumnIdentifier & column_i PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_) : query_context(std::move(query_context_)) , global_planner_context(std::move(global_planner_context_)) - , select_query_options(select_query_options_) + , is_ast_level_optimization_allowed(!(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options_.ignore_ast_optimizations)) {} TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) @@ -119,11 +118,6 @@ const ColumnIdentifier * PlannerContext::getColumnNodeIdentifierOrNull(const Que return table_expression_data->getColumnIdentifierOrNull(column_name); } -bool PlannerContext::isASTLevelOptimizationAllowed() const -{ - return !(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options.ignore_ast_optimizations); -} - PlannerContext::SetKey PlannerContext::createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node) { const auto set_source_hash = set_source_node->getTreeHash(); diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index e7fa7abf2b6..4d9ba037cac 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -111,11 +111,6 @@ public: return global_planner_context; } - const SelectQueryOptions & getSelectQueryOptions() const - { - return select_query_options; - } - /// Get or create table expression data for table expression node. 
TableExpressionData & getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node); @@ -172,7 +167,11 @@ public: PreparedSets & getPreparedSets() { return prepared_sets; } - bool isASTLevelOptimizationAllowed() const; + /// Returns false if any of following conditions met: + /// 1. Query is executed on a follower node. + /// 2. ignore_ast_optimizations is set. + bool isASTLevelOptimizationAllowed() const { return is_ast_level_optimization_allowed; } + private: /// Query context ContextMutablePtr query_context; @@ -180,7 +179,7 @@ private: /// Global planner context GlobalPlannerContextPtr global_planner_context; - SelectQueryOptions select_query_options; + bool is_ast_level_optimization_allowed; /// Column node to column identifier std::unordered_map column_node_to_column_identifier; From c4009a16f90920e3ce59b1376d5bc7529aa8d450 Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Tue, 5 Mar 2024 22:29:50 +0800 Subject: [PATCH 260/356] Refactor OptimizeDateOrDateTimeConverterWithPreimageVisitor The generateOptimizedDateFilter function is refactored to enhance the code readability. And this commit also fixes the duplicate creations of ColumnNode. --- ...ateOrDateTimeConverterWithPreimagePass.cpp | 88 ++-- ...mizations_ast_query_tree_rewrite.reference | 424 +++++++++--------- 2 files changed, 241 insertions(+), 271 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index cc334cde9c7..0c37749c706 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -94,7 +94,8 @@ public: if (!func_node || func_node->getArguments().getNodes().size() != 1) return; - const auto * column_id = func_node->getArguments().getNodes()[0]->as(); + const auto & argument_node = func_node->getArguments().getNodes()[0]; + const auto * column_id = argument_node->as(); if (!column_id) return; @@ -119,7 +120,7 @@ public: if (!preimage_range) return; - const auto new_node = generateOptimizedDateFilter(comparator, *column_id, *preimage_range); + const auto new_node = generateOptimizedDateFilter(comparator, argument_node, *preimage_range); if (!new_node) return; @@ -128,20 +129,22 @@ public: } private: - QueryTreeNodePtr - generateOptimizedDateFilter(const String & comparator, const ColumnNode & column_node, const std::pair & range) const + QueryTreeNodePtr generateOptimizedDateFilter( + const String & comparator, const QueryTreeNodePtr & column_node, const std::pair & range) const { const DateLUTImpl & date_lut = DateLUT::instance("UTC"); String start_date_or_date_time; String end_date_or_date_time; - if (isDateOrDate32(column_node.getColumnType().get())) + const auto & column_node_typed = column_node->as(); + const auto & column_type = column_node_typed.getColumnType().get(); + if (isDateOrDate32(column_type)) { start_date_or_date_time = date_lut.dateToString(range.first.get()); end_date_or_date_time = date_lut.dateToString(range.second.get()); } - else if (isDateTime(column_node.getColumnType().get()) || isDateTime64(column_node.getColumnType().get())) + else if (isDateTime(column_type) || isDateTime64(column_type)) { start_date_or_date_time = date_lut.timeToString(range.first.get()); end_date_or_date_time = date_lut.timeToString(range.second.get()); @@ -151,69 +154,29 @@ private: if (comparator == "equals") { - const auto lhs = std::make_shared("greaterOrEquals"); - 
lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource())); - lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); - - const auto rhs = std::make_shared<FunctionNode>("less"); - rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource())); - rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); - - const auto new_date_filter = std::make_shared<FunctionNode>("and"); - new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode( + "and", + createFunctionNode("greaterOrEquals", column_node, std::make_shared<ConstantNode>(start_date_or_date_time)), + createFunctionNode("less", column_node, std::make_shared<ConstantNode>(end_date_or_date_time))); } else if (comparator == "notEquals") { - const auto lhs = std::make_shared<FunctionNode>("less"); - lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource())); - lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); - - const auto rhs = std::make_shared<FunctionNode>("greaterOrEquals"); - rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource())); - rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); - - const auto new_date_filter = std::make_shared<FunctionNode>("or"); - new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode( + "or", + createFunctionNode("less", column_node, std::make_shared<ConstantNode>(start_date_or_date_time)), + createFunctionNode("greaterOrEquals", column_node, std::make_shared<ConstantNode>(end_date_or_date_time))); } else if (comparator == "greater") { - const auto new_date_filter = std::make_shared<FunctionNode>("greaterOrEquals"); - new_date_filter->getArguments().getNodes().push_back( - std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource())); - new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode("greaterOrEquals", column_node, std::make_shared<ConstantNode>(end_date_or_date_time)); } else if (comparator == "lessOrEquals") { - const auto new_date_filter = std::make_shared<FunctionNode>("less"); - new_date_filter->getArguments().getNodes().push_back( - std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource())); - new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode("less", column_node, std::make_shared<ConstantNode>(end_date_or_date_time)); } else if (comparator == "less" || comparator == "greaterOrEquals") { - const auto new_date_filter = std::make_shared<FunctionNode>(comparator); - new_date_filter->getArguments().getNodes().push_back( - std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource())); -
new_date_filter->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode(comparator, column_node, std::make_shared(start_date_or_date_time)); } else [[unlikely]] { @@ -224,10 +187,17 @@ private: } } - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + template + QueryTreeNodePtr createFunctionNode(const String & function_name, Args &&... args) const { auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); + const auto function_node = std::make_shared(function_name); + auto & new_arguments = function_node->getArguments().getNodes(); + new_arguments.reserve(sizeof...(args)); + (new_arguments.push_back(std::forward(args)), ...); + function_node->resolveAsFunction(function->build(function_node->getArgumentColumns())); + + return function_node; } }; diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference index 63658890119..fca48238778 100644 --- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference +++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference @@ -24,21 +24,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -66,21 +66,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, 
constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -244,21 +244,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1998-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1998-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -289,34 +289,34 @@ QUERY id: 0 FUNCTION id: 14, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 - COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 - 
CONSTANT id: 17, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 18, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 16, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 17, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - FUNCTION id: 20, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + FUNCTION id: 19, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 21, nodes: 2 - COLUMN id: 22, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 23, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 24, function_name: less, function_type: ordinary, result_type: UInt8 + LIST id: 20, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 22, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 25, nodes: 2 - COLUMN id: 26, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 27, constant_value: \'1995-01-01\', constant_value_type: String - FUNCTION id: 28, function_name: and, function_type: ordinary, result_type: UInt8 + LIST id: 23, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 24, constant_value: \'1995-01-01\', constant_value_type: String + FUNCTION id: 25, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 29, nodes: 2 - FUNCTION id: 30, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 26, nodes: 2 + FUNCTION id: 27, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 31, nodes: 2 - COLUMN id: 32, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 33, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 34, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 28, nodes: 2 + COLUMN id: 29, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 30, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 31, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 35, nodes: 2 - COLUMN id: 32, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 36, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 32, nodes: 2 + COLUMN id: 29, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 33, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1, @@ -346,26 +346,26 @@ QUERY id: 0 FUNCTION id: 11, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 12, nodes: 2 - COLUMN id: 13, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 14, constant_value: \'1993-01-01\', constant_value_type: String - FUNCTION id: 15, function_name: less, function_type: ordinary, result_type: UInt8 + COLUMN id: 6, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 13, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 14, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 16, nodes: 2 - COLUMN id: 17, column_name: date1, result_type: Date, 
source_id: 3 - CONSTANT id: 18, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 19, function_name: and, function_type: ordinary, result_type: UInt8 + LIST id: 15, nodes: 2 + COLUMN id: 6, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 16, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 17, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 20, nodes: 2 - FUNCTION id: 21, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + FUNCTION id: 19, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 22, nodes: 2 - COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 25, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 20, nodes: 2 + COLUMN id: 21, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 22, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 23, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 26, nodes: 2 - COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 27, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 24, nodes: 2 + COLUMN id: 21, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 25, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -425,22 +425,22 @@ QUERY id: 0 FUNCTION id: 10, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 11, nodes: 2 - COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 13, constant_value: \'1994-01-01\', constant_value_type: String + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 12, constant_value: \'1994-01-01\', constant_value_type: String WHERE - FUNCTION id: 14, function_name: and, function_type: ordinary, result_type: UInt8 + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 15, nodes: 2 - FUNCTION id: 16, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 19, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 20, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + COLUMN id: 17, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 19, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 21, nodes: 2 - COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 22, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 20, nodes: 2 + COLUMN id: 17, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -479,8 +479,8 @@ QUERY id: 0 FUNCTION id: 19, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 20, nodes: 2 - COLUMN id: 21, 
column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 22, constant_value: \'1994-01-01\', constant_value_type: String + COLUMN id: 17, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -582,21 +582,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -624,21 +624,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 
20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -666,21 +666,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -847,26 +847,26 @@ QUERY id: 0 FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 - COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 17, constant_value: \'1993-01-01\', constant_value_type: String - FUNCTION id: 18, function_name: less, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 16, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 17, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 22, function_name: and, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 19, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 20, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - FUNCTION id: 24, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - 
LIST id: 25, nodes: 2 - COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 27, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 28, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 23, nodes: 2 + COLUMN id: 24, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 25, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 26, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 29, nodes: 2 - COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 30, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 27, nodes: 2 + COLUMN id: 24, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 28, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM datetime_t @@ -894,21 +894,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM datetime_t @@ -936,21 +936,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: 
greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date32_t @@ -978,21 +978,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date32_t @@ -1020,21 +1020,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + 
FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM datetime64_t @@ -1062,21 +1062,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM datetime64_t @@ -1104,19 +1104,19 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, 
function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 From 55c028322dfb4111baf8d0ad8eac4d48975dd349 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 5 Mar 2024 16:59:22 +0100 Subject: [PATCH 261/356] Fix typo --- src/Analyzer/ConstantNode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 45f85cec4a3..24f0c786980 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -75,7 +75,7 @@ public: return constant_value->getType(); } - /// Check if convertation to AST requires wrapping with _CAST function. + /// Check if conversion to AST requires wrapping with _CAST function. bool requiresCastCall() const; /// Check if constant is a result of _CAST function constant folding. 
From 7fe7f3a79dbdf0e24a03bc63a57486e2ae14f7e8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 5 Mar 2024 16:03:02 +0000 Subject: [PATCH 262/356] address review comments --- src/Interpreters/MutationsInterpreter.cpp | 2 +- .../Merges/Algorithms/SummingSortedAlgorithm.cpp | 2 +- .../Optimizations/optimizeUseNormalProjection.cpp | 2 +- src/Processors/QueryPlan/ReadFromMergeTree.h | 2 +- src/Storages/IStorage.h | 6 +++--- src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/MergeTree/IMergeTreeReader.cpp | 4 +++- src/Storages/MergeTree/IMergeTreeReader.h | 2 +- src/Storages/MergeTree/MergeTask.h | 2 +- src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp | 2 +- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeRangeReader.h | 2 +- src/Storages/MergeTree/MergeTreeReadTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeSelectProcessor.cpp | 2 +- src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 2 +- .../{ => MergeTree}/MergeTreeVirtualColumns.cpp | 2 +- src/Storages/{ => MergeTree}/MergeTreeVirtualColumns.h | 0 src/Storages/MergeTree/MutateTask.cpp | 2 +- src/Storages/StorageKeeperMap.cpp | 2 +- src/Storages/StorageLog.cpp | 1 - src/Storages/StorageSnapshot.cpp | 1 - src/Storages/StorageSnapshot.h | 3 --- src/Storages/StorageValues.cpp | 4 ++-- src/Storages/StorageValues.h | 2 +- src/Storages/System/StorageSystemDictionaries.cpp | 2 +- src/Storages/System/StorageSystemPartsBase.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 10 +++++----- src/Storages/VirtualColumnsDescription.h | 4 +++- 30 files changed, 38 insertions(+), 39 deletions(-) rename src/Storages/{ => MergeTree}/MergeTreeVirtualColumns.cpp (96%) rename src/Storages/{ => MergeTree}/MergeTreeVirtualColumns.h (100%) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 6641c6b740c..3b1a499255b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 6253d3058aa..28160b18269 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 73caf70627f..cac172a856f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -135,7 +135,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) std::list candidates; NormalProjectionCandidate * best_candidate = nullptr; - const Names & required_columns = reading->getRealColumnNames(); + const Names & required_columns = reading->getAllColumnNames(); const auto & parts = reading->getParts(); const auto & alter_conversions = reading->getAlterConvertionsForParts(); const auto & query_info = reading->getQueryInfo(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 1d7c2d46361..5ed742a9bfd 100644 --- 
a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -133,7 +133,7 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeIndexes(JSONBuilder::JSONMap & map) const override; - const Names & getRealColumnNames() const { return all_column_names; } + const Names & getAllColumnNames() const { return all_column_names; } StorageID getStorageID() const { return data.getStorageID(); } UInt64 getSelectedParts() const { return selected_parts; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index f8d73038e09..1108eafc6b6 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -214,9 +214,9 @@ public: metadata.set(std::make_unique(metadata_)); } - void setVirtuals(const VirtualColumnsDescription & virtuals_) + void setVirtuals(VirtualColumnsDescription virtuals_) { - virtuals.set(std::make_unique(virtuals_)); + virtuals.set(std::make_unique(std::move(virtuals_))); } /// Return list of virtual columns (like _part, _table, etc). In the vast @@ -275,7 +275,7 @@ private: /// Multiversion storage metadata. Allows to read/write storage metadata without locks. MultiVersionStorageMetadataPtr metadata; - /// TODO: + /// Description of virtual columns. Optional, may be set in constructor. MultiVersionVirtualsDescriptionPtr virtuals; protected: diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index cef385c6f98..476b0a7e183 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -220,7 +220,7 @@ StorageLiveView::StorageLiveView( VirtualColumnsDescription virtuals; virtuals.addEphemeral("_version", std::make_shared(), ""); - setVirtuals(virtuals); + setVirtuals(std::move(virtuals)); if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 30be1aa1c56..4936f1d33c6 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -68,6 +68,8 @@ const IMergeTreeReader::ValueSizeMap & IMergeTreeReader::getAvgValueSizeHints() void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const { + chassert(columns.size() == requested_columns.size()); + const auto * loaded_part_info = typeid_cast(data_part_info_for_read.get()); if (!loaded_part_info) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Filling of virtual columns is supported only for LoadedMergeTreeDataPartInfoForReader"); diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 32877cd88eb..a5b84eba241 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -117,7 +117,7 @@ private: /// Actual columns description in part. const ColumnsDescription & part_columns; - /// TODO: + /// Fields of virtual columns that were filled in previous stages. 
VirtualFields virtual_fields; }; diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 28a3c671914..1f50e55f8a0 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -15,7 +15,7 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index e84ed0a8068..67f5e7a53e8 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 7b1d08642e2..d98e961f96b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -67,7 +67,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 059caebcfc8..c19b4ddd8a2 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index ff86ec01efa..c78f7579637 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1153,13 +1153,13 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t size_t pos = read_sample_block.getPositionByName("_part_offset"); chassert(pos < result.columns.size()); chassert(result.columns[pos] == nullptr); - result.columns[pos] = fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); + result.columns[pos] = createPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); } return result; } -ColumnPtr MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) +ColumnPtr MergeTreeRangeReader::createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) { size_t num_rows = result.numReadRows(); diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index ef861fc5b4d..688a6b0922b 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -308,7 +308,7 @@ private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(const ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result) const; - ColumnPtr fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); + ColumnPtr createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); IMergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index 64fd37d14b1..08b30e445e2 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -1,6 +1,6 @@ #include 
#include -#include +#include #include namespace DB diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 8a24e150bae..fce733d47b7 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 0d6b1d88075..e5545a92aea 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp similarity index 96% rename from src/Storages/MergeTreeVirtualColumns.cpp rename to src/Storages/MergeTree/MergeTreeVirtualColumns.cpp index 8250ceda7fa..b87dccc2b18 100644 --- a/src/Storages/MergeTreeVirtualColumns.cpp +++ b/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Storages/MergeTreeVirtualColumns.h b/src/Storages/MergeTree/MergeTreeVirtualColumns.h similarity index 100% rename from src/Storages/MergeTreeVirtualColumns.h rename to src/Storages/MergeTree/MergeTreeVirtualColumns.h diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 56f832630b7..a5b8a2a2a6d 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 69e6085bab9..4749303e4c2 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -333,7 +333,7 @@ StorageKeeperMap::StorageKeeperMap( VirtualColumnsDescription virtuals; virtuals.addEphemeral(String(version_column_name), std::make_shared(), ""); - setVirtuals(virtuals); + setVirtuals(std::move(virtuals)); WriteBufferFromOwnString out; out << "KeeperMap metadata format version: 1\n" diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 7d959b05c94..5e275357022 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index e5c1d3d1dea..222173fd66b 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index 390a5037780..63e4cbd99f6 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -84,9 +84,6 @@ struct StorageSnapshot /// If we have a projection then we should use its metadata. StorageMetadataPtr getMetadataForQuery() const { return projection ? 
projection->metadata : metadata; } - -private: - void init(); }; using StorageSnapshotPtr = std::shared_ptr; diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 191cdab1a40..a930ffd1307 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -12,13 +12,13 @@ StorageValues::StorageValues( const StorageID & table_id_, const ColumnsDescription & columns_, const Block & res_block_, - const VirtualColumnsDescription & virtuals_) + VirtualColumnsDescription virtuals_) : IStorage(table_id_), res_block(res_block_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); - setVirtuals(virtuals_); + setVirtuals(std::move(virtuals_)); } Pipe StorageValues::read( diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index 0db3ecec888..ebb182ab667 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -18,7 +18,7 @@ public: const StorageID & table_id_, const ColumnsDescription & columns_, const Block & res_block_, - const VirtualColumnsDescription & virtuals_ = {}); + VirtualColumnsDescription virtuals_ = {}); std::string getName() const override { return "Values"; } diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index d60cfcafc13..99cdc019fe5 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -56,7 +56,7 @@ StorageSystemDictionaries::StorageSystemDictionaries(const StorageID & storage_i { VirtualColumnsDescription virtuals; virtuals.addEphemeral("key", std::make_shared(), ""); - setVirtuals(virtuals); + setVirtuals(std::move(virtuals)); } ColumnsDescription StorageSystemDictionaries::getColumnsDescription() diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 075e1c62323..78a17b974e1 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -262,7 +262,7 @@ StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, Colu VirtualColumnsDescription virtuals; virtuals.addEphemeral("_state", std::make_shared(), ""); - setVirtuals(virtuals); + setVirtuals(std::move(virtuals)); } } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 21f05953714..9c462217007 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -352,17 +352,17 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription { VirtualColumnsDescription desc; - auto add_virtual = [&](const auto & name, const auto & type, const auto & comment) + auto add_virtual = [&](const auto & name, const auto & type) { if (storage_columns.has(name)) return; - desc.addEphemeral(name, type, comment); + desc.addEphemeral(name, type, ""); }; - add_virtual("_path", std::make_shared(std::make_shared()), ""); - add_virtual("_file", std::make_shared(std::make_shared()), ""); - add_virtual("_size", makeNullable(std::make_shared()), ""); + add_virtual("_path", std::make_shared(std::make_shared())); + add_virtual("_file", std::make_shared(std::make_shared())); + add_virtual("_size", makeNullable(std::make_shared())); return desc; } diff --git a/src/Storages/VirtualColumnsDescription.h b/src/Storages/VirtualColumnsDescription.h index 2f46bbcab82..e11ba706718 100644 --- a/src/Storages/VirtualColumnsDescription.h +++ 
b/src/Storages/VirtualColumnsDescription.h @@ -7,7 +7,6 @@ namespace DB struct VirtualColumnDescription : public ColumnDescription { -public: using Self = VirtualColumnDescription; VirtualsKind kind; @@ -16,6 +15,9 @@ public: bool isEphemeral() const { return kind == VirtualsKind::Ephemeral; } bool isPersistent() const { return kind == VirtualsKind::Persistent; } + + /// This method is needed for boost::multi_index because field + /// of base class cannot be referenced in boost::multi_index::member. const String & getName() const { return name; } }; From 8b2bd2125b58074c3b7169f531a56a64a5769dcd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 15 Dec 2023 18:21:14 +0100 Subject: [PATCH 263/356] no warnings at drop --- src/Storages/StorageReplicatedMergeTree.cpp | 5 ++++- .../0_stateless/02922_deduplication_with_zero_copy.sh | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 36e9ee22f6d..73c7e484bcb 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1115,7 +1115,10 @@ void StorageReplicatedMergeTree::dropZookeeperZeroCopyLockPaths(zkutil::ZooKeepe { LOG_INFO(logger, "Zero copy locks directory {} is absent on ZooKeeper.", zero_copy_locks_root); } - chassert(code == Coordination::Error::ZOK); + else + { + chassert(code == Coordination::Error::ZOK); + } } } diff --git a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh index 381311c5033..bb013dccb65 100755 --- a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh +++ b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh @@ -137,8 +137,8 @@ function list_keeper_nodes() { list_keeper_nodes "${table_shared_id}" -$CLICKHOUSE_CLIENT -nm -q "drop table r1;" & -$CLICKHOUSE_CLIENT -nm -q "drop table r2;" & +$CLICKHOUSE_CLIENT -nm -q "drop table r1;" --send_logs_level="error" & +$CLICKHOUSE_CLIENT -nm -q "drop table r2;" --send_logs_level="error" & wait list_keeper_nodes "${table_shared_id}" From e1851987d8c715ad039d092e6b398b722eda6c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 5 Mar 2024 19:35:39 +0100 Subject: [PATCH 264/356] Make tidy happy --- src/Parsers/ASTQueryWithOutput.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ASTQueryWithOutput.cpp b/src/Parsers/ASTQueryWithOutput.cpp index e47f4dcf29d..c57aa759969 100644 --- a/src/Parsers/ASTQueryWithOutput.cpp +++ b/src/Parsers/ASTQueryWithOutput.cpp @@ -78,7 +78,7 @@ bool ASTQueryWithOutput::resetOutputASTIfExist(IAST & ast) { if (p) { - if (auto it = std::find(ast_with_output->children.begin(), ast_with_output->children.end(), p); + if (auto * it = std::find(ast_with_output->children.begin(), ast_with_output->children.end(), p); it != ast_with_output->children.end()) ast_with_output->children.erase(it); p.reset(); From 3679dc10c21c20af2a110561d2d652a805a943c7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 5 Mar 2024 18:41:29 +0000 Subject: [PATCH 265/356] Fixing tests. 
--- tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql index 809c5921d95..808eaf291d5 100644 --- a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql +++ b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql @@ -7,7 +7,7 @@ SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 'bar'); -- {ser SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 4); -- {serverError 43} -- invalid timezone parameter SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'baz'); -- {serverError BAD_ARGUMENTS} -- unknown timezone -SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 43} -- non-const precision +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 43, 44} -- non-const precision SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, materialize('UTC')); -- {serverError 44} -- non-const timezone SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184012345678910111213141516171819Z', 3, 'UTC'); -- {serverError 6} From 4f6cb21d51695aa6dd8fd7278d308f93a4d7b164 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 5 Mar 2024 18:57:24 +0100 Subject: [PATCH 266/356] set priority when scheduling task in thread pool --- src/Interpreters/AsynchronousInsertQueue.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index e25cedb916c..7d56dbabe3c 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -254,11 +254,17 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() void AsynchronousInsertQueue::scheduleDataProcessingJob( const InsertQuery & key, InsertDataPtr data, ContextPtr global_context, size_t shard_num) { + /// Intuitively it seems reasonable to process first inserted blocks first. + /// We add new chunks in the end of entries list, so they are automatically ordered by creation time + chassert(!data->entries.empty()); + const auto priority = Priority{data->entries.front()->create_time.time_since_epoch().count()}; + /// Wrap 'unique_ptr' with 'shared_ptr' to make this /// lambda copyable and allow to save it to the thread pool. 
pool.scheduleOrThrowOnError( [this, key, global_context, shard_num, my_data = std::make_shared(std::move(data))]() mutable - { processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]); }); + { processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]); }, + priority); } void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const ContextPtr & query_context) @@ -382,6 +388,7 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr assert(data); auto size_in_bytes = data->size_in_bytes; data->size_in_bytes += entry_data_size; + /// We rely on the fact that entries are being added to the list in order of creation time in `scheduleDataProcessingJob()` data->entries.emplace_back(entry); insert_future = entry->getFuture(); From ba8a2eb10d1a2a2beb0fd72b5191c3f4b2fdbd34 Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Wed, 6 Mar 2024 09:25:01 +0800 Subject: [PATCH 267/356] Fix functional test analyzer_preimage_null ColumnNodes with the same source are expected to share the exact id after the preimage transformation. --- .../02999_analyzer_preimage_null.reference | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02999_analyzer_preimage_null.reference b/tests/queries/0_stateless/02999_analyzer_preimage_null.reference index 6f9afedfd07..c5eb6b23d0d 100644 --- a/tests/queries/0_stateless/02999_analyzer_preimage_null.reference +++ b/tests/queries/0_stateless/02999_analyzer_preimage_null.reference @@ -108,14 +108,14 @@ QUERY id: 0 FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 - COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 17, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 18, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 16, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 17, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - FUNCTION id: 21, function_name: toYear, function_type: ordinary, result_type: UInt16 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + FUNCTION id: 20, function_name: toYear, function_type: ordinary, result_type: UInt16 ARGUMENTS - LIST id: 22, nodes: 1 - COLUMN id: 23, column_name: date1, result_type: Date, source_id: 3 + LIST id: 21, nodes: 1 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 SETTINGS optimize_time_filter_with_preimage=1 From cd2a418afd088563819ed723d7a1ae91110cb895 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 6 Mar 2024 02:31:21 +0000 Subject: [PATCH 268/356] add named collection grant doc --- docs/en/sql-reference/statements/grant.md | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 4e5476210e3..879354d714b 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -202,6 +202,13 @@ Hierarchy of privileges: - `S3` - [dictGet](#grant-dictget) - [displaySecretsInShowAndSelect](#grant-display-secrets) +- [NAMED COLLECTION 
ADMIN](#grant-named-collection-admin) + - `CREATE NAMED COLLECTION` + - `DROP NAMED COLLECTION` + - `ALTER NAMED COLLECTION` + - `SHOW NAMED COLLECTIONS` + - `SHOW NAMED COLLECTIONS SECRETS` + - `NAMED COLLECTION` Examples of how this hierarchy is treated: @@ -498,6 +505,23 @@ and [`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select) are turned on. +### NAMED COLLECTION ADMIN + +Allows a certain operation on a specified named collection. + +- `NAMED COLLECTION ADMIN`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION CONTROL` + - `CREATE NAMED COLLECTION`. Level: `NAMED_COLLECTION` + - `DROP NAMED COLLECTION`. Level: `NAMED_COLLECTION` + - `ALTER NAMED COLLECTION`. Level: `NAMED_COLLECTION` + - `SHOW NAMED COLLECTIONS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS` + - `SHOW NAMED COLLECTIONS SECRETS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS SECRETS` + - `NAMED COLLECTION`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION USAGE, USE NAMED COLLECTION` + +**Examples** + +Assuming a named collection is called abc, we grant the privilege CREATE NAMED COLLECTION to user john. +- `GRANT CREATE NAMED COLLECTION ON abc TO john` + ### ALL Grants all the privileges on regulated entity to a user account or a role. From e4b78e7dac963e9fa8d10fcd9956f8df00b3a933 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Wed, 6 Mar 2024 15:10:45 +0800 Subject: [PATCH 269/356] fix bugs --- src/Functions/multiIf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index f5cb2375c53..81304f3afbd 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -422,7 +422,7 @@ private: res_data.resize_exact(rows); if constexpr (nullable_result) { - if (res_null_map) + if (!res_null_map) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid result null_map while result type is nullable"); res_null_map->resize_exact(rows); From 655f9cb1953c371d7b9a9ba6a58fb4a4fc3ca40e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 6 Mar 2024 08:57:20 +0100 Subject: [PATCH 270/356] Revert "Fix wacky primary key sorting in `SHOW INDEX`" --- .../InterpreterShowIndexesQuery.cpp | 4 ++-- .../0_stateless/02724_show_indexes.reference | 24 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 5be72dc8ce6..e8005ead91e 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -37,7 +37,7 @@ FROM ( name AS table, 1 AS non_unique, 'PRIMARY' AS key_name, - row_number() over (order by null) AS seq_in_index, + row_number() over (order by column_name) AS seq_in_index, arrayJoin(splitByString(', ', primary_key)) AS column_name, 'A' AS collation, 0 AS cardinality, @@ -75,7 +75,7 @@ FROM ( database = '{0}' AND table = '{1}')) {2} -ORDER BY index_type, expression, seq_in_index;)", database, table, where_expression); +ORDER BY index_type, expression, column_name, seq_in_index;)", database, table, where_expression); /// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ...) is rejected) and 3.
some diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index ac0461fc506..e41f2521f5c 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -2,33 +2,33 @@ tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- EXTENDED tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- WHERE --- Check with weird table names @@ -40,8 +40,8 @@ NULL 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- Equally named table in other database tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b From 06c2492b92ed89c62ae0e3cd3d11478b1aa0f75e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 6 Mar 2024 09:29:16 +0000 Subject: [PATCH 271/356] Clarify subtle detail in docs --- docs/en/sql-reference/statements/alter/column.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 902eae2d67d..0989c151d18 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -278,9 +278,9 @@ Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mut For columns with a new or updated `MATERIALIZED` value expression, all existing rows are rewritten. 
-For columns with a new or updated `DEFAULT` value expression, the behavior changed in ClickHouse v24.2: -- In ClickHouse < v24.2, all existing rows are changed to the new `DEFAULT` value expression. -- In ClickHouse >= v24.2, only rows containing the previous default value are changed to the new `DEFAULT` value expression. Rows with non-default values are kept as is. +For columns with a new or updated `DEFAULT` value expression, the behavior depends on the ClickHouse version: +- In ClickHouse < v24.2, all existing rows are rewritten. +- ClickHouse >= v24.2 distinguishes if a row value in a column with `DEFAULT` value expression was explicitly specified when it was inserted, or not, i.e. calculated from the `DEFAULT` value expression. If the value was explicitly specified, ClickHouse keeps it as is. If the value was calculated, ClickHouse changes it to the new or updated `DEFAULT` value expression. Syntax: From 93e2ed824816ba5215d3ff5c12c72d9726538a1a Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Wed, 6 Mar 2024 11:31:24 +0100 Subject: [PATCH 272/356] Remove useless message from AWS library --- contrib/aws | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws b/contrib/aws index 5f0542b3ad7..32870e234ca 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 5f0542b3ad7eef25b0540d37d778207e0345ea8f +Subproject commit 32870e234cac03e0ac46370c26858b0ffdf14200 From 083a251951e60b8ca4c4d3e12f78a12cff330237 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 6 Mar 2024 12:04:13 +0000 Subject: [PATCH 273/356] Fixes for HashedArrayDictionary --- src/Dictionaries/HashedArrayDictionary.cpp | 15 ++++++++++-- .../HashedDictionaryParallelLoader.h | 24 ++++++++++++++++--- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 56d42a38c8e..eae00c297b1 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -1078,7 +1078,7 @@ void HashedArrayDictionary::calculateBytesAllocate bytes_allocated += container.allocated_bytes(); } - bucket_count = container.capacity(); + bucket_count += container.capacity(); } }; @@ -1089,6 +1089,13 @@ void HashedArrayDictionary::calculateBytesAllocate bytes_allocated += container.size(); } + /// `bucket_count` should be a sum over all shards, + /// but it should not be a sum over all attributes, since it is used to + /// calculate load_factor like this: `element_count / bucket_count` + /// While element_count is a sum over all shards, not over all attributes.
+ if (attributes.size()) + bucket_count /= attributes.size(); + if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); @@ -1167,11 +1174,15 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) if (shards <= 0 || 128 < shards) throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARDS parameter should be within [1, 128]", full_name); - HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime, static_cast(shards)}; + Int64 shard_load_queue_backlog = config.getInt(config_prefix + dictionary_layout_prefix + ".shard_load_queue_backlog", 10000); + if (shard_load_queue_backlog <= 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater then zero", full_name); if (source_ptr->hasUpdateField() && shards > 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: SHARDS parameter does not supports for updatable source (UPDATE_FIELD)", full_name); + HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime, static_cast(shards), static_cast(shard_load_queue_backlog)}; + ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); const auto & settings = context->getSettingsRef(); diff --git a/src/Dictionaries/HashedDictionaryParallelLoader.h b/src/Dictionaries/HashedDictionaryParallelLoader.h index a256f6de0e0..1b8b7b7f555 100644 --- a/src/Dictionaries/HashedDictionaryParallelLoader.h +++ b/src/Dictionaries/HashedDictionaryParallelLoader.h @@ -62,7 +62,11 @@ public: shards_queues[shard].emplace(backlog); pool.scheduleOrThrowOnError([this, shard, thread_group = CurrentThread::getGroup()] { + WorkerStatistic statistic; SCOPE_EXIT_SAFE( + LOG_TRACE(dictionary.log, "Finished worker for dictionary {} shard {}, processed {} blocks, {} rows, total time {}ms", + dictionary_name, shard, statistic.total_blocks, statistic.total_rows, statistic.total_elapsed_ms); + if (thread_group) CurrentThread::detachFromGroupIfNotDetached(); ); @@ -74,7 +78,9 @@ public: CurrentThread::attachToGroupIfDetached(thread_group); setThreadName("HashedDictLoad"); - threadWorker(shard); + LOG_TRACE(dictionary.log, "Starting worker for dictionary {}, shard {}", dictionary_name, shard); + + threadWorker(shard, statistic); }); } } @@ -128,7 +134,14 @@ private: std::vector shards_slots; DictionaryKeysArenaHolder arena_holder; - void threadWorker(size_t shard) + struct WorkerStatistic + { + UInt64 total_elapsed_ms = 0; + UInt64 total_blocks = 0; + UInt64 total_rows = 0; + }; + + void threadWorker(size_t shard, WorkerStatistic & statistic) { Block block; DictionaryKeysArenaHolder arena_holder_; @@ -139,8 +152,13 @@ private: Stopwatch watch; dictionary.blockToAttributes(block, arena_holder_, shard); UInt64 elapsed_ms = watch.elapsedMilliseconds(); + + statistic.total_elapsed_ms += elapsed_ms; + statistic.total_blocks += 1; + statistic.total_rows += block.rows(); + if (elapsed_ms > 1'000) - LOG_TRACE(dictionary.log, "Block processing for shard #{} is slow {}ms (rows {}).", shard, elapsed_ms, block.rows()); + LOG_TRACE(dictionary.log, "Block processing for shard #{} is slow {}ms (rows {})", shard, elapsed_ms, block.rows()); } if (!shard_queue.isFinished()) From 70750cb10882ff0eaf15da52baa94603cdffc03b Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 6 Mar 2024 12:04:57 +0000 Subject: [PATCH 274/356] Fix possible stuck on error in HashedDictionaryParallelLoader --- .../HashedDictionaryParallelLoader.h | 32 +++++++++++++++---- 1 file changed, 
26 insertions(+), 6 deletions(-) diff --git a/src/Dictionaries/HashedDictionaryParallelLoader.h b/src/Dictionaries/HashedDictionaryParallelLoader.h index 1b8b7b7f555..c0b4aa73adb 100644 --- a/src/Dictionaries/HashedDictionaryParallelLoader.h +++ b/src/Dictionaries/HashedDictionaryParallelLoader.h @@ -93,8 +93,21 @@ public: for (size_t shard = 0; shard < shards; ++shard) { - if (!shards_queues[shard]->push(std::move(shards_blocks[shard]))) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to shards queue #{}", shard); + const auto & current_block = shards_blocks[shard]; + while (!shards_queues[shard]->tryPush(current_block, /* milliseconds= */ 100)) + { + if (shards_queues[shard]->isFinished()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to finished shards queue #{}, dictionary {}", shard, dictionary_name); + + /// We need to check if some workers failed + if (pool.active() != shards) + { + LOG_DEBUG(dictionary.log, "Some workers for dictionary {} failed, stopping all workers", dictionary_name); + stop_all_workers = true; + pool.wait(); /// We expect exception to be thrown from the failed worker thread + throw Exception(ErrorCodes::LOGICAL_ERROR, "Worker threads for dictionary {} are not active", dictionary_name); + } + } } } @@ -130,6 +143,7 @@ private: String dictionary_name; const size_t shards; ThreadPool pool; + std::atomic_bool stop_all_workers{false}; std::vector>> shards_queues; std::vector shards_slots; DictionaryKeysArenaHolder arena_holder; @@ -147,8 +161,17 @@ private: DictionaryKeysArenaHolder arena_holder_; auto & shard_queue = *shards_queues[shard]; - while (shard_queue.pop(block)) + while (true) { + if (!shard_queue.tryPop(block, /* milliseconds= */ 100)) + { + /// Check if we need to stop + if (stop_all_workers || shard_queue.isFinished()) + break; + /// Timeout expired, but the queue is not finished yet, try again + continue; + } + Stopwatch watch; dictionary.blockToAttributes(block, arena_holder_, shard); UInt64 elapsed_ms = watch.elapsedMilliseconds(); @@ -160,9 +183,6 @@ private: if (elapsed_ms > 1'000) LOG_TRACE(dictionary.log, "Block processing for shard #{} is slow {}ms (rows {})", shard, elapsed_ms, block.rows()); } - - if (!shard_queue.isFinished()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not pull non finished shards queue #{}", shard); } /// Split block to shards smaller block, using 'selector'. 
From d9f7ac881593dc577b7da864bdabc482337ea378 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 1 Mar 2024 15:41:42 +0000 Subject: [PATCH 275/356] Analyzer: Fix bug with join_use_nulls and PREWHERE --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 23 ++++++++++++++++--- ...ence => 02534_join_prewhere_bug.reference} | 7 ++++++ ..._44062.sql => 02534_join_prewhere_bug.sql} | 5 ++++ 3 files changed, 32 insertions(+), 3 deletions(-) rename tests/queries/0_stateless/{02534_join_prewhere_bug_44062.reference => 02534_join_prewhere_bug.reference} (88%) rename tests/queries/0_stateless/{02534_join_prewhere_bug_44062.sql => 02534_join_prewhere_bug.sql} (92%) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 907a732493d..eb578d17c04 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -718,6 +718,8 @@ struct IdentifierResolveScope group_by_use_nulls = context->getSettingsRef().group_by_use_nulls && (query_node->isGroupByWithGroupingSets() || query_node->isGroupByWithRollup() || query_node->isGroupByWithCube()); } + + join_use_nulls = context->getSettingsRef().join_use_nulls; } QueryTreeNodePtr scope_node; @@ -772,6 +774,8 @@ struct IdentifierResolveScope /// Apply nullability to aggregation keys bool group_by_use_nulls = false; + /// Join returns NULLs instead of default values + bool join_use_nulls = false; /// JOINs count size_t joins_count = 0; @@ -3286,7 +3290,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo QueryTreeNodePtr resolved_identifier; JoinKind join_kind = from_join_node.getKind(); - bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls; /// If columns from left or right table were missed Object(Nullable('json')) subcolumns, they will be replaced /// to ConstantNode(NULL), which can't be cast to ColumnNode, so we resolve it here. @@ -3451,7 +3454,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo if (join_node_in_resolve_process || !resolved_identifier) return resolved_identifier; - if (join_use_nulls) + if (scope.join_use_nulls) { resolved_identifier = resolved_identifier->clone(); convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side); @@ -4439,7 +4442,7 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I else matched_expression_nodes_with_names = resolveUnqualifiedMatcher(matcher_node, scope); - if (scope.context->getSettingsRef().join_use_nulls) + if (scope.join_use_nulls) { /** If we are resolving matcher came from the result of JOIN and `join_use_nulls` is set, * we need to convert joined column type to Nullable. @@ -7558,8 +7561,22 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier } if (query_node_typed.getPrewhere()) + { + /** Expression in PREWHERE with JOIN should not be modified by join_use_nulls. + * Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE a = 1 + * Column `a` should be resolved from table and should not change its type to Nullable.
+ */ + bool join_use_nulls = scope.join_use_nulls; + bool use_identifier_lookup_to_result_cache = scope.use_identifier_lookup_to_result_cache; + scope.join_use_nulls = false; + scope.use_identifier_lookup_to_result_cache = false; + resolveExpressionNode(query_node_typed.getPrewhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + scope.join_use_nulls = join_use_nulls; + scope.use_identifier_lookup_to_result_cache = use_identifier_lookup_to_result_cache; + } + if (query_node_typed.getWhere()) resolveExpressionNode(query_node_typed.getWhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference b/tests/queries/0_stateless/02534_join_prewhere_bug.reference similarity index 88% rename from tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference rename to tests/queries/0_stateless/02534_join_prewhere_bug.reference index aaef17371d8..115ea994de1 100644 --- a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference +++ b/tests/queries/0_stateless/02534_join_prewhere_bug.reference @@ -34,5 +34,12 @@ ORDER BY test2.col1 ; 5600 123 123 5601 321 -32 +SELECT col2, col2 + 1 FROM test1 +FULL OUTER JOIN test2 USING (col1) +PREWHERE (col2 * 2) :: UInt8 +; +123 124 +-32 -31 +-30 -29 DROP TABLE IF EXISTS test1; DROP TABLE IF EXISTS test2; diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql b/tests/queries/0_stateless/02534_join_prewhere_bug.sql similarity index 92% rename from tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql rename to tests/queries/0_stateless/02534_join_prewhere_bug.sql index 073f81e4ff3..016c92597ec 100644 --- a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql +++ b/tests/queries/0_stateless/02534_join_prewhere_bug.sql @@ -42,5 +42,10 @@ WHERE test2.col1 IS NOT NULL ORDER BY test2.col1 ; +SELECT col2, col2 + 1 FROM test1 +FULL OUTER JOIN test2 USING (col1) +PREWHERE (col2 * 2) :: UInt8 +; + DROP TABLE IF EXISTS test1; DROP TABLE IF EXISTS test2; From 4dede601efe1fd651f4d4cd311fb52895890f936 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 5 Mar 2024 12:08:03 +0000 Subject: [PATCH 276/356] fix --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index eb578d17c04..576a0e68966 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -719,7 +719,10 @@ struct IdentifierResolveScope (query_node->isGroupByWithGroupingSets() || query_node->isGroupByWithRollup() || query_node->isGroupByWithCube()); } - join_use_nulls = context->getSettingsRef().join_use_nulls; + if (context) + join_use_nulls = context->getSettingsRef().join_use_nulls; + else if (parent_scope) + join_use_nulls = parent_scope->join_use_nulls; } QueryTreeNodePtr scope_node; From 381c0753915f43d33b7f3105eb2a5a1072350d0b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 6 Mar 2024 11:48:33 +0100 Subject: [PATCH 277/356] Throw on query timeout --- src/Common/ZooKeeper/ZooKeeperRetries.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperRetries.h b/src/Common/ZooKeeper/ZooKeeperRetries.h index d411549346a..d8d234ba913 100644 --- a/src/Common/ZooKeeper/ZooKeeperRetries.h +++ b/src/Common/ZooKeeper/ZooKeeperRetries.h @@ -5,8 +5,6 @@ #include #include -#include - namespace DB { @@ -220,8 
+218,8 @@ private: return false; } - if (process_list_element && !process_list_element->checkTimeLimitSoft()) - return false; + if (process_list_element) + process_list_element->checkTimeLimit(); /// retries logLastError("will retry due to error"); From 314e27a06ebf5df04b211f92e1eaaa34d5b5d9f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 6 Mar 2024 12:52:03 +0000 Subject: [PATCH 278/356] Fix groupArraySorted documentation --- .../reference/grouparraysorted.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md index cc601c097fe..9bee0c29e7a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -14,8 +14,6 @@ - `N` – The number of elements to return. - If the parameter is omitted, default value is the size of input. - - `column` – The value (Integer, String, Float and other Generic types). **Example** @@ -36,13 +34,12 @@ Gets all the String implementations of all numbers in column: ``` sql -SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5)); +SELECT groupArraySorted(5)(str) FROM (SELECT toString(number) as str FROM numbers(5)); ``` ``` text - ┌─groupArraySorted(str)────────┠- │ ['0','1','2','3','4'] │ - └──────────────────────────────┘ - ``` - \ No newline at end of file +┌─groupArraySorted(5)(str)─┠+│ ['0','1','2','3','4'] │ +└──────────────────────────┘ + ``` \ No newline at end of file From 7e908711d0ff0895a5fbe13c28077f0a3042d49c Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 6 Mar 2024 12:54:39 +0000 Subject: [PATCH 279/356] Respect max_execution_time for dictionary reloading time --- src/Dictionaries/HashedArrayDictionary.cpp | 3 +++ src/Dictionaries/HashedArrayDictionary.h | 1 + src/Dictionaries/HashedDictionary.h | 1 + src/Dictionaries/HashedDictionaryParallelLoader.h | 14 +++++++++++++- src/Dictionaries/registerHashedDictionary.cpp | 1 + 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index eae00c297b1..d09f402143e 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -1189,6 +1189,9 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) const auto * clickhouse_source = dynamic_cast(source_ptr.get()); configuration.use_async_executor = clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor; + if (settings.max_execution_time.totalSeconds() > 0) + configuration.load_timeout = std::chrono::seconds(settings.max_execution_time.totalSeconds()); + if (dictionary_key_type == DictionaryKeyType::Simple) { if (shards > 1) diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index 4b2570ad928..9877d92d457 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -29,6 +29,7 @@ struct HashedArrayDictionaryStorageConfiguration size_t shards = 1; size_t shard_load_queue_backlog = 10000; bool use_async_executor = false; + std::chrono::seconds load_timeout{0}; }; template diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 3a5e4ff6306..b3b8cc56868 100644 --- a/src/Dictionaries/HashedDictionary.h +++ 
b/src/Dictionaries/HashedDictionary.h @@ -67,6 +67,7 @@ struct HashedDictionaryConfiguration const bool require_nonempty; const DictionaryLifetime lifetime; bool use_async_executor = false; + const std::chrono::seconds load_timeout{0}; }; template diff --git a/src/Dictionaries/HashedDictionaryParallelLoader.h b/src/Dictionaries/HashedDictionaryParallelLoader.h index c0b4aa73adb..d88ee88f9a9 100644 --- a/src/Dictionaries/HashedDictionaryParallelLoader.h +++ b/src/Dictionaries/HashedDictionaryParallelLoader.h @@ -31,6 +31,7 @@ template clas namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int TIMEOUT_EXCEEDED; } } @@ -50,9 +51,10 @@ public: , shards(dictionary.configuration.shards) , pool(CurrentMetrics::HashedDictionaryThreads, CurrentMetrics::HashedDictionaryThreadsActive, CurrentMetrics::HashedDictionaryThreadsScheduled, shards) , shards_queues(shards) + , loading_timeout(dictionary.configuration.load_timeout) { UInt64 backlog = dictionary.configuration.shard_load_queue_backlog; - LOG_TRACE(dictionary.log, "Will load the {} dictionary using {} threads (with {} backlog)", dictionary_name, shards, backlog); + LOG_TRACE(dictionary.log, "Will load the {} dictionary using {} threads (with {} backlog and timeout {} sec)", dictionary_name, shards, backlog, loading_timeout.count()); shards_slots.resize(shards); iota(shards_slots.data(), shards_slots.size(), UInt64(0)); @@ -107,6 +109,13 @@ public: pool.wait(); /// We expect exception to be thrown from the failed worker thread throw Exception(ErrorCodes::LOGICAL_ERROR, "Worker threads for dictionary {} are not active", dictionary_name); } + + if (loading_timeout.count() && std::chrono::milliseconds(total_loading_time.elapsedMilliseconds()) > loading_timeout) + { + stop_all_workers = true; + pool.wait(); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout {} sec for dictionary {} loading is expired", loading_timeout.count(), dictionary_name); + } } } } @@ -145,6 +154,9 @@ private: ThreadPool pool; std::atomic_bool stop_all_workers{false}; std::vector>> shards_queues; + std::chrono::seconds loading_timeout; + Stopwatch total_loading_time; + std::vector shards_slots; DictionaryKeysArenaHolder arena_holder; diff --git a/src/Dictionaries/registerHashedDictionary.cpp b/src/Dictionaries/registerHashedDictionary.cpp index 6b980e2d534..5fc4f5d5cb6 100644 --- a/src/Dictionaries/registerHashedDictionary.cpp +++ b/src/Dictionaries/registerHashedDictionary.cpp @@ -77,6 +77,7 @@ void registerDictionaryHashed(DictionaryFactory & factory) require_nonempty, dict_lifetime, use_async_executor, + std::chrono::seconds(settings.max_execution_time.totalSeconds()), }; if (source_ptr->hasUpdateField() && shards > 1) From ee468b0f2ae6e30b01704aebbfed440c8bec6f11 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 6 Mar 2024 14:26:23 +0000 Subject: [PATCH 280/356] CI: wait less in await to not exceed GH timeout #do_not_test --- tests/ci/ci.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 1bd607696de..b222e81ad73 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -645,7 +645,8 @@ class CiCache: if not jobs_with_params: return {} poll_interval_sec = 300 - TIMEOUT = 3590 + # TIMEOUT * MAX_ROUNDS_TO_WAIT must be less than 6h (GH job timeout) with a room for rest RunConfig work + TIMEOUT = 3000 # 50 min MAX_ROUNDS_TO_WAIT = 6 MAX_JOB_NUM_TO_WAIT = 3 await_finished: Dict[str, List[int]] = {} From d0e887f8344a8ba70fff50a5d4f23e1a724c3c1b Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 6 
Mar 2024 15:02:18 +0000 Subject: [PATCH 281/356] Update index.md --- .../sql-reference/window-functions/index.md | 108 ++++++++++-------- 1 file changed, 59 insertions(+), 49 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 6340c369bff..5bfe22d23a2 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -5,7 +5,12 @@ sidebar_label: Window Functions title: Window Functions --- -ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported: +Window functions let you perform calculations across a set of rows that are related to the current row. +Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned. + +## Standard Window Functions + +ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. | Feature | Support or workaround | |------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -25,6 +30,8 @@ ClickHouse supports the standard grammar for defining windows and window functio ## ClickHouse-specific Window Functions +There is also the following window function that's specific to ClickHouse: + ### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS]) Finds non-negative derivative for given `metric_column` by `timestamp_column`. @@ -33,40 +40,6 @@ The computed value is the following for each row: - `0` for 1st row, - ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for $i_th$ row. -## References - -### GitHub Issues - -The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097). - -All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. - -### Tests - -These tests contain the examples of the currently supported grammar: - -https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml - -https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql - -### Postgres Docs - -https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW - -https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS - -https://www.postgresql.org/docs/devel/functions-window.html - -https://www.postgresql.org/docs/devel/tutorial-window.html - -### MySQL Docs - -https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html - -https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html - -https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html - ## Syntax ```text @@ -80,20 +53,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] - `PARTITION BY` - defines how to break a resultset into groups. - `ORDER BY` - defines how to order rows inside the group during calculation aggregate_function.
- `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame. -- `WINDOW` - allows to reuse a window definition with multiple expressions. - -### Functions - -These functions can be used only as a window function. - -- `row_number()` - Number the current row within its partition starting from 1. -- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. -- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. -- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. -- `rank()` - Rank the current row within its partition with gaps. -- `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `WINDOW` - allows multiple expressions to use the same window definition. ```text PARTITION @@ -112,8 +72,23 @@ These functions can be used only as a window function. └─────────────────┘ <--- UNBOUNDED FOLLOWING (END of the PARTITION) ``` +### Functions + +These functions can be used only as a window function. + +- `row_number()` - Number the current row within its partition starting from 1. +- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. +- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. +- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. +- `rank()` - Rank the current row within its partition with gaps. +- `dense_rank()` - Rank the current row within its partition without gaps. +- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. + ## Examples +Let's have a look at some examples of how window functions can be used. + ```sql CREATE TABLE wf_partition ( @@ -589,6 +564,41 @@ ORDER BY └──────────────┴─────────────────────┴───────┴─────────────────────────┘ ``` +## References + +### GitHub Issues + +The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097). + +All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. 
+ +### Tests + +These tests contain the examples of the currently supported grammar: + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql + +### Postgres Docs + +https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW + +https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS + +https://www.postgresql.org/docs/devel/functions-window.html + +https://www.postgresql.org/docs/devel/tutorial-window.html + +### MySQL Docs + +https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html + + ## Related Content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) From 1c0aed896cde47e409ce254d2b46fa050f738655 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 6 Mar 2024 15:25:06 +0000 Subject: [PATCH 282/356] fix build --- src/Storages/System/IStorageSystemOneBlock.h | 2 +- .../System/StorageSystemDictionaries.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 49 ------------------- 3 files changed, 2 insertions(+), 51 deletions(-) diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 8123143a345..a20434fd97e 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -1,4 +1,4 @@ -// #pragma once +#pragma once #include diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index b33d8f4fa2e..7b733c872c4 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -52,7 +52,7 @@ catch (const DB::Exception &) } StorageSystemDictionaries::StorageSystemDictionaries(const StorageID & storage_id_) - : DB::IStorageSystemOneBlock(storage_id_) + : IStorageSystemOneBlock(storage_id_, getColumnsDescription()) { VirtualColumnsDescription virtuals; virtuals.addEphemeral("key", std::make_shared(), ""); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index fdafbd61336..22466b3d4c2 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -109,55 +109,6 @@ void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) } } -void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr context, ASTPtr expression_ast) -{ - if (block.rows() == 0) - return; - - if (!expression_ast) - prepareFilterBlockWithQuery(query, context, block, expression_ast); - - if (!expression_ast) - return; - - /// Let's analyze and calculate the prepared expression. - auto syntax_result = TreeRewriter(context).analyze(expression_ast, block.getNamesAndTypesList()); - ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); - ExpressionActionsPtr actions = analyzer.getActions(false /* add alises */, true /* project result */, CompileExpressions::yes); - - makeSets(actions, context); - - Block block_with_filter = block; - actions->execute(block_with_filter); - - /// Filter the block. 
- String filter_column_name = expression_ast->getColumnName(); - ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullIfNeeded(); - if (filter_column->getDataType() != TypeIndex::UInt8) - return; - - ConstantFilterDescription constant_filter(*filter_column); - - if (constant_filter.always_true) - { - return; - } - - if (constant_filter.always_false) - { - block = block.cloneEmpty(); - return; - } - - FilterDescription filter(*filter_column); - - for (size_t i = 0; i < block.columns(); ++i) - { - ColumnPtr & column = block.safeGetByPosition(i).column; - column = column->filter(*filter.data, -1); - } -} - NameSet getVirtualNamesForFileLikeStorage() { return {"_path", "_file", "_size"}; From d6325e267c51756dd8f4b98638e89ab05f0fd007 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 6 Mar 2024 16:43:53 +0100 Subject: [PATCH 283/356] Use pyproject.toml for pylynt configuration --- .pylintrc | 43 ----------------------------------- tests/sqllogic/connection.py | 5 +++- utils/check-style/check-style | 2 +- 3 files changed, 5 insertions(+), 45 deletions(-) delete mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index b672cbfdfad..00000000000 --- a/.pylintrc +++ /dev/null @@ -1,43 +0,0 @@ -# vim: ft=config - -[BASIC] -max-module-lines=2000 -# due to SQL -max-line-length=200 -# Drop/decrease them one day: -max-branches=50 -max-nested-blocks=10 -max-statements=200 - -[FORMAT] -ignore-long-lines = (# )??$ - -[MESSAGES CONTROL] -disable = missing-docstring, - too-few-public-methods, - invalid-name, - too-many-arguments, - keyword-arg-before-vararg, - too-many-locals, - too-many-instance-attributes, - cell-var-from-loop, - fixme, - too-many-public-methods, - wildcard-import, - unused-wildcard-import, - singleton-comparison, - # pytest.mark.parametrize is not callable (not-callable) - not-callable, - # https://github.com/PyCQA/pylint/issues/3882 - # [Python 3.9] Value 'Optional' is unsubscriptable (unsubscriptable-object) (also Union) - unsubscriptable-object, - # Drop them one day: - redefined-outer-name, - broad-except, - bare-except, - no-else-return, - global-statement - -[SIMILARITIES] -# due to SQL -min-similarity-lines=1000 diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py index 8bbb76d83df..169e0f0f440 100644 --- a/tests/sqllogic/connection.py +++ b/tests/sqllogic/connection.py @@ -59,7 +59,10 @@ def default_clickhouse_odbc_conn_str(): OdbcConnectingArgs.create_from_kw( dsn="ClickHouse DSN (ANSI)", Timeout="300", - Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1&create_index_ignore_unique=1", + Url="http://localhost:8123/query?default_format=ODBCDriver2&" + "default_table_engine=MergeTree&union_default_mode=DISTINCT&" + "group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1&" + "create_index_ignore_unique=1", ) ) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index d1fb2d81a28..3a5d0c053ea 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -155,7 +155,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | function xargs-pylint { # $1 is number maximum arguments per pylint process sort | awk '$2=="text/x-script.python" {print $1}' | \ - xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/.pylintrc" --persistent=no --score=n + xargs -P "$(nproc)" -n "$1" pylint 
--rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n } find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50 From 43d5338f9233f19f774293f59cafb0ff2093030e Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Mar 2024 17:10:26 +0100 Subject: [PATCH 284/356] restart ci From fb1a684d98ab33d9260b236fb28c50372571030c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 6 Mar 2024 08:46:15 -0800 Subject: [PATCH 285/356] Fix race in PageCache (#60878) * Fix race in PageCache * Transportation security administration * Fix test flakiness, remove accidentally left over debug logging --- src/Common/PageCache.cpp | 42 ++++++++++--------- src/Common/PageCache.h | 20 ++++----- src/IO/CachedInMemoryReadBufferFromFile.cpp | 2 - src/IO/ReadSettings.h | 1 - .../0_stateless/02867_page_cache.reference | 16 ++++--- .../queries/0_stateless/02867_page_cache.sql | 13 +++--- 6 files changed, 47 insertions(+), 47 deletions(-) diff --git a/src/Common/PageCache.cpp b/src/Common/PageCache.cpp index 511ec23d431..d4598d4683b 100644 --- a/src/Common/PageCache.cpp +++ b/src/Common/PageCache.cpp @@ -191,7 +191,7 @@ size_t PageCache::maxChunks() const { return chunks_per_mmap_target * max_mmaps; size_t PageCache::getPinnedSize() const { - std::unique_lock lock(global_mutex); + std::lock_guard lock(global_mutex); return (total_chunks - lru.size()) * bytes_per_page * pages_per_chunk; } @@ -202,8 +202,11 @@ PageCache::MemoryStats PageCache::getResidentSetSize() const if (use_madv_free) { std::unordered_set cache_mmap_addrs; - for (const auto & m : mmaps) - cache_mmap_addrs.insert(reinterpret_cast(m.ptr)); + { + std::lock_guard lock(global_mutex); + for (const auto & m : mmaps) + cache_mmap_addrs.insert(reinterpret_cast(m.ptr)); + } ReadBufferFromFile in("/proc/self/smaps"); @@ -283,6 +286,7 @@ PageCache::MemoryStats PageCache::getResidentSetSize() const } #endif + std::lock_guard lock(global_mutex); stats.page_cache_rss = bytes_per_page * pages_per_chunk * total_chunks; return stats; } @@ -294,12 +298,12 @@ PinnedPageChunk PageCache::getOrSet(PageCacheKey key, bool detached_if_missing, bool incremented_profile_events = false; { - std::unique_lock lock(global_mutex); + std::lock_guard lock(global_mutex); auto * it = chunk_by_key.find(key); if (it == chunk_by_key.end()) { - chunk = getFreeChunk(lock); + chunk = getFreeChunk(); chassert(!chunk->key.has_value()); if (!detached_if_missing) @@ -331,14 +335,14 @@ PinnedPageChunk PageCache::getOrSet(PageCacheKey key, bool detached_if_missing, /// otherwise we may detach a chunk pinned by someone else, which may be unexpected /// for that someone else. Or maybe the latter is fine, dropCache() already does it.) if (chunk->pages_populated.get(0) && reinterpret_cast*>(chunk->data)->load(std::memory_order_relaxed) == 0) - evictChunk(chunk, lock); + evictChunk(chunk); } if (inject_eviction && chunk->key.has_value() && rng() % 10 == 0) { /// Simulate eviction of the chunk or some of its pages. 
if (rng() % 2 == 0) - evictChunk(chunk, lock); + evictChunk(chunk); else for (size_t i = 0; i < 20; ++i) chunk->pages_populated.unset(rng() % (chunk->size / chunk->page_size)); @@ -353,7 +357,7 @@ PinnedPageChunk PageCache::getOrSet(PageCacheKey key, bool detached_if_missing, } { - std::unique_lock chunk_lock(chunk->chunk_mutex); + std::lock_guard chunk_lock(chunk->chunk_mutex); if (chunk->pages_state == PageChunkState::Limbo) { @@ -383,7 +387,7 @@ void PageCache::removeRef(PageChunk * chunk) noexcept return; { - std::unique_lock lock(global_mutex); + std::lock_guard lock(global_mutex); prev_pin_count = chunk->pin_count.fetch_sub(1); if (prev_pin_count > 1) @@ -398,7 +402,7 @@ void PageCache::removeRef(PageChunk * chunk) noexcept } { - std::unique_lock chunk_lock(chunk->chunk_mutex); + std::lock_guard chunk_lock(chunk->chunk_mutex); /// Need to be extra careful here because we unlocked global_mutex above, so other /// getOrSet()/removeRef() calls could have happened during this brief period. @@ -421,7 +425,7 @@ static void logUnexpectedSyscallError(std::string name) #endif } -void PageCache::sendChunkToLimbo(PageChunk * chunk [[maybe_unused]], std::unique_lock & /* chunk_mutex */) const noexcept +void PageCache::sendChunkToLimbo(PageChunk * chunk [[maybe_unused]], std::lock_guard & /* chunk_mutex */) const noexcept { #ifdef MADV_FREE // if we're not on a very old version of Linux chassert(chunk->size == bytes_per_page * pages_per_chunk); @@ -454,7 +458,7 @@ void PageCache::sendChunkToLimbo(PageChunk * chunk [[maybe_unused]], std::unique #endif } -std::pair PageCache::restoreChunkFromLimbo(PageChunk * chunk, std::unique_lock & /* chunk_mutex */) const noexcept +std::pair PageCache::restoreChunkFromLimbo(PageChunk * chunk, std::lock_guard & /* chunk_mutex */) const noexcept { static_assert(sizeof(std::atomic) == 1, "char is not atomic?"); // Make sure our strategic memory reads/writes are not reordered or optimized out. @@ -505,10 +509,10 @@ std::pair PageCache::restoreChunkFromLimbo(PageChunk * chunk, st return {pages_restored, pages_evicted}; } -PageChunk * PageCache::getFreeChunk(std::unique_lock & lock /* global_mutex */) +PageChunk * PageCache::getFreeChunk() { if (lru.empty() || (mmaps.size() < max_mmaps && lru.front().key.has_value())) - addMmap(lock); + addMmap(); if (lru.empty()) throw Exception(ErrorCodes::MEMORY_LIMIT_EXCEEDED, "All chunks in the entire page cache ({:.3} GiB) are pinned.", bytes_per_page * pages_per_chunk * total_chunks * 1. / (1l << 30)); @@ -519,12 +523,12 @@ PageChunk * PageCache::getFreeChunk(std::unique_lock & lock /* globa size_t prev_pin_count = chunk->pin_count.fetch_add(1); chassert(prev_pin_count == 0); - evictChunk(chunk, lock); + evictChunk(chunk); return chunk; } -void PageCache::evictChunk(PageChunk * chunk, std::unique_lock & /* global_mutex */) +void PageCache::evictChunk(PageChunk * chunk) { if (chunk->key.has_value()) { @@ -548,7 +552,7 @@ void PageCache::evictChunk(PageChunk * chunk, std::unique_lock & /* chunk->pages_populated.unsetAll(); } -void PageCache::addMmap(std::unique_lock & /* global_mutex */) +void PageCache::addMmap() { /// ASLR by hand. void * address_hint = reinterpret_cast(std::uniform_int_distribution(0x100000000000UL, 0x700000000000UL)(rng)); @@ -564,13 +568,13 @@ void PageCache::addMmap(std::unique_lock & /* global_mutex */) void PageCache::dropCache() { - std::unique_lock lock(global_mutex); + std::lock_guard lock(global_mutex); /// Detach and free unpinned chunks. 
bool logged_error = false; for (PageChunk & chunk : lru) { - evictChunk(&chunk, lock); + evictChunk(&chunk); if (use_madv_free) { diff --git a/src/Common/PageCache.h b/src/Common/PageCache.h index a3f465a82f5..04411e6fc24 100644 --- a/src/Common/PageCache.h +++ b/src/Common/PageCache.h @@ -270,28 +270,28 @@ private: mutable std::mutex global_mutex; - pcg64 rng; + pcg64 rng TSA_GUARDED_BY(global_mutex); - std::vector mmaps; - size_t total_chunks = 0; + std::vector mmaps TSA_GUARDED_BY(global_mutex); + size_t total_chunks TSA_GUARDED_BY(global_mutex) = 0; /// All non-pinned chunks, including ones not assigned to any file. Least recently used is begin(). - boost::intrusive::list, boost::intrusive::constant_time_size> lru; + boost::intrusive::list, boost::intrusive::constant_time_size> lru TSA_GUARDED_BY(global_mutex); - HashMap chunk_by_key; + HashMap chunk_by_key TSA_GUARDED_BY(global_mutex); /// Get a usable chunk, doing eviction or allocation if needed. /// Caller is responsible for clearing pages_populated. - PageChunk * getFreeChunk(std::unique_lock & /* global_mutex */); - void addMmap(std::unique_lock & /* global_mutex */); - void evictChunk(PageChunk * chunk, std::unique_lock & /* global_mutex */); + PageChunk * getFreeChunk() TSA_REQUIRES(global_mutex); + void addMmap() TSA_REQUIRES(global_mutex); + void evictChunk(PageChunk * chunk) TSA_REQUIRES(global_mutex); void removeRef(PageChunk * chunk) noexcept; /// These may run in parallel with getFreeChunk(), so be very careful about which fields of the PageChunk we touch here. - void sendChunkToLimbo(PageChunk * chunk, std::unique_lock & /* chunk_mutex */) const noexcept; + void sendChunkToLimbo(PageChunk * chunk, std::lock_guard & /* chunk_mutex */) const noexcept; /// Returns {pages_restored, pages_evicted}. 
- std::pair restoreChunkFromLimbo(PageChunk * chunk, std::unique_lock & /* chunk_mutex */) const noexcept; + std::pair restoreChunkFromLimbo(PageChunk * chunk, std::lock_guard & /* chunk_mutex */) const noexcept; }; using PageCachePtr = std::shared_ptr; diff --git a/src/IO/CachedInMemoryReadBufferFromFile.cpp b/src/IO/CachedInMemoryReadBufferFromFile.cpp index 384d2229f14..ceaf0ca4752 100644 --- a/src/IO/CachedInMemoryReadBufferFromFile.cpp +++ b/src/IO/CachedInMemoryReadBufferFromFile.cpp @@ -136,7 +136,6 @@ bool CachedInMemoryReadBufferFromFile::nextImpl() char * piece_start = chunk->getChunk()->data + pos; size_t piece_size = chunk_size - pos; in->set(piece_start, piece_size); - LOG_INFO(&Poco::Logger::get("asdqwe"), "this {:x}, in {:x}, path {}, size {}, offset {:x}, pos {:x}", reinterpret_cast(this), reinterpret_cast(in.get()), cache_key.path, file_size.value(), cache_key.offset, pos); if (pos == 0) in->seek(cache_key.offset, SEEK_SET); else @@ -155,7 +154,6 @@ bool CachedInMemoryReadBufferFromFile::nextImpl() memmove(piece_start, in->position(), n); in->position() += n; pos += n; - LOG_INFO(&Poco::Logger::get("asdqwe"), "this {:x}, got {:x} bytes", reinterpret_cast(this), n); } chunk->markPrefixPopulated(chunk_size); diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index f4dc7880be4..31ea45d92a9 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -103,7 +103,6 @@ struct ReadSettings bool avoid_readthrough_cache_outside_query_context = true; size_t filesystem_cache_segments_batch_size = 20; - //asdqwe assign these two bool use_page_cache_for_disks_without_file_cache = false; bool read_from_page_cache_if_exists_otherwise_bypass_cache = false; bool page_cache_inject_eviction = false; diff --git a/tests/queries/0_stateless/02867_page_cache.reference b/tests/queries/0_stateless/02867_page_cache.reference index 5502059508a..c3d6484a175 100644 --- a/tests/queries/0_stateless/02867_page_cache.reference +++ b/tests/queries/0_stateless/02867_page_cache.reference @@ -1,23 +1,21 @@ -54975576145920 +cold read 54975576145920 PageCacheBytesUnpinnedRoundedToHugePages 1 PageCacheBytesUnpinnedRoundedToPages 1 PageCacheChunkMisses 1 ReadBufferFromS3Bytes 1 -54975576145920 +repeat read 1 54975576145920 PageCacheBytesUnpinnedRoundedToHugePages 1 PageCacheBytesUnpinnedRoundedToPages 1 PageCacheChunkDataHits 1 -54975576145920 +dropped and bypassed cache 54975576145920 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +repeat read 2 54975576145920 PageCacheBytesUnpinnedRoundedToHugePages 1 PageCacheBytesUnpinnedRoundedToPages 1 PageCacheChunkMisses 1 ReadBufferFromS3Bytes 1 -54975576145920 -PageCacheBytesUnpinnedRoundedToHugePages 1 -PageCacheBytesUnpinnedRoundedToPages 1 -PageCacheChunkMisses 1 -ReadBufferFromS3Bytes 1 -54975576145920 +repeat read 3 54975576145920 PageCacheBytesUnpinnedRoundedToHugePages 1 PageCacheBytesUnpinnedRoundedToPages 1 PageCacheChunkDataHits 1 diff --git a/tests/queries/0_stateless/02867_page_cache.sql b/tests/queries/0_stateless/02867_page_cache.sql index 8765b30ebc3..f1882de4af6 100644 --- a/tests/queries/0_stateless/02867_page_cache.sql +++ b/tests/queries/0_stateless/02867_page_cache.sql @@ -46,7 +46,7 @@ insert into events_snapshot select * from system.events; -- Cold read, should miss cache. (Populating cache on write is not implemented yet.) 
-select sum(k) from page_cache_03055; +select 'cold read', sum(k) from page_cache_03055; select * from events_diff where event not in ('PageCacheChunkDataHits'); truncate table events_snapshot; @@ -54,7 +54,7 @@ insert into events_snapshot select * from system.events; -- Repeat read, should hit cache. -select sum(k) from page_cache_03055; +select 'repeat read 1', sum(k) from page_cache_03055; select * from events_diff; truncate table events_snapshot; @@ -64,16 +64,17 @@ insert into events_snapshot select * from system.events; system drop page cache; -select sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; +select 'dropped and bypassed cache', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; -- Data could be read multiple times because we're not writing to cache. -select event, if(event in ('PageCacheChunkMisses', 'ReadBufferFromS3Bytes'), diff >= 1, diff) from events_diff where event not in ('PageCacheChunkDataHits'); +-- (Not checking PageCacheBytesUnpinned* because it's unreliable in this case because of an intentional race condition, see PageCache::evictChunk.) +select event, if(event in ('PageCacheChunkMisses', 'ReadBufferFromS3Bytes'), diff >= 1, diff) from events_diff where event not in ('PageCacheChunkDataHits', 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages'); truncate table events_snapshot; insert into events_snapshot select * from system.events; -- Repeat read, should still miss, but populate cache. -select sum(k) from page_cache_03055; +select 'repeat read 2', sum(k) from page_cache_03055; select * from events_diff where event not in ('PageCacheChunkDataHits'); truncate table events_snapshot; @@ -81,7 +82,7 @@ insert into events_snapshot select * from system.events; -- Read again, hit the cache. 
-select sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; +select 'repeat read 3', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; select * from events_diff; truncate table events_snapshot; From c8878332ca8a07d4c2a6bb0f4af4580d47dba779 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 6 Mar 2024 16:46:59 +0000 Subject: [PATCH 286/356] fix StorageSystemDictionaries --- src/Storages/System/StorageSystemDictionaries.cpp | 4 ++-- src/Storages/System/StorageSystemDictionaries.h | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index 7b733c872c4..a19741f92d1 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -51,8 +51,8 @@ catch (const DB::Exception &) } -StorageSystemDictionaries::StorageSystemDictionaries(const StorageID & storage_id_) - : IStorageSystemOneBlock(storage_id_, getColumnsDescription()) +StorageSystemDictionaries::StorageSystemDictionaries(const StorageID & storage_id_, ColumnsDescription columns_description_) + : IStorageSystemOneBlock(storage_id_, std::move(columns_description_)) { VirtualColumnsDescription virtuals; virtuals.addEphemeral("key", std::make_shared(), ""); diff --git a/src/Storages/System/StorageSystemDictionaries.h b/src/Storages/System/StorageSystemDictionaries.h index 5a8d7eae167..058b8b163d9 100644 --- a/src/Storages/System/StorageSystemDictionaries.h +++ b/src/Storages/System/StorageSystemDictionaries.h @@ -12,15 +12,13 @@ class Context; class StorageSystemDictionaries final : public IStorageSystemOneBlock { public: - explicit StorageSystemDictionaries(const StorageID & storage_id_); + StorageSystemDictionaries(const StorageID & storage_id_, ColumnsDescription columns_description_); std::string getName() const override { return "SystemDictionaries"; } static ColumnsDescription getColumnsDescription(); protected: - using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; From 2730f0b54fa5d7e81ca6e502bcb7567266cff4d9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 6 Mar 2024 16:48:03 +0000 Subject: [PATCH 287/356] Allow subqueries for IN with parallel replicas under a setting. --- src/Core/Settings.h | 1 + src/Interpreters/GlobalSubqueriesVisitor.h | 2 +- src/Planner/Planner.cpp | 2 +- src/Planner/findParallelReplicasQuery.cpp | 12 +- ...49_parallel_replicas_in_subquery.reference | 5 + .../02949_parallel_replicas_in_subquery.sql | 12 +- ...llel_replicas_joins_and_analyzer.reference | 176 +++++++++++++++++- ...arallel_replicas_joins_and_analyzer.sql.j2 | 38 ++++ 8 files changed, 238 insertions(+), 10 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 99a48d043d0..b1cabe66aaf 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -186,6 +186,7 @@ class IColumn; \ M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 
0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ + M(Bool, parallel_replicas_allow_subqueries_for_in, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index c53e54573c5..bb3bd120303 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -216,7 +216,7 @@ private: if (enable_parallel_processing_of_joins) { /// We don't enable parallel replicas for IN (subquery) - if (ast->as()) + if (!settings.parallel_replicas_allow_subqueries_for_in && ast->as()) { if (settings.allow_experimental_parallel_reading_from_replicas == 1) { diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index efd8f4faa42..219f67ecbd8 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1373,7 +1373,7 @@ void Planner::buildPlanForQueryNode() const auto & settings = query_context->getSettingsRef(); if (query_context->canUseTaskBasedParallelReplicas()) { - if (planner_context->getPreparedSets().hasSubqueries()) + if (!settings.parallel_replicas_allow_subqueries_for_in && planner_context->getPreparedSets().hasSubqueries()) { if (settings.allow_experimental_parallel_reading_from_replicas >= 2) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas"); diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index 95afcb605b3..e0e47915047 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -156,7 +157,8 @@ QueryTreeNodePtr replaceTablesWithDummyTables(const QueryTreeNodePtr & query, co /// Otherwise we can execute current query up to WithMergableStage only. const QueryNode * findQueryForParallelReplicas( std::stack stack, - const std::unordered_map & mapping) + const std::unordered_map & mapping, + const Settings & settings) { const QueryPlan::Node * prev_checked_node = nullptr; const QueryNode * res = nullptr; @@ -192,7 +194,11 @@ const QueryNode * findQueryForParallelReplicas( { const auto * expression = typeid_cast(step); const auto * filter = typeid_cast(step); - if (!expression && !filter) + + const auto * creating_sets = typeid_cast(step); + bool allowed_creating_sets = settings.parallel_replicas_allow_subqueries_for_in && creating_sets; + + if (!expression && !filter && !allowed_creating_sets) can_distribute_full_node = false; next_node_to_check = children.front(); @@ -274,7 +280,7 @@ const QueryNode * findQueryForParallelReplicas(const QueryTreeNodePtr & query_tr /// So that we build a list of candidates again, and call findQueryForParallelReplicas for it. 
auto new_stack = getSupportingParallelReplicasQuery(updated_query_tree.get()); const auto & mapping = planner.getQueryNodeToPlanStepMapping(); - const auto * res = findQueryForParallelReplicas(new_stack, mapping); + const auto * res = findQueryForParallelReplicas(new_stack, mapping, context->getSettingsRef()); /// Now, return a query from initial stack. if (res) diff --git a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference index 4d33751c699..8ae3cdf8f3a 100644 --- a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference +++ b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference @@ -2,7 +2,12 @@ 2 test2 8 3 test3 8 4 test4 1985 +2 test2 8 +3 test3 8 +4 test4 1985 --- 1 test1 42 +1 test1 42 --- 3 test3 +3 test3 diff --git a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql index 53b8a761cda..9000d37c801 100644 --- a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql +++ b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql @@ -8,19 +8,23 @@ INSERT INTO merge_tree_in_subqueries VALUES(5, 'test5', 0); SET max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_for_non_replicated_merge_tree=1; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED } +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=0; -- { serverError SUPPORT_IS_DISABLED } +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=1; SELECT '---'; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=0; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; SELECT '---'; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=0; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS 
allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; -- IN with tuples is allowed SELECT '---'; -SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2; +SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=0; +SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=1; DROP TABLE IF EXISTS merge_tree_in_subqueries; diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference index 6b1fdfd42a2..edd99058bd9 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference @@ -237,7 +237,7 @@ sub2 as (select y, z from tab2 where y != 4), sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), sub4 as (select z, a from tab3 where z != 8), sub5 as (select z, a, x, y, r.y, ll.z from sub4 rr any right join sub3 ll on ll.z = rr.z) -select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1;-- { echoOn } +select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; Expression Sorting Expression @@ -250,6 +250,93 @@ Expression ReadFromRemoteParallelReplicas Expression ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 
as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +Expression + Sorting + Expression + ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN are not allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0;-- { echoOn } +Expression + Sorting + Expression + Join + Expression + Join + Expression + CreatingSets + Expression + Expression + ReadFromMergeTree + CreatingSet + Expression + Filter + ReadFromSystemNumbers + Expression + ReadFromRemoteParallelReplicas + Expression + ReadFromRemoteParallelReplicas set parallel_replicas_prefer_local_join = 1; -- A query with only INNER/LEFT joins is fully send to replicas. JOIN is executed in GLOBAL mode. 
select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; @@ -500,3 +587,90 @@ Expression ReadFromRemoteParallelReplicas Expression ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +Expression + Sorting + Expression + ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN are not allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 
as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; +Expression + Sorting + Expression + Join + Expression + Join + Expression + CreatingSets + Expression + Expression + ReadFromMergeTree + CreatingSet + Expression + Filter + ReadFromSystemNumbers + Expression + ReadFromRemoteParallelReplicas + Expression + ReadFromRemoteParallelReplicas diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 index 7d2766d52f8..e0de8c64950 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 @@ -126,4 +126,42 @@ sub4 as (select z, a from tab3 where z != 8), sub5 as (select z, a, x, y, r.y, ll.z from sub4 rr any right join sub3 ll on ll.z = rr.z) select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +-- +-- Subqueries for IN allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; + +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; + +-- +-- Subqueries for IN are not allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left 
join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; + +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; + {%- endfor %} From 4dd5e4fe993b8ca2a5f693f8f809fa0c2fc76fa2 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 6 Mar 2024 16:30:47 +0000 Subject: [PATCH 288/356] CI: fix stage config (unit test release issue) #no_ci_cache --- tests/ci/ci_config.py | 16 ++++++++++++---- tests/ci/test_ci_config.py | 20 +++++++++++++++++++- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 308a9098c29..4d944d24765 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -11,7 +11,7 @@ from ci_utils import WithIter from integration_test_images import IMAGES -class CIStages: +class CIStages(metaclass=WithIter): NA = "UNKNOWN" BUILDS_1 = "Builds_1" BUILDS_2 = "Builds_2" @@ -547,9 +547,17 @@ class CIConfig: stage_type = CIStages.TESTS_2 elif self.is_test_job(job_name): stage_type = CIStages.TESTS_1 - if job_name == JobNames.LIBFUZZER_TEST: - # since fuzzers build in Builds_2, test must be in Tests_2 - stage_type = CIStages.TESTS_2 + if job_name in CI_CONFIG.test_configs: + required_build = CI_CONFIG.test_configs[job_name].required_build + assert required_build + if required_build in CI_CONFIG.get_builds_for_report( + JobNames.BUILD_CHECK + ): + stage_type = CIStages.TESTS_1 + else: + stage_type = CIStages.TESTS_2 + else: + stage_type = CIStages.TESTS_1 assert stage_type, f"BUG [{job_name}]" return stage_type diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 04c90105276..badbc4c5dcf 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import unittest -from ci_config import JobNames, CI_CONFIG, Runners +from ci_config import CIStages, JobNames, CI_CONFIG, Runners class TestCIConfig(unittest.TestCase): @@ -10,3 +10,21 @@ class TestCIConfig(unittest.TestCase): for job in JobNames: runner = CI_CONFIG.get_runner_type(job) self.assertIn(runner, Runners) + + def test_job_stage_config(self): + """check runner is provided w/o exception""" + for job in JobNames: + stage = CI_CONFIG.get_job_ci_stage(job) + if job in [ + JobNames.STYLE_CHECK, + JobNames.FAST_TEST, + JobNames.JEPSEN_KEEPER, + JobNames.BUILD_CHECK, + JobNames.BUILD_CHECK_SPECIAL, + ]: + assert ( + stage == CIStages.NA + ), "These jobs are not in CI stages, must be NA" + else: + assert stage != CIStages.NA, f"stage not found for [{job}]" + self.assertIn(stage, 
CIStages) From 5e4c51ec05fbb6d2c21b8b5371170b1454a4f16c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 6 Mar 2024 17:02:08 +0000 Subject: [PATCH 289/356] Update settings history --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 7b35c9fb239..3f816586085 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -90,6 +90,7 @@ static std::map sett {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, + {"parallel_replicas_allow_subqueries_for_in", false, true, "If true, subquery for IN will be executed on every follower replica"}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, From 0dea920a06d32acdda751cab20ef475163556769 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Mar 2024 18:26:16 +0100 Subject: [PATCH 290/356] reload checks From a0d161704a44c74c9a19ed3e8c7ac9ba79f8a2db Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 6 Mar 2024 17:55:04 +0000 Subject: [PATCH 291/356] CI: remove sqllancer, sqllogic, sqltest from pr wf #do_not_test --- tests/ci/ci_config.py | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 4d944d24765..2b19e0e5c13 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -473,21 +473,24 @@ perf_test_common_params = { "digest": perf_check_digest, "run_command": "performance_comparison_check.py", } -sqllancer_test_common_params = { - "digest": sqllancer_check_digest, - "run_command": "sqlancer_check.py", - "run_always": True, -} -sqllogic_test_params = { - "digest": sqllogic_check_digest, - "run_command": "sqllogic_test.py", - "timeout": 10800, -} -sql_test_params = { - "digest": sqltest_check_digest, - "run_command": "sqltest.py", - "timeout": 10800, -} +sqllancer_test_common_params = JobConfig( + digest=sqllancer_check_digest, + run_command="sqlancer_check.py", + release_only=True, + run_always=True, +) +sqllogic_test_params = JobConfig( + digest=sqllogic_check_digest, + run_command="sqllogic_test.py", + timeout=10800, + release_only=True, +) +sql_test_params = JobConfig( + digest=sqltest_check_digest, + run_command="sqltest.py", + timeout=10800, + release_only=True, +) clickbench_test_params = { "digest": DigestConfig( include_paths=[ @@ -1256,17 +1259,15 @@ CI_CONFIG = CIConfig( job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore ), JobNames.SQLANCER: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**sqllancer_test_common_params) # type: ignore + Build.PACKAGE_RELEASE, job_config=sqllancer_test_common_params ), JobNames.SQLANCER_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**sqllancer_test_common_params) # type: ignore + Build.PACKAGE_DEBUG, 
job_config=sqllancer_test_common_params ), JobNames.SQL_LOGIC_TEST: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**sqllogic_test_params) # type: ignore - ), - JobNames.SQLTEST: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**sql_test_params) # type: ignore + Build.PACKAGE_RELEASE, job_config=sqllogic_test_params ), + JobNames.SQLTEST: TestConfig(Build.PACKAGE_RELEASE, job_config=sql_test_params), JobNames.CLCIKBENCH_TEST: TestConfig( Build.PACKAGE_RELEASE, job_config=JobConfig(**clickbench_test_params) # type: ignore ), From d008b4eb4f32aa9d5473699dccfc094b2a47d1da Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 6 Mar 2024 20:24:38 +0100 Subject: [PATCH 292/356] fix test --- src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index c6407a99a4e..ff9941ee808 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -623,6 +623,15 @@ void DataPartStorageOnDiskBase::remove( } } + if (!disk->exists(from)) + { + LOG_ERROR(log, "Directory {} (part to remove) doesn't exist or one of nested files has gone. Most likely this is due to manual removing. This should be discouraged. Ignoring.", fullPath(disk, from)); + /// We will never touch this part again, so unlocking it from zero-copy + if (!can_remove_description) + can_remove_description.emplace(can_remove_callback()); + return; + } + try { disk->moveDirectory(from, to); From 4e1257974c597fe97a4a024eb2cd454f152c762a Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 6 Mar 2024 18:26:20 +0000 Subject: [PATCH 293/356] CI: disable rerun check helper for build report jobs #do_not_test --- tests/ci/ci.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index b222e81ad73..5049c4a6558 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1796,24 +1796,29 @@ def main() -> int: print(build_result.as_json()) print("::endgroup::") else: - # this is a test job - check if GH commit status is present - - # rerun helper check - # FIXME: remove rerun_helper check and rely on ci cache only + # this is a test job - check if GH commit status or cache record is present commit = get_commit( Github(get_best_robot_token(), per_page=100), pr_info.sha ) - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - status = rerun_helper.get_finished_status() - assert status - previous_status = status.state - print("::group::Commit Status") - print(status) - print("::endgroup::") + + # rerun helper check + # FIXME: remove rerun_helper check and rely on ci cache only + if check_name not in ( + # we might want to rerun reports' jobs - disable rerun check for them + JobNames.BUILD_CHECK, + JobNames.BUILD_CHECK_SPECIAL, + ): + rerun_helper = RerunHelper(commit, check_name_with_group) + if rerun_helper.is_already_finished_by_status(): + status = rerun_helper.get_finished_status() + assert status + previous_status = status.state + print("::group::Commit Status") + print(status) + print("::endgroup::") # ci cache check - elif not indata["ci_flags"][Labels.NO_CI_CACHE]: + if not previous_status and not indata["ci_flags"][Labels.NO_CI_CACHE]: ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() job_config = CI_CONFIG.get_job_config(check_name) if ci_cache.is_successful( 
From dbdaa9863e4528e578315531ce3725bb09973901 Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Wed, 6 Mar 2024 15:54:04 -0800 Subject: [PATCH 294/356] Refactor PR according to review --- CMakeLists.txt | 4 ++-- cmake/linux/toolchain-aarch64.cmake | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bd179a799c..49dc12b89d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -319,8 +319,8 @@ if (COMPILER_CLANG) endif() endif () -set (COMPILER_FLAGS "${COMPILER_FLAGS}") - +# Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms +set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off") # Our built-in unwinder only supports DWARF version up to 4. set (DEBUG_INFO_FLAGS "-g") diff --git a/cmake/linux/toolchain-aarch64.cmake b/cmake/linux/toolchain-aarch64.cmake index d2ce2d97d8e..b80cc01296d 100644 --- a/cmake/linux/toolchain-aarch64.cmake +++ b/cmake/linux/toolchain-aarch64.cmake @@ -13,6 +13,6 @@ set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-aarch set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/aarch64-linux-gnu/libc") -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffp-contract=off --gcc-toolchain=${TOOLCHAIN_PATH}") -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffp-contract=off --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") From ae7772e5873d3db874dcf1a0e10cc448fdae1e3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 7 Mar 2024 00:58:13 +0100 Subject: [PATCH 295/356] Fix a typo --- .../02908_many_requests_to_system_replicas.reference | 2 +- .../0_stateless/02908_many_requests_to_system_replicas.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference index af0e50ec332..f1ca07ef408 100644 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference @@ -1,5 +1,5 @@ Creating 300 tables -Making making 200 requests to system.replicas +Making 200 requests to system.replicas Query system.replicas while waiting for other concurrent requests to finish 0 900 diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh index f93175529c0..d3eed891ab9 100755 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh @@ -33,7 +33,7 @@ done wait; -echo "Making making $CONCURRENCY requests to system.replicas" +echo "Making $CONCURRENCY requests to system.replicas" for i in `seq 1 $CONCURRENCY`; do From 0f4cd3198df59ba7e60730d6a815f8d40f90c913 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 7 Mar 2024 01:09:18 +0000 Subject: [PATCH 296/356] add version specification --- docs/en/sql-reference/statements/grant.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 879354d714b..b9c69b16e85 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -507,7 
+507,7 @@ are turned on. ### NAMED COLLECTION ADMIN -Allows a certain operation on a specified named colleciton. +Allows a certain operation on a specified named colleciton. Before version 23.7 it was called NAMED COLLECTION CONTROL, and after 23.7 NAMED COLLECTION ADMIN as added and NAMED COLLECTION CONTROL is preserved as an alias. - `NAMED COLLECTION ADMIN`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION CONTROL` - `CREATE NAMED COLLECTION`. Level: `NAMED_COLLECTION` @@ -517,9 +517,11 @@ Allows a certain operation on a specified named colleciton. - `SHOW NAMED COLLECTIONS SECRETS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS SECRETS` - `NAMED COLLECTION`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION USAGE, USE NAMED COLLECTION` +Unlike all other grants (CREATE, DROP, ALTER, SHOW) grant NAMED COLLECTION was added only in 23.7, while all others were added earlier - in 22.12. + **Examples** -Assuming a named collecion is called abc, we grant privilege CREATE NAMED COLLECTION to user john. +Assuming a named collection is called abc, we grant privilege CREATE NAMED COLLECTION to user john. - `GRANT CREATE NAMED COLLECTION ON abc TO john` ### ALL From c901c9c1f31a24e4b28f579a6d704de6be7231d7 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 7 Mar 2024 01:47:10 +0000 Subject: [PATCH 297/356] fix typo --- docs/en/sql-reference/statements/grant.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index b9c69b16e85..c7ce7ffe5e4 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -507,7 +507,7 @@ are turned on. ### NAMED COLLECTION ADMIN -Allows a certain operation on a specified named colleciton. Before version 23.7 it was called NAMED COLLECTION CONTROL, and after 23.7 NAMED COLLECTION ADMIN as added and NAMED COLLECTION CONTROL is preserved as an alias. +Allows a certain operation on a specified named collection. Before version 23.7 it was called NAMED COLLECTION CONTROL, and after 23.7 NAMED COLLECTION ADMIN as added and NAMED COLLECTION CONTROL is preserved as an alias. - `NAMED COLLECTION ADMIN`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION CONTROL` - `CREATE NAMED COLLECTION`. Level: `NAMED_COLLECTION` From de7f7e196edd5fdb6b9ab2c2712acda918d02ddd Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 7 Mar 2024 13:55:22 +0800 Subject: [PATCH 298/356] Update docs/en/sql-reference/statements/grant.md --- docs/en/sql-reference/statements/grant.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index c7ce7ffe5e4..a93db29e82c 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -507,7 +507,7 @@ are turned on. ### NAMED COLLECTION ADMIN -Allows a certain operation on a specified named collection. Before version 23.7 it was called NAMED COLLECTION CONTROL, and after 23.7 NAMED COLLECTION ADMIN as added and NAMED COLLECTION CONTROL is preserved as an alias. +Allows a certain operation on a specified named collection. Before version 23.7 it was called NAMED COLLECTION CONTROL, and after 23.7 NAMED COLLECTION ADMIN was added and NAMED COLLECTION CONTROL is preserved as an alias. - `NAMED COLLECTION ADMIN`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION CONTROL` - `CREATE NAMED COLLECTION`. 
Level: `NAMED_COLLECTION` From 9e7894d8cbf4ea08657326083cf677699ed0be12 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 7 Mar 2024 16:38:07 +0800 Subject: [PATCH 299/356] Reduce the number of read rows from `system.numbers` (#60546) * Fix read more data for system.numbers * Fix tests --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 22 ++++++++++++++----- .../integration/test_storage_numbers/test.py | 16 ++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index c72c63d09c4..a294683c640 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -26,9 +26,11 @@ namespace class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_) + NumbersSource(UInt64 block_size_, UInt64 offset_, std::optional limit_, UInt64 step_) : ISource(createHeader()), block_size(block_size_), next(offset_), step(step_) { + if (limit_.has_value()) + end = limit_.value() + offset_; } String getName() const override { return "Numbers"; } @@ -38,24 +40,32 @@ public: protected: Chunk generate() override { - auto column = ColumnUInt64::create(block_size); + UInt64 real_block_size = block_size; + if (end.has_value()) + { + if (end.value() <= next) + return {}; + real_block_size = std::min(block_size, end.value() - next); + } + auto column = ColumnUInt64::create(real_block_size); ColumnUInt64::Container & vec = column->getData(); UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. UInt64 * pos = vec.data(); /// This also accelerates the code. - UInt64 * end = &vec[block_size]; - iota(pos, static_cast(end - pos), curr); + UInt64 * end_ = &vec[real_block_size]; + iota(pos, static_cast(end_ - pos), curr); next += step; progress(column->size(), column->byteSize()); - return {Columns{std::move(column)}, block_size}; + return {Columns{std::move(column)}, real_block_size}; } private: UInt64 block_size; UInt64 next; + std::optional end; /// not included UInt64 step; }; @@ -478,7 +488,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() for (size_t i = 0; i < num_streams; ++i) { auto source - = std::make_shared(max_block_size, numbers_storage.offset + i * max_block_size, num_streams * max_block_size); + = std::make_shared(max_block_size, numbers_storage.offset + i * max_block_size, numbers_storage.limit, num_streams * max_block_size); if (numbers_storage.limit && i == 0) { diff --git a/tests/integration/test_storage_numbers/test.py b/tests/integration/test_storage_numbers/test.py index 61fe8719ea2..cbd7793fd8c 100644 --- a/tests/integration/test_storage_numbers/test.py +++ b/tests/integration/test_storage_numbers/test.py @@ -242,3 +242,19 @@ def test_overflow(started_cluster): ) assert response == "(18446744073709551614),(18446744073709551615),(0),(1),(2)" check_read_rows("test_overflow", 5) + + +def test_non_number_filter(started_cluster): + response = node.query( + "SELECT toString(number) as a FROM numbers(3) WHERE a = '1' FORMAT Values", + query_id="test_non_number_filter", + ) + assert response == "('1')" + check_read_rows("test_non_number_filter", 3) + + response = node.query( + "SELECT toString(number) as a FROM numbers(1, 4) WHERE a = '1' FORMAT Values SETTINGS max_block_size = 3", + query_id="test_non_number_filter2", + ) + assert response == "('1')" + check_read_rows("test_non_number_filter2", 4) From 
9967c60c5391f74164323320a6be6103082da653 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 13 Feb 2024 19:50:51 +0100 Subject: [PATCH 300/356] Update sccache --- docker/test/util/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 396d5801be9..dea2e448a2a 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -94,7 +94,7 @@ RUN mkdir /tmp/ccache \ && rm -rf /tmp/ccache ARG TARGETARCH -ARG SCCACHE_VERSION=v0.5.4 +ARG SCCACHE_VERSION=v0.7.7 ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 # sccache requires a value for the region. So by default we use The Default Region ENV SCCACHE_REGION=us-east-1 From 0cedafef1ea072a783f4893391f9470a7ed9c1ff Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 16 Feb 2024 14:50:56 +0100 Subject: [PATCH 301/356] Do not test odbc in fast tests --- docker/test/fasttest/Dockerfile | 9 --------- 1 file changed, 9 deletions(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index e10555d4d4a..b91410941ad 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -26,15 +26,6 @@ RUN apt-get update \ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 -ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" - -RUN mkdir -p /tmp/clickhouse-odbc-tmp \ - && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ - && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ - && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ - && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ - && rm -rf /tmp/clickhouse-odbc-tmp - # Give suid to gdb to grant it attach permissions # chmod 777 to make the container user independent RUN chmod u+s /usr/bin/gdb \ From 1d8fe381beef1552015522b06136e22868b03894 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 22 Feb 2024 14:30:21 +0100 Subject: [PATCH 302/356] Update the odbc in stateless tests --- docker/test/stateless/Dockerfile | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 7f4bad3d4e6..19b8a5ec65b 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -3,7 +3,7 @@ ARG FROM_TAG=latest FROM clickhouse/test-base:$FROM_TAG -ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" +ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" # golang version 1.13 on Ubuntu 20 is enough for tests RUN apt-get update -y \ @@ -50,11 +50,14 @@ RUN apt-get update -y \ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0 RUN mkdir -p /tmp/clickhouse-odbc-tmp \ - && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ - && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ - && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ - && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ - && rm -rf /tmp/clickhouse-odbc-tmp + && cd /tmp/clickhouse-odbc-tmp \ + && curl -L ${odbc_driver_url} | tar --strip-components=1 -xz clickhouse-odbc-1.1.6-Linux \ + && mkdir /usr/local/lib64 -p \ + && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib64/ \ + && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ + && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ + && sed -i 's"=libclickhouseodbc"=/usr/local/lib64/libclickhouseodbc"' /etc/odbcinst.ini \ + && rm -rf /tmp/clickhouse-odbc-tmp ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone From 176ed5f96b31190d33a7966e5f93204b2402a82d Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 13 Feb 2024 21:09:06 +0100 Subject: [PATCH 303/356] Use curl instead of wget --- docker/test/stateless/Dockerfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 19b8a5ec65b..cd8864c6299 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -35,7 +35,6 @@ RUN apt-get update -y \ sudo \ tree \ unixodbc \ - wget \ rustc \ cargo \ zstd \ @@ -73,11 +72,11 @@ ARG TARGETARCH # Download Minio-related binaries RUN arch=${TARGETARCH:-amd64} \ - && wget "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -O ./minio \ - && wget "https://dl.min.io/client/mc/release/linux-${arch}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -O ./mc \ + && curl -L "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -o ./minio \ + && curl -L "https://dl.min.io/client/mc/release/linux-${arch}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -o ./mc \ && chmod +x ./mc ./minio -RUN wget --no-verbose 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ +RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ && tar -xvf hadoop-3.3.1.tar.gz \ && rm -rf hadoop-3.3.1.tar.gz From cf0780a777bf43aa0496d0f28ec8d551319730ff Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 14 Feb 2024 14:09:58 +0100 Subject: [PATCH 304/356] Change the binary-builder directory --- docker/images.json | 4 ++-- docker/packager/{binary => binary-builder}/Dockerfile | 0 docker/packager/{binary => binary-builder}/build.sh | 0 docker/packager/packager | 8 ++++---- 4 files changed, 6 insertions(+), 6 deletions(-) rename docker/packager/{binary => binary-builder}/Dockerfile (100%) rename docker/packager/{binary => binary-builder}/build.sh (100%) diff --git a/docker/images.json b/docker/images.json index 2bf1efe005f..6c9935fe3ff 100644 --- a/docker/images.json +++ b/docker/images.json @@ -1,5 +1,5 @@ { - "docker/packager/binary": { + "docker/packager/binary-builder": { "name": "clickhouse/binary-builder", "dependent": [] }, @@ -30,7 +30,7 @@ "docker/test/util": { "name": "clickhouse/test-util", "dependent": [ - "docker/packager/binary", + "docker/packager/binary-builder", "docker/test/base", "docker/test/fasttest" ] diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary-builder/Dockerfile similarity index 100% rename from docker/packager/binary/Dockerfile rename to docker/packager/binary-builder/Dockerfile diff --git a/docker/packager/binary/build.sh b/docker/packager/binary-builder/build.sh similarity index 100% rename from docker/packager/binary/build.sh rename to docker/packager/binary-builder/build.sh diff --git a/docker/packager/packager b/docker/packager/packager index ca0ae8358f3..23fc26bc1a4 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -1,16 +1,16 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import subprocess -import os import argparse import logging +import os +import subprocess import sys from pathlib import Path from typing import List, Optional SCRIPT_PATH = Path(__file__).absolute() -IMAGE_TYPE = "binary" -IMAGE_NAME = f"clickhouse/{IMAGE_TYPE}-builder" +IMAGE_TYPE = "binary-builder" +IMAGE_NAME = f"clickhouse/{IMAGE_TYPE}" class BuildException(Exception): From 0cbfe3421f78dd68d2caa30be69690761fe79d84 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 16 Feb 2024 16:52:15 +0100 Subject: [PATCH 305/356] Reduce size of test images by removing clang from there --- docker/images.json | 5 ++- docker/packager/binary-builder/Dockerfile | 4 +- docker/test/fasttest/Dockerfile | 54 +++++++++++++++++++++++ docker/test/util/Dockerfile | 54 ----------------------- 4 files changed, 59 insertions(+), 58 deletions(-) diff --git a/docker/images.json b/docker/images.json index 6c9935fe3ff..0d852b1ca8d 100644 --- a/docker/images.json +++ b/docker/images.json @@ -30,7 +30,6 @@ "docker/test/util": { "name": "clickhouse/test-util", "dependent": [ - "docker/packager/binary-builder", "docker/test/base", "docker/test/fasttest" ] @@ -67,7 +66,9 @@ }, "docker/test/fasttest": { "name": "clickhouse/fasttest", - "dependent": [] + "dependent": [ + "docker/packager/binary-builder" + ] }, "docker/test/style": { "name": "clickhouse/style-test", diff --git a/docker/packager/binary-builder/Dockerfile b/docker/packager/binary-builder/Dockerfile index e20cbe9781c..6c2fc3dba57 100644 --- a/docker/packager/binary-builder/Dockerfile +++ b/docker/packager/binary-builder/Dockerfile @@ -1,6 +1,6 @@ # docker build -t clickhouse/binary-builder . ARG FROM_TAG=latest -FROM clickhouse/test-util:latest AS cctools +FROM clickhouse/fasttest:latest AS cctools # The cctools are built always from the clickhouse/test-util:latest and cached inline # Theoretically, it should improve rebuild speed significantly ENV CC=clang-${LLVM_VERSION} @@ -37,7 +37,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \ # END COMPILE # !!!!!!!!!!! -FROM clickhouse/test-util:$FROM_TAG +FROM clickhouse/fasttest:$FROM_TAG ENV CC=clang-${LLVM_VERSION} ENV CXX=clang++-${LLVM_VERSION} diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index b91410941ad..62cdcc3f830 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -6,9 +6,18 @@ FROM clickhouse/test-util:$FROM_TAG RUN apt-get update \ && apt-get install \ brotli \ + clang-${LLVM_VERSION} \ + clang-tidy-${LLVM_VERSION} \ + cmake \ expect \ file \ + libclang-${LLVM_VERSION}-dev \ + libclang-rt-${LLVM_VERSION}-dev \ + lld-${LLVM_VERSION} \ + llvm-${LLVM_VERSION} \ + llvm-${LLVM_VERSION}-dev \ lsof \ + ninja-build \ odbcinst \ psmisc \ python3 \ @@ -26,6 +35,51 @@ RUN apt-get update \ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 +# This symlink is required by gcc to find the lld linker +RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld +# for external_symbolizer_path +RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer +# FIXME: workaround for "The imported target "merge-fdata" references the file" error +# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d +RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake + +ARG CCACHE_VERSION=4.6.1 +RUN mkdir /tmp/ccache \ + && cd /tmp/ccache \ + && curl -L \ + -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz \ + -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz.asc \ + && gpg --recv-keys --keyserver hkps://keyserver.ubuntu.com 5A939A71A46792CF57866A51996DDA075594ADB8 \ + && gpg --verify ccache-4.6.1.tar.xz.asc \ + && tar xf ccache-$CCACHE_VERSION.tar.xz \ + && cd /tmp/ccache/ccache-$CCACHE_VERSION \ + && cmake 
-DCMAKE_INSTALL_PREFIX=/usr \ + -DCMAKE_BUILD_TYPE=None \ + -DZSTD_FROM_INTERNET=ON \ + -DREDIS_STORAGE_BACKEND=OFF \ + -Wno-dev \ + -B build \ + -S . \ + && make VERBOSE=1 -C build \ + && make install -C build \ + && cd / \ + && rm -rf /tmp/ccache + +ARG TARGETARCH +ARG SCCACHE_VERSION=v0.7.7 +ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 +# sccache requires a value for the region. So by default we use The Default Region +ENV SCCACHE_REGION=us-east-1 +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \ + tar xz -C /tmp \ + && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \ + && rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r + # Give suid to gdb to grant it attach permissions # chmod 777 to make the container user independent RUN chmod u+s /usr/bin/gdb \ diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index dea2e448a2a..4f2dc9df849 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -41,20 +41,11 @@ RUN apt-get update \ bash \ bsdmainutils \ build-essential \ - clang-${LLVM_VERSION} \ - clang-tidy-${LLVM_VERSION} \ - cmake \ gdb \ git \ gperf \ - libclang-rt-${LLVM_VERSION}-dev \ - lld-${LLVM_VERSION} \ - llvm-${LLVM_VERSION} \ - llvm-${LLVM_VERSION}-dev \ - libclang-${LLVM_VERSION}-dev \ moreutils \ nasm \ - ninja-build \ pigz \ rename \ software-properties-common \ @@ -63,49 +54,4 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -# This symlink is required by gcc to find the lld linker -RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld -# for external_symbolizer_path -RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer -# FIXME: workaround for "The imported target "merge-fdata" references the file" error -# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d -RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake - -ARG CCACHE_VERSION=4.6.1 -RUN mkdir /tmp/ccache \ - && cd /tmp/ccache \ - && curl -L \ - -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz \ - -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz.asc \ - && gpg --recv-keys --keyserver hkps://keyserver.ubuntu.com 5A939A71A46792CF57866A51996DDA075594ADB8 \ - && gpg --verify ccache-4.6.1.tar.xz.asc \ - && tar xf ccache-$CCACHE_VERSION.tar.xz \ - && cd /tmp/ccache/ccache-$CCACHE_VERSION \ - && cmake -DCMAKE_INSTALL_PREFIX=/usr \ - -DCMAKE_BUILD_TYPE=None \ - -DZSTD_FROM_INTERNET=ON \ - -DREDIS_STORAGE_BACKEND=OFF \ - -Wno-dev \ - -B build \ - -S . \ - && make VERBOSE=1 -C build \ - && make install -C build \ - && cd / \ - && rm -rf /tmp/ccache - -ARG TARGETARCH -ARG SCCACHE_VERSION=v0.7.7 -ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 -# sccache requires a value for the region. 
So by default we use The Default Region -ENV SCCACHE_REGION=us-east-1 -RUN arch=${TARGETARCH:-amd64} \ - && case $arch in \ - amd64) rarch=x86_64 ;; \ - arm64) rarch=aarch64 ;; \ - esac \ - && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \ - tar xz -C /tmp \ - && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \ - && rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r - COPY process_functional_tests_result.py / From a0dbe2aff1fb1742ef4cf3bb84072ba08672d811 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 16 Feb 2024 17:15:01 +0100 Subject: [PATCH 306/356] Do not run long 02265_test_dns_profile_events in fast tests --- tests/queries/0_stateless/02265_test_dns_profile_events.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02265_test_dns_profile_events.sh b/tests/queries/0_stateless/02265_test_dns_profile_events.sh index 756a761a0ae..50fa6ba2cda 100755 --- a/tests/queries/0_stateless/02265_test_dns_profile_events.sh +++ b/tests/queries/0_stateless/02265_test_dns_profile_events.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel +# Tags: no-parallel, no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From e5a7ec7be68ac6c72b3eb27f9cfcce5c20c1a353 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 21 Feb 2024 18:52:11 +0100 Subject: [PATCH 307/356] Do not build odbc driver in sqllogic --- docker/test/sqllogic/Dockerfile | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 05130044c45..1ea1e52e6fa 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -24,17 +24,18 @@ RUN pip3 install \ deepdiff \ sqlglot -ARG odbc_repo="https://github.com/ClickHouse/clickhouse-odbc.git" +ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" + +RUN mkdir -p /tmp/clickhouse-odbc-tmp \ + && cd /tmp/clickhouse-odbc-tmp \ + && curl -L ${odbc_driver_url} | tar --strip-components=1 -xz clickhouse-odbc-1.1.6-Linux \ + && mkdir /usr/local/lib64 -p \ + && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib64/ \ + && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ + && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ + && sed -i 's"=libclickhouseodbc"=/usr/local/lib64/libclickhouseodbc"' /etc/odbcinst.ini \ + && rm -rf /tmp/clickhouse-odbc-tmp -RUN git clone --recursive ${odbc_repo} \ - && mkdir -p /clickhouse-odbc/build \ - && cmake -S /clickhouse-odbc -B /clickhouse-odbc/build \ - && ls /clickhouse-odbc/build/driver \ - && make -j 10 -C /clickhouse-odbc/build \ - && ls /clickhouse-odbc/build/driver \ - && mkdir -p /usr/local/lib64/ && cp /clickhouse-odbc/build/driver/lib*.so /usr/local/lib64/ \ - && odbcinst -i -d -f /clickhouse-odbc/packaging/odbcinst.ini.sample \ - && odbcinst -i -s -l -f /clickhouse-odbc/packaging/odbc.ini.sample ENV TZ=Europe/Amsterdam ENV MAX_RUN_TIME=9000 From dd5510674e43b7ab6ecccd2e7be23f6c56097d74 Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Thu, 7 Mar 2024 17:17:47 +0800 Subject: [PATCH 308/356] Update docs/zh/sql-reference/data-types/array.md Co-authored-by: flynn --- 
docs/zh/sql-reference/data-types/array.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md index 41ed98fd053..da4cea65101 100644 --- a/docs/zh/sql-reference/data-types/array.md +++ b/docs/zh/sql-reference/data-types/array.md @@ -69,7 +69,7 @@ Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception ## æ•°ç»„å¤§å° {#array-size} -å¯ä»¥ä½¿ç”¨`size0`å­åˆ—找到数组的大å°ï¼Œè€Œæ— éœ€è¯»å–整个列。对于多维数组,您å¯ä»¥ä½¿ç”¨`sizeN-1`,其中`N`是所需的维度。 +å¯ä»¥ä½¿ç”¨ `size0` å­åˆ—找到数组的大å°ï¼Œè€Œæ— éœ€è¯»å–整个列。对于多维数组,您å¯ä»¥ä½¿ç”¨ `sizeN-1`,其中 `N` 是所需的维度。 **例å­** From a70b09b4e570f5a90bbb66ca6ff140bef1737b6f Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 29 Feb 2024 23:33:36 +0100 Subject: [PATCH 309/356] Investigate an issue with MarkReleaseReady tags #no_ci_cache #job_package_release #job_package_aarch64 tags #job_binary_darwin #job_binary_darwin_aarch64 --- .github/workflows/master.yml | 18 +++++++++++++++++- .github/workflows/release_branches.yml | 18 +++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f18a83e1b97..2853adff48a 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -305,7 +305,7 @@ jobs: runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} MarkReleaseReady: - if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} + if: ${{ !failure() && !cancelled() }} needs: - BuilderBinDarwin - BuilderBinDarwinAarch64 @@ -313,9 +313,25 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: + - name: Debug + run: | + echo need with different filters + cat << 'EOF' + ${{ toJSON(needs) }} + ${{ toJSON(needs.*.result) }} + no failures ${{ !contains(needs.*.result, 'failure') }} + no skips ${{ !contains(needs.*.result, 'skipped') }} + no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} + EOF + - name: Not ready + # fail the job to be able restart it + if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} + run: exit 1 - name: Check out repository code + if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} uses: ClickHouse/checkout@v1 - name: Mark Commit Release Ready + if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 mark_release_ready.py diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index bdb045a70a6..9e95b3d3d8f 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -206,7 +206,7 @@ jobs: runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} MarkReleaseReady: - if: ${{ ! 
(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} + if: ${{ !failure() && !cancelled() }} needs: - BuilderBinDarwin - BuilderBinDarwinAarch64 @@ -214,9 +214,25 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker-aarch64] steps: + - name: Debug + run: | + echo need with different filters + cat << 'EOF' + ${{ toJSON(needs) }} + ${{ toJSON(needs.*.result) }} + no failures ${{ !contains(needs.*.result, 'failure') }} + no skips ${{ !contains(needs.*.result, 'skipped') }} + no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} + EOF + - name: Not ready + # fail the job to be able restart it + if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} + run: exit 1 - name: Check out repository code + if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} uses: ClickHouse/checkout@v1 - name: Mark Commit Release Ready + if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 mark_release_ready.py From 8e4dec32058615e2167209934422b9382ca8dca2 Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Thu, 7 Mar 2024 17:23:05 +0800 Subject: [PATCH 310/356] Update nullable.md --- docs/zh/sql-reference/data-types/nullable.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/zh/sql-reference/data-types/nullable.md b/docs/zh/sql-reference/data-types/nullable.md index 7ecbc3d1f40..b1cc9dd7bae 100644 --- a/docs/zh/sql-reference/data-types/nullable.md +++ b/docs/zh/sql-reference/data-types/nullable.md @@ -22,8 +22,7 @@ slug: /zh/sql-reference/data-types/nullable ## nullå­åˆ— {#finding-null} -It is possible to find `NULL` values in a column by using `null` subcolumn without reading the whole column. It returns `1` if the corresponding value is `NULL` and `0` otherwise. -通过使用`null`å­åˆ—å¯ä»¥åœ¨åˆ—中查找`NULL`值,而无需读å–整个列。如果对应的值为`NULL`,则返回`1`,å¦åˆ™è¿”回`0`。 +通过使用 `null` å­åˆ—å¯ä»¥åœ¨åˆ—中查找 `NULL` 值,而无需读å–整个列。如果对应的值为 `NULL`,则返回 `1`,å¦åˆ™è¿”回 `0`。 **示例** From e3f9fb273300997651c017f7da208899ff7f6b83 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 6 Mar 2024 12:45:59 +0100 Subject: [PATCH 311/356] Trigger Mergeable check unconditionally in the final_check.py --- tests/ci/finish_check.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index e5268947304..eebc846f4b1 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -11,7 +11,7 @@ from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, - update_mergeable_check, + trigger_mergeable_check, ) from get_robot_token import get_best_robot_token from pr_info import PRInfo @@ -24,14 +24,11 @@ def main(): pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - # Update the Mergeable Check at the final step - update_mergeable_check(commit, pr_info, CI_STATUS_NAME) + # Unconditionally update the Mergeable Check at the final step + statuses = get_commit_filtered_statuses(commit) + trigger_mergeable_check(commit, statuses) - statuses = [ - status - for status in get_commit_filtered_statuses(commit) - if status.context == CI_STATUS_NAME - ] + statuses = [s for s in statuses if s.context == CI_STATUS_NAME] if not statuses: return # Take the latest status From 0edd614349dbca31b27bb4078ff44b9480bb8354 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 6 Mar 2024 14:42:19 +0100 Subject: [PATCH 312/356] Fix RESTORE async with Replicated database --- src/Backups/BackupsWorker.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 7c6f0c74b1a..e351464d928 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -864,7 +865,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt /// process_list_element_holder is used to make an element in ProcessList live while RESTORE is working asynchronously. auto process_list_element = context_in_use->getProcessListElement(); - scheduleFromThreadPool( + thread_pool.scheduleOrThrowOnError( [this, restore_query, restore_id, @@ -876,6 +877,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt on_exception, process_list_element_holder = process_list_element ? process_list_element->getProcessListEntry() : nullptr] { + CurrentThread::QueryScope query_scope(context_in_use); try { doRestore( @@ -891,9 +893,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt { on_exception(restore_id, backup_name_for_logging, restore_settings, restore_coordination); } - }, - thread_pool, - "RestoreWorker"); + }); } else { From 484453c703be20863fe4450c62a3c41d5b8416ac Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 7 Mar 2024 11:08:56 +0100 Subject: [PATCH 313/356] Create a rare-changing clickhouse/cctools --- docker/images.json | 4 +++ docker/packager/binary-builder/Dockerfile | 39 +---------------------- docker/packager/cctools/Dockerfile | 31 ++++++++++++++++++ 3 files changed, 36 insertions(+), 38 deletions(-) create mode 100644 docker/packager/cctools/Dockerfile diff --git a/docker/images.json b/docker/images.json index 0d852b1ca8d..7439517379b 100644 --- a/docker/images.json +++ b/docker/images.json @@ -3,6 +3,10 @@ "name": "clickhouse/binary-builder", "dependent": [] }, + "docker/packager/cctools": { + "name": "clickhouse/cctools", + "dependent": [] + }, "docker/test/compatibility/centos": { "name": "clickhouse/test-old-centos", "dependent": [] diff --git a/docker/packager/binary-builder/Dockerfile b/docker/packager/binary-builder/Dockerfile index 6c2fc3dba57..1d8e9a67e80 100644 --- a/docker/packager/binary-builder/Dockerfile +++ b/docker/packager/binary-builder/Dockerfile @@ -1,42 +1,5 @@ # docker build -t clickhouse/binary-builder . ARG FROM_TAG=latest -FROM clickhouse/fasttest:latest AS cctools -# The cctools are built always from the clickhouse/test-util:latest and cached inline -# Theoretically, it should improve rebuild speed significantly -ENV CC=clang-${LLVM_VERSION} -ENV CXX=clang++-${LLVM_VERSION} -# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -# DO NOT PUT ANYTHING BEFORE THE NEXT TWO `RUN` DIRECTIVES -# THE MOST HEAVY OPERATION MUST BE THE FIRST IN THE CACHE -# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -# libtapi is required to support .tbh format from recent MacOS SDKs -RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \ - && cd apple-libtapi \ - && git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 \ - && INSTALLPREFIX=/cctools ./build.sh \ - && ./install.sh \ - && cd .. \ - && rm -rf apple-libtapi - -# Build and install tools for cross-linking to Darwin (x86-64) -# Build and install tools for cross-linking to Darwin (aarch64) -RUN git clone https://github.com/tpoechtrager/cctools-port.git \ - && cd cctools-port/cctools \ - && git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 \ - && ./configure --prefix=/cctools --with-libtapi=/cctools \ - --target=x86_64-apple-darwin \ - && make install -j$(nproc) \ - && make clean \ - && ./configure --prefix=/cctools --with-libtapi=/cctools \ - --target=aarch64-apple-darwin \ - && make install -j$(nproc) \ - && cd ../.. \ - && rm -rf cctools-port - -# !!!!!!!!!!! -# END COMPILE -# !!!!!!!!!!! 
- FROM clickhouse/fasttest:$FROM_TAG ENV CC=clang-${LLVM_VERSION} ENV CXX=clang++-${LLVM_VERSION} @@ -110,7 +73,7 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \ "https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \ && chmod +x /usr/bin/clang-tidy-cache -COPY --from=cctools /cctools /cctools +COPY --from=clickhouse/cctools /cctools /cctools RUN mkdir /workdir && chmod 777 /workdir WORKDIR /workdir diff --git a/docker/packager/cctools/Dockerfile b/docker/packager/cctools/Dockerfile new file mode 100644 index 00000000000..1b8c675a5c5 --- /dev/null +++ b/docker/packager/cctools/Dockerfile @@ -0,0 +1,31 @@ +# This is a hack to significantly reduce the build time of the clickhouse/binary-builder +# It's based on the assumption that we don't care of the cctools version so much +# It event does not depend on the clickhouse/fasttest in the `docker/images.json` +ARG FROM_TAG=latest +FROM clickhouse/fasttest:$FROM_TAG + +ENV CC=clang-${LLVM_VERSION} +ENV CXX=clang++-${LLVM_VERSION} + +RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \ + && cd apple-libtapi \ + && git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 \ + && INSTALLPREFIX=/cctools ./build.sh \ + && ./install.sh \ + && cd .. \ + && rm -rf apple-libtapi + +# Build and install tools for cross-linking to Darwin (x86-64) +# Build and install tools for cross-linking to Darwin (aarch64) +RUN git clone https://github.com/tpoechtrager/cctools-port.git \ + && cd cctools-port/cctools \ + && git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 \ + && ./configure --prefix=/cctools --with-libtapi=/cctools \ + --target=x86_64-apple-darwin \ + && make install -j$(nproc) \ + && make clean \ + && ./configure --prefix=/cctools --with-libtapi=/cctools \ + --target=aarch64-apple-darwin \ + && make install -j$(nproc) \ + && cd ../.. \ + && rm -rf cctools-port From 38e94e2177df37f5c86b78c3dd49f4d92692b63d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 7 Mar 2024 11:10:36 +0100 Subject: [PATCH 314/356] apply fix to backup thread --- src/Backups/BackupsWorker.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index e351464d928..b9a43d42fea 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -487,7 +487,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context /// process_list_element_holder is used to make an element in ProcessList live while BACKUP is working asynchronously. auto process_list_element = context_in_use->getProcessListElement(); - scheduleFromThreadPool( + thread_pool.scheduleOrThrowOnError( [this, backup_query, backup_id, @@ -500,6 +500,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context on_exception, process_list_element_holder = process_list_element ? process_list_element->getProcessListEntry() : nullptr] { + CurrentThread::QueryScope query_scope(context_in_use); BackupMutablePtr backup_async; try { @@ -518,8 +519,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context { on_exception(backup_async, backup_id, backup_name_for_logging, backup_settings, backup_coordination); } - }, - thread_pool, "BackupWorker"); + }); } else { From 922d4cee63b11edabe218bf6ef1686b834c318d4 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 7 Mar 2024 11:31:23 +0100 Subject: [PATCH 315/356] Set the static image tag for cctools sources --- docker/packager/binary-builder/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/packager/binary-builder/Dockerfile b/docker/packager/binary-builder/Dockerfile index 1d8e9a67e80..96c90403187 100644 --- a/docker/packager/binary-builder/Dockerfile +++ b/docker/packager/binary-builder/Dockerfile @@ -73,7 +73,8 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \ "https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \ && chmod +x /usr/bin/clang-tidy-cache -COPY --from=clickhouse/cctools /cctools /cctools +# If the cctools is updated, then first build it in the CI, then update here in a different commit +COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools RUN mkdir /workdir && chmod 777 /workdir WORKDIR /workdir From 54b6e5a42d2c5deb353357ae324189e76707e77c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 7 Mar 2024 12:37:29 +0100 Subject: [PATCH 316/356] Better --- src/Backups/BackupsWorker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 1798734a5d0..1b4279ca9a7 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -500,10 +500,10 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context on_exception, process_list_element_holder = process_list_element ? process_list_element->getProcessListEntry() : nullptr] { - setThreadName("BackupWorker"); BackupMutablePtr backup_async; try { + setThreadName("BackupWorker"); CurrentThread::QueryScope query_scope(context_in_use); doBackup( backup_async, @@ -878,9 +878,9 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt on_exception, process_list_element_holder = process_list_element ? process_list_element->getProcessListEntry() : nullptr] { - setThreadName("RestorerWorker"); try { + setThreadName("RestorerWorker"); CurrentThread::QueryScope query_scope(context_in_use); doRestore( restore_query, From b1b999f1c89d213164ba20b01c1a37c16e93d068 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:44:57 +0100 Subject: [PATCH 317/356] fix typo --- docs/en/sql-reference/window-functions/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 5bfe22d23a2..9b2ded7b6ce 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -8,7 +8,7 @@ title: Window Functions Windows functions let you perform calculations across a set of rows that are related to the current row. Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned. -## Standard Window Functionos +## Standard Window Functions ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. 
From b037726180467e84fb96ba5c1513f3f3bdd699ad Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 7 Mar 2024 13:07:32 +0100 Subject: [PATCH 318/356] Update README.md Adding upcoming events, fixing release call links --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9ada350d173..0ca338a3317 100644 --- a/README.md +++ b/README.md @@ -33,11 +33,21 @@ curl https://clickhouse.com/ | sh ## Upcoming Events -Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. +Keep an eye out for upcoming meetups and eventsaround the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. + +* [ClickHouse Meetup in Bellevue](https://www.meetup.com/clickhouse-seattle-user-group/events/298650371/) - Mar 11 +* [ClickHouse Meetup at Ramp's Offices in NYC](https://www.meetup.com/clickhouse-new-york-user-group/events/298640542/) - Mar 19 +* [ClickHouse Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/299479750/) - Mar 20 +* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/298997115/) - Mar 21 +* [ClickHouse Meetup in Bengaluru](https://www.meetup.com/clickhouse-bangalore-user-group/events/299479850/) - Mar 23 +* [ClickHouse Meetup in Zurich](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/299628922/) - Apr 16 +* [ClickHouse Meetup in Copenhagen](https://www.meetup.com/clickhouse-denmark-meetup-group/events/299629133/) - Apr 23 +* [ClickHouse Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28 + ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v24.1 Release Webinar**](https://www.youtube.com/watch?v=pBF9g0wGAGs) All the features of 24.1, one convenient video! Watch it now! +* **Recording available**: [**v24.2 Release Call**](https://www.youtube.com/watch?v=iN2y-TK8f3A) All the features of 24.2, one convenient video! Watch it now! 
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU) From 62f9a00da833559562587294c09a1b5d88f31c81 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 6 Mar 2024 20:28:38 +0100 Subject: [PATCH 319/356] shuffle resolved IPs --- programs/copier/Internals.cpp | 2 +- src/Access/Common/AllowedClientHosts.cpp | 2 +- src/Client/ConnectionParameters.cpp | 2 +- src/Common/DNSResolver.cpp | 11 +++++++++-- src/Common/DNSResolver.h | 1 + src/Coordination/KeeperStateManager.cpp | 2 +- src/Interpreters/Context.cpp | 2 +- 7 files changed, 15 insertions(+), 7 deletions(-) diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 0cfff7e3f6c..dcd199c6b38 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -259,7 +259,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std res.is_remote = 1; for (const auto & replica : replicas) { - if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name))) + if (isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(replica.host_name).front())) { res.is_remote = 0; break; diff --git a/src/Access/Common/AllowedClientHosts.cpp b/src/Access/Common/AllowedClientHosts.cpp index c677465a7a1..bee0cdd7264 100644 --- a/src/Access/Common/AllowedClientHosts.cpp +++ b/src/Access/Common/AllowedClientHosts.cpp @@ -55,7 +55,7 @@ namespace { IPAddress addr_v6 = toIPv6(address); - auto host_addresses = DNSResolver::instance().resolveHostAll(host); + auto host_addresses = DNSResolver::instance().resolveHostAllInOriginOrder(host); for (const auto & addr : host_addresses) { diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 5c22b6c6d3f..16911f97e84 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -115,7 +115,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati /// At the same time, I want clickhouse-local to always work, regardless. /// TODO: get rid of glibc, or replace getaddrinfo to c-ares. - compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHost(host))) + compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(host).front())) ? 
Protocol::Compression::Enable : Protocol::Compression::Disable; timeouts = ConnectionTimeouts() diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 250a8b3fc49..5b5f5369d5e 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -202,10 +202,10 @@ DNSResolver::DNSResolver() : impl(std::make_unique()), log(ge Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { - return pickAddress(resolveHostAll(host)); + return pickAddress(resolveHostAll(host)); // random order -> random pick } -DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) +DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) { if (impl->disable_cache) return resolveIPAddressImpl(host); @@ -214,6 +214,13 @@ DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) return resolveIPAddressWithCache(impl->cache_host, host); } +DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) +{ + auto addresses = resolveHostAllInOriginOrder(host); + std::shuffle(addresses.begin(), addresses.end(), thread_local_rng); + return addresses; +} + Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port) { if (impl->disable_cache) diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 27d81c9442a..6309eba888b 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -34,6 +34,7 @@ public: Poco::Net::IPAddress resolveHost(const std::string & host); /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs + IPAddresses resolveHostAllInOriginOrder(const std::string & host); IPAddresses resolveHostAll(const std::string & host); /// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index 87c56909387..c30df0b6313 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -30,7 +30,7 @@ bool isLocalhost(const std::string & hostname) { try { - return isLocalAddress(DNSResolver::instance().resolveHost(hostname)); + return isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(hostname).front()); } catch (...) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cdeaa46cff2..a81392cb3d8 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3270,7 +3270,7 @@ bool checkZooKeeperConfigIsLocal(const Poco::Util::AbstractConfiguration & confi if (startsWith(key, "node")) { String host = config.getString(config_name + "." 
+ key + ".host"); - if (isLocalAddress(DNSResolver::instance().resolveHost(host))) + if (isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(host).front())) return true; } } From 2a62ec26557ff7ec199e749244314eada857de39 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 7 Mar 2024 12:05:47 +0100 Subject: [PATCH 320/356] add comment to ping the CI --- src/Common/DNSResolver.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 6309eba888b..e3030e51a96 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -34,7 +34,9 @@ public: Poco::Net::IPAddress resolveHost(const std::string & host); /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs + /// resolveHostAllInOriginOrder returns addresses with the same order as system call returns it IPAddresses resolveHostAllInOriginOrder(const std::string & host); + /// resolveHostAll returns addresses in random order IPAddresses resolveHostAll(const std::string & host); /// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port From 61d64fd7e0ae10fa8a4573722e8221f8cbfef263 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 7 Mar 2024 12:23:52 +0000 Subject: [PATCH 321/356] CI: test build's checkout step issue #no_ci_cache #job_package_debug #job_style_check --- .github/workflows/reusable_build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index 80d78d93e1b..d2fe6f5dbe7 100644 --- a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -43,7 +43,8 @@ jobs: runs-on: [self-hosted, '${{inputs.runner_type}}'] steps: - name: Check out repository code - uses: ClickHouse/checkout@v1 + # WIP: temporary try commit with limited perallelization of checkout + uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232 with: clear-repository: true ref: ${{ fromJson(inputs.data).git_ref }} From 5ae203ce4d212cb57527ba92712ac9479842101d Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 7 Mar 2024 14:03:21 +0100 Subject: [PATCH 322/356] Update README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0ca338a3317..e00ce42a60b 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,13 @@ curl https://clickhouse.com/ | sh * [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements. * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. +## Monthly Release & Community Call + +Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. + +* [v24.3 Community Call](https://clickhouse.com/company/events/v24-3-community-release-call) - Mar 26 +* [v24.4 Community Call](https://clickhouse.com/company/events/v24-4-community-release-call) - Apr 30 + ## Upcoming Events Keep an eye out for upcoming meetups and eventsaround the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. 
You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. @@ -48,8 +55,6 @@ Keep an eye out for upcoming meetups and eventsaround the world. Somewhere else ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" * **Recording available**: [**v24.2 Release Call**](https://www.youtube.com/watch?v=iN2y-TK8f3A) All the features of 24.2, one convenient video! Watch it now! -* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU) - ## Interested in joining ClickHouse and making it your full-time job? From a23d0bb75b85ca371c5d32e977ba8dc861406e8d Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Thu, 7 Mar 2024 21:06:41 +0800 Subject: [PATCH 323/356] Update settings.md From cd4b60b51a26f3fda2a772d8949b521c19d688ec Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 7 Mar 2024 13:38:00 +0000 Subject: [PATCH 324/356] Rename the setting --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 2 +- src/Interpreters/GlobalSubqueriesVisitor.h | 2 +- src/Planner/Planner.cpp | 2 +- src/Planner/findParallelReplicasQuery.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b1cabe66aaf..9f22d35bb9e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -186,7 +186,7 @@ class IColumn; \ M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ - M(Bool, parallel_replicas_allow_subqueries_for_in, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ + M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). 
The max is still limited by 'max_parallel_replicas'", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 3f816586085..e7b96cee9d3 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -90,7 +90,7 @@ static std::map sett {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, - {"parallel_replicas_allow_subqueries_for_in", false, true, "If true, subquery for IN will be executed on every follower replica"}, + {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index bb3bd120303..64b6eb5dce9 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -216,7 +216,7 @@ private: if (enable_parallel_processing_of_joins) { /// We don't enable parallel replicas for IN (subquery) - if (!settings.parallel_replicas_allow_subqueries_for_in && ast->as()) + if (!settings.parallel_replicas_allow_in_with_subquery && ast->as()) { if (settings.allow_experimental_parallel_reading_from_replicas == 1) { diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 219f67ecbd8..8082a01d41b 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1373,7 +1373,7 @@ void Planner::buildPlanForQueryNode() const auto & settings = query_context->getSettingsRef(); if (query_context->canUseTaskBasedParallelReplicas()) { - if (!settings.parallel_replicas_allow_subqueries_for_in && planner_context->getPreparedSets().hasSubqueries()) + if (!settings.parallel_replicas_allow_in_with_subquery && planner_context->getPreparedSets().hasSubqueries()) { if (settings.allow_experimental_parallel_reading_from_replicas >= 2) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas"); diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index e0e47915047..ef640bcd42d 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -196,7 +196,7 @@ const QueryNode * findQueryForParallelReplicas( const auto * filter = typeid_cast(step); const auto * creating_sets = typeid_cast(step); - bool allowed_creating_sets = settings.parallel_replicas_allow_subqueries_for_in && creating_sets; + bool allowed_creating_sets = settings.parallel_replicas_allow_in_with_subquery && creating_sets; if (!expression && !filter && !allowed_creating_sets) can_distribute_full_node = false; From 5b28614f4ccda9a4620d4121479b3328d5aea04f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 7 Mar 2024 14:50:37 +0100 Subject: [PATCH 325/356] Fix TableFunctionExecutable::skipAnalysisForArguments --- src/TableFunctions/TableFunctionExecutable.cpp | 3 +++ 
.../03006_analyzer_executable_table_function.reference | 0 .../0_stateless/03006_analyzer_executable_table_function.sql | 4 ++++ 3 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/03006_analyzer_executable_table_function.reference create mode 100644 tests/queries/0_stateless/03006_analyzer_executable_table_function.sql diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 9fa5ddf15c2..2c3802e8667 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -71,6 +71,9 @@ std::vector TableFunctionExecutable::skipAnalysisForArguments(const Quer const auto & table_function_node_arguments = table_function_node.getArguments().getNodes(); size_t table_function_node_arguments_size = table_function_node_arguments.size(); + if (table_function_node_arguments_size <= 2) + return {}; + std::vector result_indexes; result_indexes.reserve(table_function_node_arguments_size - 2); for (size_t i = 2; i < table_function_node_arguments_size; ++i) diff --git a/tests/queries/0_stateless/03006_analyzer_executable_table_function.reference b/tests/queries/0_stateless/03006_analyzer_executable_table_function.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql b/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql new file mode 100644 index 00000000000..4edbcc97f50 --- /dev/null +++ b/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql @@ -0,0 +1,4 @@ +SELECT + toFixedString(toFixedString(toLowCardinality(toFixedString('--------------------', toNullable(20))), toLowCardinality(20)), 20), + * +FROM executable('data String', SETTINGS max_command_execution_time = 100); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} From 3a4ef70a208ca4452f646bb78f1fb96a12bc377e Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Thu, 7 Mar 2024 06:03:33 -0800 Subject: [PATCH 326/356] Make better looking --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49dc12b89d8..b55e9810361 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -321,6 +321,7 @@ endif () # Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off") + # Our built-in unwinder only supports DWARF version up to 4. set (DEBUG_INFO_FLAGS "-g") From 2db1876750b1fb007b11ba5859b1a816bf6eabc7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 7 Mar 2024 15:42:45 +0000 Subject: [PATCH 327/356] Fixing analyzer. 
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 131 ++++++++++-------- src/Analyzer/Passes/QueryAnalysisPass.h | 5 +- src/Analyzer/QueryTreePassManager.cpp | 4 +- src/Analyzer/QueryTreePassManager.h | 2 +- .../InterpreterSelectQueryAnalyzer.cpp | 2 +- .../02999_scalar_subqueries_bug_1.sql | 4 +- 6 files changed, 86 insertions(+), 62 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 907a732493d..7ff1624ee4a 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1068,6 +1068,8 @@ private: class QueryAnalyzer { public: + explicit QueryAnalyzer(bool only_analyze_) : only_analyze(only_analyze_) {} + void resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context) { IdentifierResolveScope scope(node, nullptr /*parent_scope*/); @@ -1430,6 +1432,7 @@ private: /// Global scalar subquery to scalar value map std::unordered_map scalar_subquery_to_scalar_value; + const bool only_analyze; }; /// Utility functions implementation @@ -1977,80 +1980,97 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden auto interpreter = std::make_unique(node->toAST(), subquery_context, subquery_context->getViewSource(), options); auto io = interpreter->execute(); - + std::cerr << StackTrace().toString() << std::endl; PullingAsyncPipelineExecutor executor(io.pipeline); io.pipeline.setProgressCallback(context->getProgressCallback()); io.pipeline.setProcessListElement(context->getProcessListElement()); - Block block; - - while (block.rows() == 0 && executor.pull(block)) + if (only_analyze) { - } - - if (block.rows() == 0) - { - auto types = interpreter->getSampleBlock().getDataTypes(); - if (types.size() != 1) - types = {std::make_shared(types)}; - - auto & type = types[0]; - if (!type->isNullable()) + /// If query is only analyzed, then constants are not correct. + scalar_block = interpreter->getSampleBlock(); + for (auto & column : scalar_block) { - if (!type->canBeInsideNullable()) - throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, - "Scalar subquery returned empty result of type {} which cannot be Nullable", - type->getName()); - - type = makeNullable(type); + if (column.column->empty()) + { + auto mut_col = column.column->cloneEmpty(); + mut_col->insertDefault(); + column.column = std::move(mut_col); + } } - - auto scalar_column = type->createColumn(); - scalar_column->insert(Null()); - scalar_block.insert({std::move(scalar_column), type, "null"}); } else { - if (block.rows() != 1) - throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); + Block block; - Block tmp_block; - while (tmp_block.rows() == 0 && executor.pull(tmp_block)) + while (block.rows() == 0 && executor.pull(block)) { } - if (tmp_block.rows() != 0) - throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); - - block = materializeBlock(block); - size_t columns = block.columns(); - - if (columns == 1) + if (block.rows() == 0) { - auto & column = block.getByPosition(0); - /// Here we wrap type to nullable if we can. - /// It is needed cause if subquery return no rows, it's result will be Null. - /// In case of many columns, do not check it cause tuple can't be nullable. 
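        /// A concrete case for the comment above: in
        ///     SELECT (SELECT 1 WHERE 0)
        /// the subquery produces no rows at execution time, so the scalar value
        /// is NULL and the inferred type has to be Nullable (here Nullable(UInt8))
        /// rather than the bare column type.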
- if (!column.type->isNullable() && column.type->canBeInsideNullable()) + auto types = interpreter->getSampleBlock().getDataTypes(); + if (types.size() != 1) + types = {std::make_shared(types)}; + + auto & type = types[0]; + if (!type->isNullable()) { - column.type = makeNullable(column.type); - column.column = makeNullable(column.column); + if (!type->canBeInsideNullable()) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, + "Scalar subquery returned empty result of type {} which cannot be Nullable", + type->getName()); + + type = makeNullable(type); } - scalar_block = block; + auto scalar_column = type->createColumn(); + scalar_column->insert(Null()); + scalar_block.insert({std::move(scalar_column), type, "null"}); } else { - /** Make unique column names for tuple. - * - * Example: SELECT (SELECT 2 AS x, x) - */ - makeUniqueColumnNamesInBlock(block); + if (block.rows() != 1) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); - scalar_block.insert({ - ColumnTuple::create(block.getColumns()), - std::make_shared(block.getDataTypes(), block.getNames()), - "tuple"}); + Block tmp_block; + while (tmp_block.rows() == 0 && executor.pull(tmp_block)) + { + } + + if (tmp_block.rows() != 0) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); + + block = materializeBlock(block); + size_t columns = block.columns(); + + if (columns == 1) + { + auto & column = block.getByPosition(0); + /// Here we wrap type to nullable if we can. + /// It is needed cause if subquery return no rows, it's result will be Null. + /// In case of many columns, do not check it cause tuple can't be nullable. + if (!column.type->isNullable() && column.type->canBeInsideNullable()) + { + column.type = makeNullable(column.type); + column.column = makeNullable(column.column); + } + + scalar_block = block; + } + else + { + /** Make unique column names for tuple. + * + * Example: SELECT (SELECT 2 AS x, x) + */ + makeUniqueColumnNamesInBlock(block); + + scalar_block.insert({ + ColumnTuple::create(block.getColumns()), + std::make_shared(block.getDataTypes(), block.getNames()), + "tuple"}); + } } } @@ -7749,13 +7769,16 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier } -QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_) +QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_, bool only_analyze_) : table_expression(std::move(table_expression_)) + , only_analyze(only_analyze_) {} +QueryAnalysisPass::QueryAnalysisPass(bool only_analyze_) : only_analyze(only_analyze_) {} + void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { - QueryAnalyzer analyzer; + QueryAnalyzer analyzer(only_analyze); analyzer.resolve(query_tree_node, table_expression, context); createUniqueTableAliases(query_tree_node, table_expression, context); } diff --git a/src/Analyzer/Passes/QueryAnalysisPass.h b/src/Analyzer/Passes/QueryAnalysisPass.h index 5d335d3e712..8c746833eee 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.h +++ b/src/Analyzer/Passes/QueryAnalysisPass.h @@ -71,13 +71,13 @@ public: /** Construct query analysis pass for query or union analysis. * Available columns are extracted from query node join tree. */ - QueryAnalysisPass() = default; + explicit QueryAnalysisPass(bool only_analyze_ = false); /** Construct query analysis pass for expression or list of expressions analysis. 
* Available expression columns are extracted from table expression. * Table expression node must have query, union, table, table function type. */ - explicit QueryAnalysisPass(QueryTreeNodePtr table_expression_); + QueryAnalysisPass(QueryTreeNodePtr table_expression_, bool only_analyze_ = false); String getName() override { @@ -93,6 +93,7 @@ public: private: QueryTreeNodePtr table_expression; + const bool only_analyze; }; } diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 43bb534a44e..9c07884a464 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -246,9 +246,9 @@ void QueryTreePassManager::dump(WriteBuffer & buffer, size_t up_to_pass_index) } } -void addQueryTreePasses(QueryTreePassManager & manager) +void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze) { - manager.addPass(std::make_unique()); + manager.addPass(std::make_unique(only_analyze)); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/src/Analyzer/QueryTreePassManager.h b/src/Analyzer/QueryTreePassManager.h index 270563590ba..0a0d72a6698 100644 --- a/src/Analyzer/QueryTreePassManager.h +++ b/src/Analyzer/QueryTreePassManager.h @@ -47,6 +47,6 @@ private: std::vector passes; }; -void addQueryTreePasses(QueryTreePassManager & manager); +void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze = false); } diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 4897101d80b..922f4a99b4a 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -103,7 +103,7 @@ QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, auto query_tree = buildQueryTree(query, context); QueryTreePassManager query_tree_pass_manager(context); - addQueryTreePasses(query_tree_pass_manager); + addQueryTreePasses(query_tree_pass_manager, select_query_options.only_analyze); /// We should not apply any query tree level optimizations on shards /// because it can lead to a changed header. 
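With the wiring above, the flag travels SelectQueryOptions::only_analyze -> addQueryTreePasses() -> QueryAnalysisPass -> QueryAnalyzer, so an interpreter built purely for analysis skips scalar-subquery execution. A rough usage sketch under those assumptions (query_ast and context stand for an already-parsed query and a query context; constructor arguments are abbreviated, only the flag handling matters here):

    SelectQueryOptions options;
    options.only_analyze = true;                      /// same field read as select_query_options.only_analyze above
    InterpreterSelectQueryAnalyzer interpreter(query_ast, context, options);
    Block header = interpreter.getSampleBlock();      /// header constants are placeholders, not evaluated values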
diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql index 797571a8552..88bcdeb7f77 100644 --- a/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql @@ -2,7 +2,7 @@ drop table if exists t_table_select; CREATE TABLE t_table_select (id UInt32) ENGINE = MergeTree ORDER BY id; INSERT INTO t_table_select (id) SELECT number FROM numbers(30); -CREATE TEMPORARY TABLE t_test AS SELECT a.id, b.id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS a GLOBAL LEFT JOIN (SELECT id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS b WHERE (b.id % 10) = 0) AS b ON b.id = a.id SETTINGS join_use_nulls = 1; +CREATE TEMPORARY TABLE t_test (x UInt32, y Nullable(UInt32)) AS SELECT a.id, b.id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS a GLOBAL LEFT JOIN (SELECT id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS b WHERE (b.id % 10) = 0) AS b ON b.id = a.id SETTINGS join_use_nulls = 1; -select * from t_test order by id; +select * from t_test order by x; From 82ba97c3a730a422a01b7765ccd7aca69887bf71 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 7 Mar 2024 17:16:13 +0100 Subject: [PATCH 328/356] More explicit template instantiations (#60730) --- base/base/CMakeLists.txt | 1 + base/base/Decimal.cpp | 87 ++++++ base/base/Decimal.h | 94 +++++-- base/base/extended_types.h | 38 +++ src/Columns/ColumnUnique.cpp | 25 ++ src/Columns/ColumnUnique.h | 21 ++ src/Common/FieldVisitorConvertToNumber.cpp | 23 ++ src/Common/FieldVisitorConvertToNumber.h | 15 + src/DataTypes/DataTypeDecimalBase.h | 6 + src/DataTypes/DataTypesDecimal.cpp | 256 +++++++++++++++++- src/DataTypes/DataTypesDecimal.h | 237 ++++++++-------- src/DataTypes/DataTypesNumber.cpp | 17 ++ src/DataTypes/DataTypesNumber.h | 16 ++ src/DataTypes/IDataType.cpp | 87 ++++++ src/DataTypes/IDataType.h | 142 +++++----- .../Serializations/SerializationDecimalBase.h | 6 + src/Functions/FunctionBase64Conversion.h | 2 +- src/Functions/FunctionHelpers.h | 6 +- src/Functions/FunctionStringReplace.h | 6 +- src/Functions/FunctionTokens.h | 10 +- src/Functions/FunctionUnixTimestamp64.h | 2 +- src/Functions/FunctionsAES.h | 20 +- src/Functions/FunctionsConversion.h | 12 +- src/Functions/JSONArrayLength.cpp | 2 +- src/Functions/URL/URLHierarchy.cpp | 2 +- src/Functions/URL/URLPathHierarchy.cpp | 2 +- .../URL/extractURLParameterNames.cpp | 2 +- src/Functions/URL/extractURLParameters.cpp | 2 +- src/Functions/array/arrayJaccardIndex.cpp | 4 +- src/Functions/array/arrayRandomSample.cpp | 4 +- src/Functions/array/arrayShingles.cpp | 4 +- src/Functions/arrayStringConcat.cpp | 4 +- src/Functions/castOrDefault.cpp | 4 +- src/Functions/countMatches.h | 4 +- src/Functions/extractAll.cpp | 4 +- src/Functions/extractAllGroups.h | 4 +- src/Functions/extractGroups.cpp | 4 +- src/Functions/formatQuery.cpp | 2 +- src/Functions/fromDaysSinceYearZero.cpp | 2 +- src/Functions/makeDate.cpp | 54 ++-- src/Functions/parseDateTime.cpp | 6 +- src/Functions/regexpExtract.cpp | 6 +- src/Functions/repeat.cpp | 4 +- src/Functions/seriesDecomposeSTL.cpp | 4 +- src/Functions/seriesOutliersDetectTukey.cpp | 8 +- src/Functions/seriesPeriodDetectFFT.cpp | 2 +- src/Functions/snowflake.cpp | 12 +- src/Functions/space.cpp | 2 +- src/Functions/sqid.cpp | 2 +- src/Functions/timestamp.cpp | 4 +- src/Functions/toDecimalString.cpp | 4 +- .../Impl/PrometheusTextOutputFormat.cpp | 24 +- 
52 files changed, 962 insertions(+), 349 deletions(-) create mode 100644 base/base/Decimal.cpp create mode 100644 src/Columns/ColumnUnique.cpp create mode 100644 src/Common/FieldVisitorConvertToNumber.cpp diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 548ba01d86a..610877eae73 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -13,6 +13,7 @@ set (SRCS cgroupsv2.cpp coverage.cpp demangle.cpp + Decimal.cpp getAvailableMemoryAmount.cpp getFQDNOrHostName.cpp getMemoryAmount.cpp diff --git a/base/base/Decimal.cpp b/base/base/Decimal.cpp new file mode 100644 index 00000000000..7e65c0eb8d1 --- /dev/null +++ b/base/base/Decimal.cpp @@ -0,0 +1,87 @@ +#include +#include + +namespace DB +{ + +/// Explicit template instantiations. + +#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \ + M(Int32) \ + M(Int64) \ + M(Int128) \ + M(Int256) + +#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \ + M(Int32, X) \ + M(Int64, X) \ + M(Int128, X) \ + M(Int256, X) + +template const Decimal & Decimal::operator += (const T & x) { value += x; return *this; } +template const Decimal & Decimal::operator -= (const T & x) { value -= x; return *this; } +template const Decimal & Decimal::operator *= (const T & x) { value *= x; return *this; } +template const Decimal & Decimal::operator /= (const T & x) { value /= x; return *this; } +template const Decimal & Decimal::operator %= (const T & x) { value %= x; return *this; } + +template void NO_SANITIZE_UNDEFINED Decimal::addOverflow(const T & x) { value += x; } + +/// Maybe this explicit instantiation affects performance since operators cannot be inlined. + +template template const Decimal & Decimal::operator += (const Decimal & x) { value += static_cast(x.value); return *this; } +template template const Decimal & Decimal::operator -= (const Decimal & x) { value -= static_cast(x.value); return *this; } +template template const Decimal & Decimal::operator *= (const Decimal & x) { value *= static_cast(x.value); return *this; } +template template const Decimal & Decimal::operator /= (const Decimal & x) { value /= static_cast(x.value); return *this; } +template template const Decimal & Decimal::operator %= (const Decimal & x) { value %= static_cast(x.value); return *this; } + +#define DISPATCH(TYPE_T, TYPE_U) \ + template const Decimal & Decimal::operator += (const Decimal & x); \ + template const Decimal & Decimal::operator -= (const Decimal & x); \ + template const Decimal & Decimal::operator *= (const Decimal & x); \ + template const Decimal & Decimal::operator /= (const Decimal & x); \ + template const Decimal & Decimal::operator %= (const Decimal & x); +#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + +#define DISPATCH(TYPE) template struct Decimal; +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + +template bool operator< (const Decimal & x, const Decimal & y) { return x.value < y.value; } +template bool operator> (const Decimal & x, const Decimal & y) { return x.value > y.value; } +template bool operator<= (const Decimal & x, const Decimal & y) { return x.value <= y.value; } +template bool operator>= (const Decimal & x, const Decimal & y) { return x.value >= y.value; } +template bool operator== (const Decimal & x, const Decimal & y) { return x.value == y.value; } +template bool operator!= (const Decimal & x, const Decimal & y) { return x.value != y.value; } + +#define DISPATCH(TYPE) \ +template bool operator< (const Decimal 
& x, const Decimal & y); \ +template bool operator> (const Decimal & x, const Decimal & y); \ +template bool operator<= (const Decimal & x, const Decimal & y); \ +template bool operator>= (const Decimal & x, const Decimal & y); \ +template bool operator== (const Decimal & x, const Decimal & y); \ +template bool operator!= (const Decimal & x, const Decimal & y); +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + + +template Decimal operator+ (const Decimal & x, const Decimal & y) { return x.value + y.value; } +template Decimal operator- (const Decimal & x, const Decimal & y) { return x.value - y.value; } +template Decimal operator* (const Decimal & x, const Decimal & y) { return x.value * y.value; } +template Decimal operator/ (const Decimal & x, const Decimal & y) { return x.value / y.value; } +template Decimal operator- (const Decimal & x) { return -x.value; } + +#define DISPATCH(TYPE) \ +template Decimal operator+ (const Decimal & x, const Decimal & y); \ +template Decimal operator- (const Decimal & x, const Decimal & y); \ +template Decimal operator* (const Decimal & x, const Decimal & y); \ +template Decimal operator/ (const Decimal & x, const Decimal & y); \ +template Decimal operator- (const Decimal & x); +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + +#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS +#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE +} diff --git a/base/base/Decimal.h b/base/base/Decimal.h index 66ff623217c..42f9e67c49d 100644 --- a/base/base/Decimal.h +++ b/base/base/Decimal.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -10,6 +11,18 @@ namespace DB template struct Decimal; class DateTime64; +#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \ + M(Int32) \ + M(Int64) \ + M(Int128) \ + M(Int256) + +#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \ + M(Int32, X) \ + M(Int64, X) \ + M(Int128, X) \ + M(Int256, X) + using Decimal32 = Decimal; using Decimal64 = Decimal; using Decimal128 = Decimal; @@ -50,36 +63,73 @@ struct Decimal return static_cast(value); } - const Decimal & operator += (const T & x) { value += x; return *this; } - const Decimal & operator -= (const T & x) { value -= x; return *this; } - const Decimal & operator *= (const T & x) { value *= x; return *this; } - const Decimal & operator /= (const T & x) { value /= x; return *this; } - const Decimal & operator %= (const T & x) { value %= x; return *this; } + const Decimal & operator += (const T & x); + const Decimal & operator -= (const T & x); + const Decimal & operator *= (const T & x); + const Decimal & operator /= (const T & x); + const Decimal & operator %= (const T & x); - template const Decimal & operator += (const Decimal & x) { value += x.value; return *this; } - template const Decimal & operator -= (const Decimal & x) { value -= x.value; return *this; } - template const Decimal & operator *= (const Decimal & x) { value *= x.value; return *this; } - template const Decimal & operator /= (const Decimal & x) { value /= x.value; return *this; } - template const Decimal & operator %= (const Decimal & x) { value %= x.value; return *this; } + template const Decimal & operator += (const Decimal & x); + template const Decimal & operator -= (const Decimal & x); + template const Decimal & operator *= (const Decimal & x); + template const Decimal & operator /= (const Decimal & x); + template const Decimal & operator %= (const Decimal & x); /// This is to avoid UB for sumWithOverflow() - void NO_SANITIZE_UNDEFINED addOverflow(const T & x) { value += x; } + void NO_SANITIZE_UNDEFINED 
addOverflow(const T & x); T value; }; -template inline bool operator< (const Decimal & x, const Decimal & y) { return x.value < y.value; } -template inline bool operator> (const Decimal & x, const Decimal & y) { return x.value > y.value; } -template inline bool operator<= (const Decimal & x, const Decimal & y) { return x.value <= y.value; } -template inline bool operator>= (const Decimal & x, const Decimal & y) { return x.value >= y.value; } -template inline bool operator== (const Decimal & x, const Decimal & y) { return x.value == y.value; } -template inline bool operator!= (const Decimal & x, const Decimal & y) { return x.value != y.value; } +#define DISPATCH(TYPE) extern template struct Decimal; +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH -template inline Decimal operator+ (const Decimal & x, const Decimal & y) { return x.value + y.value; } -template inline Decimal operator- (const Decimal & x, const Decimal & y) { return x.value - y.value; } -template inline Decimal operator* (const Decimal & x, const Decimal & y) { return x.value * y.value; } -template inline Decimal operator/ (const Decimal & x, const Decimal & y) { return x.value / y.value; } -template inline Decimal operator- (const Decimal & x) { return -x.value; } +#define DISPATCH(TYPE_T, TYPE_U) \ + extern template const Decimal & Decimal::operator += (const Decimal & x); \ + extern template const Decimal & Decimal::operator -= (const Decimal & x); \ + extern template const Decimal & Decimal::operator *= (const Decimal & x); \ + extern template const Decimal & Decimal::operator /= (const Decimal & x); \ + extern template const Decimal & Decimal::operator %= (const Decimal & x); +#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + +template bool operator< (const Decimal & x, const Decimal & y); +template bool operator> (const Decimal & x, const Decimal & y); +template bool operator<= (const Decimal & x, const Decimal & y); +template bool operator>= (const Decimal & x, const Decimal & y); +template bool operator== (const Decimal & x, const Decimal & y); +template bool operator!= (const Decimal & x, const Decimal & y); + +#define DISPATCH(TYPE) \ +extern template bool operator< (const Decimal & x, const Decimal & y); \ +extern template bool operator> (const Decimal & x, const Decimal & y); \ +extern template bool operator<= (const Decimal & x, const Decimal & y); \ +extern template bool operator>= (const Decimal & x, const Decimal & y); \ +extern template bool operator== (const Decimal & x, const Decimal & y); \ +extern template bool operator!= (const Decimal & x, const Decimal & y); +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + +template Decimal operator+ (const Decimal & x, const Decimal & y); +template Decimal operator- (const Decimal & x, const Decimal & y); +template Decimal operator* (const Decimal & x, const Decimal & y); +template Decimal operator/ (const Decimal & x, const Decimal & y); +template Decimal operator- (const Decimal & x); + +#define DISPATCH(TYPE) \ +extern template Decimal operator+ (const Decimal & x, const Decimal & y); \ +extern template Decimal operator- (const Decimal & x, const Decimal & y); \ +extern template Decimal operator* (const Decimal & x, const Decimal & y); \ +extern template Decimal operator/ (const Decimal & x, const Decimal & y); \ +extern template Decimal operator- (const Decimal & x); +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + +#undef 
FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS +#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE /// Distinguishable type to allow function resolution/deduction based on value type, /// but also relatively easy to convert to/from Decimal64. diff --git a/base/base/extended_types.h b/base/base/extended_types.h index b58df45a97e..796167ab45d 100644 --- a/base/base/extended_types.h +++ b/base/base/extended_types.h @@ -64,6 +64,44 @@ template <> struct is_arithmetic { static constexpr bool value = true; template inline constexpr bool is_arithmetic_v = is_arithmetic::value; +#define FOR_EACH_ARITHMETIC_TYPE(M) \ + M(DataTypeDate) \ + M(DataTypeDate32) \ + M(DataTypeDateTime) \ + M(DataTypeInt8) \ + M(DataTypeUInt8) \ + M(DataTypeInt16) \ + M(DataTypeUInt16) \ + M(DataTypeInt32) \ + M(DataTypeUInt32) \ + M(DataTypeInt64) \ + M(DataTypeUInt64) \ + M(DataTypeInt128) \ + M(DataTypeUInt128) \ + M(DataTypeInt256) \ + M(DataTypeUInt256) \ + M(DataTypeFloat32) \ + M(DataTypeFloat64) + +#define FOR_EACH_ARITHMETIC_TYPE_PASS(M, X) \ + M(DataTypeDate, X) \ + M(DataTypeDate32, X) \ + M(DataTypeDateTime, X) \ + M(DataTypeInt8, X) \ + M(DataTypeUInt8, X) \ + M(DataTypeInt16, X) \ + M(DataTypeUInt16, X) \ + M(DataTypeInt32, X) \ + M(DataTypeUInt32, X) \ + M(DataTypeInt64, X) \ + M(DataTypeUInt64, X) \ + M(DataTypeInt128, X) \ + M(DataTypeUInt128, X) \ + M(DataTypeInt256, X) \ + M(DataTypeUInt256, X) \ + M(DataTypeFloat32, X) \ + M(DataTypeFloat64, X) + template struct make_unsigned // NOLINT(readability-identifier-naming) { diff --git a/src/Columns/ColumnUnique.cpp b/src/Columns/ColumnUnique.cpp new file mode 100644 index 00000000000..edfee69a752 --- /dev/null +++ b/src/Columns/ColumnUnique.cpp @@ -0,0 +1,25 @@ +#include + +namespace DB +{ + +/// Explicit template instantiations. +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; + +} diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 34f1ffc15cd..76bbbbacdbf 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -15,6 +15,8 @@ #include #include #include +#include "Columns/ColumnsDateTime.h" +#include "Columns/ColumnsNumber.h" #include #include @@ -736,4 +738,23 @@ UInt128 ColumnUnique::IncrementalHash::getHash(const ColumnType & co return cur_hash; } + +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; + } diff --git a/src/Common/FieldVisitorConvertToNumber.cpp b/src/Common/FieldVisitorConvertToNumber.cpp new file mode 100644 index 00000000000..75b3fbfe02a --- /dev/null +++ 
b/src/Common/FieldVisitorConvertToNumber.cpp @@ -0,0 +1,23 @@ +#include +#include "base/Decimal.h" + +namespace DB +{ + +/// Explicit template instantiations. +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; + +} diff --git a/src/Common/FieldVisitorConvertToNumber.h b/src/Common/FieldVisitorConvertToNumber.h index 47a1e669969..86e03cb5d77 100644 --- a/src/Common/FieldVisitorConvertToNumber.h +++ b/src/Common/FieldVisitorConvertToNumber.h @@ -117,4 +117,19 @@ public: T operator() (const bool & x) const { return T(x); } }; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; + } diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index adbe9c95b14..9887dfabcdb 100644 --- a/src/DataTypes/DataTypeDecimalBase.h +++ b/src/DataTypes/DataTypeDecimalBase.h @@ -207,4 +207,10 @@ inline DataTypePtr createDecimal(UInt64 precision_value, UInt64 scale_value) return std::make_shared>(precision_value, scale_value); } +extern template class DataTypeDecimalBase; +extern template class DataTypeDecimalBase; +extern template class DataTypeDecimalBase; +extern template class DataTypeDecimalBase; +extern template class DataTypeDecimalBase; + } diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 7ad9f0b6fd8..77a7a3e7237 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -112,6 +112,256 @@ static DataTypePtr createExact(const ASTPtr & arguments) return createDecimal(precision, scale); } +template +requires (IsDataTypeDecimal && IsDataTypeDecimal) +ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result) +{ + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; + using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)), FromFieldType, ToFieldType>; + using MaxNativeType = typename MaxFieldType::NativeType; + + static constexpr bool throw_exception = std::is_same_v; + + MaxNativeType converted_value; + if (scale_to > scale_from) + { + converted_value = DecimalUtils::scaleMultiplier(scale_to - scale_from); + if (common::mulOverflow(static_cast(value.value), converted_value, converted_value)) 
+ { + if constexpr (throw_exception) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow while multiplying {} by scale {}", + std::string(ToDataType::family_name), toString(value.value), toString(converted_value)); + else + return ReturnType(false); + } + } + else if (scale_to == scale_from) + { + converted_value = value.value; + } + else + { + converted_value = value.value / DecimalUtils::scaleMultiplier(scale_from - scale_to); + } + + if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType)) + { + if (converted_value < std::numeric_limits::min() || + converted_value > std::numeric_limits::max()) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow: {} is not in range ({}, {})", + std::string(ToDataType::family_name), toString(converted_value), + toString(std::numeric_limits::min()), + toString(std::numeric_limits::max())); + else + return ReturnType(false); + } + } + + result = static_cast(converted_value); + + return ReturnType(true); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template void convertDecimalsImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); \ + template bool convertDecimalsImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && IsDataTypeDecimal) +typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to) +{ + using ToFieldType = typename ToDataType::FieldType; + ToFieldType result; + + convertDecimalsImpl(value, scale_from, scale_to, result); + + return result; +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template typename TO_DATA_TYPE::FieldType convertDecimals(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && IsDataTypeDecimal) +bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result) +{ + return convertDecimalsImpl(value, scale_from, scale_to, result); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template bool tryConvertDecimals(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && is_arithmetic_v) +ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType & result) +{ + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; + + return DecimalUtils::convertToImpl(value, scale, result); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template void convertFromDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \ + template bool convertFromDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) 
FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && is_arithmetic_v) +inline typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale) +{ + typename ToDataType::FieldType result; + + convertFromDecimalImpl(value, scale, result); + + return result; +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template typename TO_DATA_TYPE::FieldType convertFromDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && is_arithmetic_v) +inline bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) +{ + return convertFromDecimalImpl(value, scale, result); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template bool tryConvertFromDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (is_arithmetic_v && IsDataTypeDecimal) +ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType & result) +{ + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; + using ToNativeType = typename ToFieldType::NativeType; + + static constexpr bool throw_exception = std::is_same_v; + + if constexpr (std::is_floating_point_v) + { + if (!std::isfinite(value)) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Cannot convert infinity or NaN to decimal", ToDataType::family_name); + else + return ReturnType(false); + } + + auto out = value * static_cast(DecimalUtils::scaleMultiplier(scale)); + + if (out <= static_cast(std::numeric_limits::min()) || + out >= static_cast(std::numeric_limits::max())) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. 
Float is out of Decimal range", ToDataType::family_name); + else + return ReturnType(false); + } + + result = static_cast(out); + return ReturnType(true); + } + else + { + if constexpr (is_big_int_v) + return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); + else if constexpr (std::is_same_v) + return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); + else + return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); + } +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template void convertToDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \ + template bool convertToDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (is_arithmetic_v && IsDataTypeDecimal) +inline typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale) +{ + typename ToDataType::FieldType result; + convertToDecimalImpl(value, scale, result); + return result; +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template typename TO_DATA_TYPE::FieldType convertToDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (is_arithmetic_v && IsDataTypeDecimal) +inline bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) +{ + return convertToDecimalImpl(value, scale, result); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template bool tryConvertToDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +DataTypePtr createDecimalMaxPrecision(UInt64 scale) +{ + return std::make_shared>(DecimalUtils::max_precision, scale); +} + +template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +template DataTypePtr createDecimalMaxPrecision(UInt64 scale); + +/// Explicit template instantiations. +template class DataTypeDecimal; +template class DataTypeDecimal; +template class DataTypeDecimal; +template class DataTypeDecimal; + void registerDataTypeDecimal(DataTypeFactory & factory) { factory.registerDataType("Decimal32", createExact, DataTypeFactory::CaseInsensitive); @@ -125,10 +375,4 @@ void registerDataTypeDecimal(DataTypeFactory & factory) factory.registerAlias("FIXED", "Decimal", DataTypeFactory::CaseInsensitive); } -/// Explicit template instantiations. 
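For orientation, the conversion helpers defined above operate on the raw underlying integers: widening the scale multiplies by 10^(scale_to - scale_from) with an overflow check, narrowing the scale divides and truncates, and narrowing the underlying type adds a range check. A small illustrative use of the Decimal64/DataTypeDecimal64 aliases from this header (a sketch, not taken from the sources):

    /// 1.23 at scale 2 is stored as the integer 123.
    Decimal64 from{123};

    /// scale 2 -> 4: 123 * 10^(4 - 2) = 12300, i.e. 1.2300
    auto widened = convertDecimals<DataTypeDecimal64, DataTypeDecimal64>(from, 2, 4);

    /// scale 4 -> 2: 12345 / 10^(4 - 2) = 123, i.e. 1.23 with the fraction truncated
    auto narrowed = convertDecimals<DataTypeDecimal64, DataTypeDecimal64>(Decimal64{12345}, 4, 2);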
-template class DataTypeDecimal; -template class DataTypeDecimal; -template class DataTypeDecimal; -template class DataTypeDecimal; - } diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h index e2b433cbe2f..badefc4c75a 100644 --- a/src/DataTypes/DataTypesDecimal.h +++ b/src/DataTypes/DataTypesDecimal.h @@ -3,7 +3,11 @@ #include #include #include +#include +#include #include +#include +#include #include #include @@ -13,7 +17,6 @@ namespace DB namespace ErrorCodes { - extern const int DECIMAL_OVERFLOW; extern const int LOGICAL_ERROR; } @@ -99,171 +102,145 @@ inline UInt32 getDecimalScale(const DataTypeDecimal & data_type) return data_type.getScale(); } +#define FOR_EACH_DECIMAL_TYPE(M) \ + M(DataTypeDecimal) \ + M(DataTypeDateTime64) \ + M(DataTypeDecimal32) \ + M(DataTypeDecimal64) \ + M(DataTypeDecimal128) \ + M(DataTypeDecimal256) + +#define FOR_EACH_DECIMAL_TYPE_PASS(M, X) \ + M(DataTypeDecimal, X) \ + M(DataTypeDateTime64, X) \ + M(DataTypeDecimal32, X) \ + M(DataTypeDecimal64, X) \ + M(DataTypeDecimal128, X) \ + M(DataTypeDecimal256, X) + + template requires (IsDataTypeDecimal && IsDataTypeDecimal) -inline ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result) -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; - using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)), FromFieldType, ToFieldType>; - using MaxNativeType = typename MaxFieldType::NativeType; +ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result); - static constexpr bool throw_exception = std::is_same_v; +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template void convertDecimalsImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); \ + extern template bool convertDecimalsImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - MaxNativeType converted_value; - if (scale_to > scale_from) - { - converted_value = DecimalUtils::scaleMultiplier(scale_to - scale_from); - if (common::mulOverflow(static_cast(value.value), converted_value, converted_value)) - { - if constexpr (throw_exception) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow while multiplying {} by scale {}", - std::string(ToDataType::family_name), toString(value.value), toString(converted_value)); - else - return ReturnType(false); - } - } - else if (scale_to == scale_from) - { - converted_value = value.value; - } - else - { - converted_value = value.value / DecimalUtils::scaleMultiplier(scale_from - scale_to); - } - - if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType)) - { - if (converted_value < std::numeric_limits::min() || - converted_value > std::numeric_limits::max()) - { - if constexpr (throw_exception) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow: {} is not in range ({}, {})", - std::string(ToDataType::family_name), toString(converted_value), - toString(std::numeric_limits::min()), - toString(std::numeric_limits::max())); - else - return ReturnType(false); - } - } - - result = 
static_cast(converted_value); - - return ReturnType(true); -} template requires (IsDataTypeDecimal && IsDataTypeDecimal) -inline typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to) -{ - using ToFieldType = typename ToDataType::FieldType; - ToFieldType result; +typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to); - convertDecimalsImpl(value, scale_from, scale_to, result); +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template typename TO_DATA_TYPE::FieldType convertDecimals(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - return result; -} template requires (IsDataTypeDecimal && IsDataTypeDecimal) -inline bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result) -{ - return convertDecimalsImpl(value, scale_from, scale_to, result); -} +bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template bool tryConvertDecimals(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + template requires (IsDataTypeDecimal && is_arithmetic_v) -inline ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; +ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType & result); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template void convertFromDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \ + extern template bool convertFromDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - return DecimalUtils::convertToImpl(value, scale, result); -} template requires (IsDataTypeDecimal && is_arithmetic_v) -inline typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale) -{ - typename ToDataType::FieldType result; +typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale); - convertFromDecimalImpl(value, scale, result); +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template typename TO_DATA_TYPE::FieldType convertFromDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - return result; -} template requires (IsDataTypeDecimal && is_arithmetic_v) -inline bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename 
ToDataType::FieldType& result) -{ - return convertFromDecimalImpl(value, scale, result); -} +bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template bool tryConvertFromDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + template requires (is_arithmetic_v && IsDataTypeDecimal) -inline ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; - using ToNativeType = typename ToFieldType::NativeType; +ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result); - static constexpr bool throw_exception = std::is_same_v; +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template void convertToDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \ + extern template bool convertToDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - if constexpr (std::is_floating_point_v) - { - if (!std::isfinite(value)) - { - if constexpr (throw_exception) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Cannot convert infinity or NaN to decimal", ToDataType::family_name); - else - return ReturnType(false); - } - - auto out = value * static_cast(DecimalUtils::scaleMultiplier(scale)); - - if (out <= static_cast(std::numeric_limits::min()) || - out >= static_cast(std::numeric_limits::max())) - { - if constexpr (throw_exception) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. 
Float is out of Decimal range", ToDataType::family_name); - else - return ReturnType(false); - } - - result = static_cast(out); - return ReturnType(true); - } - else - { - if constexpr (is_big_int_v) - return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); - else if constexpr (std::is_same_v) - return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); - else - return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); - } -} template requires (is_arithmetic_v && IsDataTypeDecimal) -inline typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale) -{ - typename ToDataType::FieldType result; - convertToDecimalImpl(value, scale, result); - return result; -} +typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template typename TO_DATA_TYPE::FieldType convertToDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + template requires (is_arithmetic_v && IsDataTypeDecimal) -inline bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) -{ - return convertToDecimalImpl(value, scale, result); -} +bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template bool tryConvertToDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + template -inline DataTypePtr createDecimalMaxPrecision(UInt64 scale) -{ - return std::make_shared>(DecimalUtils::max_precision, scale); -} +DataTypePtr createDecimalMaxPrecision(UInt64 scale); + +extern template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +extern template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +extern template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +extern template DataTypePtr createDecimalMaxPrecision(UInt64 scale); + +extern template class DataTypeDecimal; +extern template class DataTypeDecimal; +extern template class DataTypeDecimal; +extern template class DataTypeDecimal; } diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 008fa287064..99446d24eed 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -102,4 +102,21 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerAlias("DOUBLE PRECISION", "Float64", DataTypeFactory::CaseInsensitive); } +/// Explicit template instantiations. 
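The pattern this commit applies throughout (ColumnUnique, FieldVisitorConvertToNumber, the decimal and number data types): the template is compiled once in a single .cpp via explicit instantiation, and the header declares the same instantiations with extern template, so every other translation unit reuses that one copy instead of re-instantiating the same code. A minimal generic sketch of the idea, with made-up names that are not part of the ClickHouse sources:

    /// widget.h
    template <typename T>
    struct Widget
    {
        T value;
        T doubled() const { return value + value; }
    };
    extern template struct Widget<int>;   /// promise: instantiated in exactly one .cpp

    /// widget.cpp
    #include "widget.h"
    template struct Widget<int>;          /// the single explicit instantiation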
+template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; + +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; + } diff --git a/src/DataTypes/DataTypesNumber.h b/src/DataTypes/DataTypesNumber.h index 0c1f88a7925..d550ceababc 100644 --- a/src/DataTypes/DataTypesNumber.h +++ b/src/DataTypes/DataTypesNumber.h @@ -55,6 +55,22 @@ private: bool unsigned_can_be_signed = false; }; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; + +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; + using DataTypeUInt8 = DataTypeNumber; using DataTypeUInt16 = DataTypeNumber; using DataTypeUInt32 = DataTypeNumber; diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 392c56343e3..40915418aea 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -267,4 +267,91 @@ SerializationPtr IDataType::getSerialization(const NameAndTypePair & column) return column.type->getDefaultSerialization(); } +#define FOR_TYPES_OF_TYPE(M) \ + M(TypeIndex) \ + M(const IDataType &) \ + M(const DataTypePtr &) \ + M(WhichDataType) + +#define DISPATCH(TYPE) \ +bool isUInt8(TYPE data_type) { return WhichDataType(data_type).isUInt8(); } \ +bool isUInt16(TYPE data_type) { return WhichDataType(data_type).isUInt16(); } \ +bool isUInt32(TYPE data_type) { return WhichDataType(data_type).isUInt32(); } \ +bool isUInt64(TYPE data_type) { return WhichDataType(data_type).isUInt64(); } \ +bool isNativeUInt(TYPE data_type) { return WhichDataType(data_type).isNativeUInt(); } \ +bool isUInt(TYPE data_type) { return WhichDataType(data_type).isUInt(); } \ +\ +bool isInt8(TYPE data_type) { return WhichDataType(data_type).isInt8(); } \ +bool isInt16(TYPE data_type) { return WhichDataType(data_type).isInt16(); } \ +bool isInt32(TYPE data_type) { return WhichDataType(data_type).isInt32(); } \ +bool isInt64(TYPE data_type) { return WhichDataType(data_type).isInt64(); } \ +bool isNativeInt(TYPE data_type) { return WhichDataType(data_type).isNativeInt(); } \ +bool isInt(TYPE data_type) { return WhichDataType(data_type).isInt(); } \ +\ +bool isInteger(TYPE data_type) { return WhichDataType(data_type).isInteger(); } \ +bool isNativeInteger(TYPE data_type) { return WhichDataType(data_type).isNativeInteger(); } \ +\ +bool isDecimal(TYPE data_type) { return WhichDataType(data_type).isDecimal(); } \ +\ +bool isFloat(TYPE data_type) { return WhichDataType(data_type).isFloat(); } \ +\ +bool isNativeNumber(TYPE data_type) { return WhichDataType(data_type).isNativeNumber(); } \ +bool isNumber(TYPE data_type) { return WhichDataType(data_type).isNumber(); } \ +\ +bool isEnum8(TYPE data_type) { return WhichDataType(data_type).isEnum8(); } \ +bool isEnum16(TYPE data_type) { return WhichDataType(data_type).isEnum16(); } \ +bool isEnum(TYPE data_type) { return 
WhichDataType(data_type).isEnum(); } \ +\ +bool isDate(TYPE data_type) { return WhichDataType(data_type).isDate(); } \ +bool isDate32(TYPE data_type) { return WhichDataType(data_type).isDate32(); } \ +bool isDateOrDate32(TYPE data_type) { return WhichDataType(data_type).isDateOrDate32(); } \ +bool isDateTime(TYPE data_type) { return WhichDataType(data_type).isDateTime(); } \ +bool isDateTime64(TYPE data_type) { return WhichDataType(data_type).isDateTime64(); } \ +bool isDateTimeOrDateTime64(TYPE data_type) { return WhichDataType(data_type).isDateTimeOrDateTime64(); } \ +bool isDateOrDate32OrDateTimeOrDateTime64(TYPE data_type) { return WhichDataType(data_type).isDateOrDate32OrDateTimeOrDateTime64(); } \ +\ +bool isString(TYPE data_type) { return WhichDataType(data_type).isString(); } \ +bool isFixedString(TYPE data_type) { return WhichDataType(data_type).isFixedString(); } \ +bool isStringOrFixedString(TYPE data_type) { return WhichDataType(data_type).isStringOrFixedString(); } \ +\ +bool isUUID(TYPE data_type) { return WhichDataType(data_type).isUUID(); } \ +bool isIPv4(TYPE data_type) { return WhichDataType(data_type).isIPv4(); } \ +bool isIPv6(TYPE data_type) { return WhichDataType(data_type).isIPv6(); } \ +bool isArray(TYPE data_type) { return WhichDataType(data_type).isArray(); } \ +bool isTuple(TYPE data_type) { return WhichDataType(data_type).isTuple(); } \ +bool isMap(TYPE data_type) {return WhichDataType(data_type).isMap(); } \ +bool isInterval(TYPE data_type) {return WhichDataType(data_type).isInterval(); } \ +bool isObject(TYPE data_type) { return WhichDataType(data_type).isObject(); } \ +bool isVariant(TYPE data_type) { return WhichDataType(data_type).isVariant(); } \ +bool isNothing(TYPE data_type) { return WhichDataType(data_type).isNothing(); } \ +\ +bool isColumnedAsNumber(TYPE data_type) \ +{ \ + WhichDataType which(data_type); \ + return which.isInteger() || which.isFloat() || which.isDateOrDate32OrDateTimeOrDateTime64() || which.isUUID() || which.isIPv4() || which.isIPv6(); \ +} \ +\ +bool isColumnedAsDecimal(TYPE data_type) \ +{ \ + WhichDataType which(data_type); \ + return which.isDecimal() || which.isDateTime64(); \ +} \ +\ +bool isNotCreatable(TYPE data_type) \ +{ \ + WhichDataType which(data_type); \ + return which.isNothing() || which.isFunction() || which.isSet(); \ +} \ +\ +bool isNotDecimalButComparableToDecimal(TYPE data_type) \ +{ \ + WhichDataType which(data_type); \ + return which.isInt() || which.isUInt() || which.isFloat(); \ +} \ + +FOR_TYPES_OF_TYPE(DISPATCH) + +#undef DISPATCH +#undef FOR_TYPES_OF_TYPE + } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 220658afda5..55f584ef1e0 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -424,71 +424,76 @@ struct WhichDataType /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) -template inline bool isUInt8(const T & data_type) { return WhichDataType(data_type).isUInt8(); } -template inline bool isUInt16(const T & data_type) { return WhichDataType(data_type).isUInt16(); } -template inline bool isUInt32(const T & data_type) { return WhichDataType(data_type).isUInt32(); } -template inline bool isUInt64(const T & data_type) { return WhichDataType(data_type).isUInt64(); } -template inline bool isNativeUInt(const T & data_type) { return WhichDataType(data_type).isNativeUInt(); } -template inline bool isUInt(const T & data_type) { return WhichDataType(data_type).isUInt(); } +#define FOR_TYPES_OF_TYPE(M) \ + M(TypeIndex) \ + 
M(const IDataType &) \ + M(const DataTypePtr &) \ + M(WhichDataType) -template inline bool isInt8(const T & data_type) { return WhichDataType(data_type).isInt8(); } -template inline bool isInt16(const T & data_type) { return WhichDataType(data_type).isInt16(); } -template inline bool isInt32(const T & data_type) { return WhichDataType(data_type).isInt32(); } -template inline bool isInt64(const T & data_type) { return WhichDataType(data_type).isInt64(); } -template inline bool isNativeInt(const T & data_type) { return WhichDataType(data_type).isNativeInt(); } -template inline bool isInt(const T & data_type) { return WhichDataType(data_type).isInt(); } +#define DISPATCH(TYPE) \ +bool isUInt8(TYPE data_type); \ +bool isUInt16(TYPE data_type); \ +bool isUInt32(TYPE data_type); \ +bool isUInt64(TYPE data_type); \ +bool isNativeUInt(TYPE data_type); \ +bool isUInt(TYPE data_type); \ +\ +bool isInt8(TYPE data_type); \ +bool isInt16(TYPE data_type); \ +bool isInt32(TYPE data_type); \ +bool isInt64(TYPE data_type); \ +bool isNativeInt(TYPE data_type); \ +bool isInt(TYPE data_type); \ +\ +bool isInteger(TYPE data_type); \ +bool isNativeInteger(TYPE data_type); \ +\ +bool isDecimal(TYPE data_type); \ +\ +bool isFloat(TYPE data_type); \ +\ +bool isNativeNumber(TYPE data_type); \ +bool isNumber(TYPE data_type); \ +\ +bool isEnum8(TYPE data_type); \ +bool isEnum16(TYPE data_type); \ +bool isEnum(TYPE data_type); \ +\ +bool isDate(TYPE data_type); \ +bool isDate32(TYPE data_type); \ +bool isDateOrDate32(TYPE data_type); \ +bool isDateTime(TYPE data_type); \ +bool isDateTime64(TYPE data_type); \ +bool isDateTimeOrDateTime64(TYPE data_type); \ +bool isDateOrDate32OrDateTimeOrDateTime64(TYPE data_type); \ +\ +bool isString(TYPE data_type); \ +bool isFixedString(TYPE data_type); \ +bool isStringOrFixedString(TYPE data_type); \ +\ +bool isUUID(TYPE data_type); \ +bool isIPv4(TYPE data_type); \ +bool isIPv6(TYPE data_type); \ +bool isArray(TYPE data_type); \ +bool isTuple(TYPE data_type); \ +bool isMap(TYPE data_type); \ +bool isInterval(TYPE data_type); \ +bool isObject(TYPE data_type); \ +bool isVariant(TYPE data_type); \ +bool isNothing(TYPE data_type); \ +\ +bool isColumnedAsNumber(TYPE data_type); \ +\ +bool isColumnedAsDecimal(TYPE data_type); \ +\ +bool isNotCreatable(TYPE data_type); \ +\ +bool isNotDecimalButComparableToDecimal(TYPE data_type); \ -template inline bool isInteger(const T & data_type) { return WhichDataType(data_type).isInteger(); } -template inline bool isNativeInteger(const T & data_type) { return WhichDataType(data_type).isNativeInteger(); } +FOR_TYPES_OF_TYPE(DISPATCH) -template inline bool isDecimal(const T & data_type) { return WhichDataType(data_type).isDecimal(); } - -template inline bool isFloat(const T & data_type) { return WhichDataType(data_type).isFloat(); } - -template inline bool isNativeNumber(const T & data_type) { return WhichDataType(data_type).isNativeNumber(); } -template inline bool isNumber(const T & data_type) { return WhichDataType(data_type).isNumber(); } - -template inline bool isEnum8(const T & data_type) { return WhichDataType(data_type).isEnum8(); } -template inline bool isEnum16(const T & data_type) { return WhichDataType(data_type).isEnum16(); } -template inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } - -template inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); } -template inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); } -template inline bool 
isDateOrDate32(const T & data_type) { return WhichDataType(data_type).isDateOrDate32(); } -template inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).isDateTime(); } -template inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); } -template inline bool isDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTimeOrDateTime64(); } -template inline bool isDateOrDate32OrDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateOrDate32OrDateTimeOrDateTime64(); } - -template inline bool isString(const T & data_type) { return WhichDataType(data_type).isString(); } -template inline bool isFixedString(const T & data_type) { return WhichDataType(data_type).isFixedString(); } -template inline bool isStringOrFixedString(const T & data_type) { return WhichDataType(data_type).isStringOrFixedString(); } - -template inline bool isUUID(const T & data_type) { return WhichDataType(data_type).isUUID(); } -template inline bool isIPv4(const T & data_type) { return WhichDataType(data_type).isIPv4(); } -template inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); } -template inline bool isArray(const T & data_type) { return WhichDataType(data_type).isArray(); } -template inline bool isTuple(const T & data_type) { return WhichDataType(data_type).isTuple(); } -template inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); } -template inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); } -template inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); } -template inline bool isVariant(const T & data_type) { return WhichDataType(data_type).isVariant(); } - -template inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); } - -template -inline bool isColumnedAsNumber(const T & data_type) -{ - WhichDataType which(data_type); - return which.isInteger() || which.isFloat() || which.isDateOrDate32OrDateTimeOrDateTime64() || which.isUUID() || which.isIPv4() || which.isIPv6(); -} - -template -inline bool isColumnedAsDecimal(const T & data_type) -{ - WhichDataType which(data_type); - return which.isDecimal() || which.isDateTime64(); -} +#undef DISPATCH +#undef FOR_TYPES_OF_TYPE // Same as isColumnedAsDecimal but also checks value type of underlyig column. 
template @@ -498,19 +503,6 @@ inline bool isColumnedAsDecimalT(const DataType & data_type) return (which.isDecimal() || which.isDateTime64()) && which.idx == TypeToTypeIndex; } -template -inline bool isNotCreatable(const T & data_type) -{ - WhichDataType which(data_type); - return which.isNothing() || which.isFunction() || which.isSet(); -} - -inline bool isNotDecimalButComparableToDecimal(const DataTypePtr & data_type) -{ - WhichDataType which(data_type); - return which.isInt() || which.isUInt() || which.isFloat(); -} - inline bool isBool(const DataTypePtr & data_type) { return data_type->getName() == "Bool"; diff --git a/src/DataTypes/Serializations/SerializationDecimalBase.h b/src/DataTypes/Serializations/SerializationDecimalBase.h index 08f963cedbb..5676280d34b 100644 --- a/src/DataTypes/Serializations/SerializationDecimalBase.h +++ b/src/DataTypes/Serializations/SerializationDecimalBase.h @@ -29,4 +29,10 @@ public: void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; }; +extern template class SerializationDecimalBase; +extern template class SerializationDecimalBase; +extern template class SerializationDecimalBase; +extern template class SerializationDecimalBase; +extern template class SerializationDecimalBase; + } diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index de922747ccd..979c589c64b 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -100,7 +100,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_arguments{ - {"value", &isStringOrFixedString, nullptr, "String or FixedString"} + {"value", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_arguments); diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 5619ebdae49..9f44d3e95c2 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -108,8 +108,10 @@ struct FunctionArgumentDescriptor { const char * argument_name; - std::function type_validator_func; - std::function column_validator_func; + using TypeValidator = bool (*)(const IDataType &); + TypeValidator type_validator_func; + using ColumnValidator = bool (*)(const IColumn &); + ColumnValidator column_validator_func; const char * expected_type_description; diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index 4d723a5632c..aee04a5969a 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -35,9 +35,9 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "String or FixedString"}, - {"pattern", &isString, nullptr, "String"}, - {"replacement", &isString, nullptr, "String"} + {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, + {"pattern", static_cast(&isString), nullptr, "String"}, + {"replacement", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index 5c4e582c637..ddf10b863ac 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -184,12 +184,12 @@ static inline void 
checkArgumentsWithSeparatorAndOptionalMaxSubstrings( const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"separator", &isString, isColumnConst, "const String"}, - {"s", &isString, nullptr, "String"} + {"separator", static_cast(&isString), isColumnConst, "const String"}, + {"s", static_cast(&isString), nullptr, "String"} }; FunctionArgumentDescriptors optional_args{ - {"max_substrings", &isNativeInteger, isColumnConst, "const Number"}, + {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); @@ -198,11 +198,11 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings( static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"s", &isString, nullptr, "String"}, + {"s", static_cast(&isString), nullptr, "String"}, }; FunctionArgumentDescriptors optional_args{ - {"max_substrings", &isNativeInteger, isColumnConst, "const Number"}, + {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index d74237afd77..53421a565cb 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -45,7 +45,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"value", &isDateTime64, nullptr, "DateTime64"} + {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 4792c997f51..a03f0b602b9 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -154,21 +154,21 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { auto optional_args = FunctionArgumentDescriptors{ - {"IV", &isStringOrFixedString, nullptr, "Initialization vector binary string"}, + {"IV", static_cast(&isStringOrFixedString), nullptr, "Initialization vector binary string"}, }; if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::OpenSSL) { optional_args.emplace_back(FunctionArgumentDescriptor{ - "AAD", &isStringOrFixedString, nullptr, "Additional authenticated data binary string for GCM mode" + "AAD", static_cast(&isStringOrFixedString), nullptr, "Additional authenticated data binary string for GCM mode" }); } validateFunctionArgumentTypes(*this, arguments, FunctionArgumentDescriptors{ - {"mode", &isStringOrFixedString, isColumnConst, "encryption mode string"}, - {"input", &isStringOrFixedString, {}, "plaintext"}, - {"key", &isStringOrFixedString, {}, "encryption key binary string"}, + {"mode", static_cast(&isStringOrFixedString), isColumnConst, "encryption mode string"}, + {"input", static_cast(&isStringOrFixedString), {}, "plaintext"}, + {"key", static_cast(&isStringOrFixedString), {}, "encryption key binary string"}, }, optional_args ); @@ -425,21 +425,21 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { auto optional_args = FunctionArgumentDescriptors{ - {"IV", &isStringOrFixedString, nullptr, "Initialization vector binary string"}, + {"IV", 
static_cast(&isStringOrFixedString), nullptr, "Initialization vector binary string"}, }; if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::OpenSSL) { optional_args.emplace_back(FunctionArgumentDescriptor{ - "AAD", &isStringOrFixedString, nullptr, "Additional authenticated data binary string for GCM mode" + "AAD", static_cast(&isStringOrFixedString), nullptr, "Additional authenticated data binary string for GCM mode" }); } validateFunctionArgumentTypes(*this, arguments, FunctionArgumentDescriptors{ - {"mode", &isStringOrFixedString, isColumnConst, "decryption mode string"}, - {"input", &isStringOrFixedString, {}, "ciphertext"}, - {"key", &isStringOrFixedString, {}, "decryption key binary string"}, + {"mode", static_cast(&isStringOrFixedString), isColumnConst, "decryption mode string"}, + {"input", static_cast(&isStringOrFixedString), {}, "ciphertext"}, + {"key", static_cast(&isStringOrFixedString), {}, "decryption key binary string"}, }, optional_args ); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 19647e2f086..1522e76893e 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2129,12 +2129,12 @@ public: if constexpr (to_decimal) { - mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"}); + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); } if (!to_decimal && isDateTime64(arguments)) { - mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"}); + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); } // toString(DateTime or DateTime64, [timezone: String]) @@ -2150,7 +2150,7 @@ public: // toDateTime64(value, scale : Integer[, timezone: String]) || std::is_same_v) { - optional_args.push_back({"timezone", &isString, nullptr, "String"}); + optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); } validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -2498,11 +2498,11 @@ public: if (isDateTime64(arguments)) { validateFunctionArgumentTypes(*this, arguments, - FunctionArgumentDescriptors{{"string", &isStringOrFixedString, nullptr, "String or FixedString"}}, + FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, // optional FunctionArgumentDescriptors{ - {"precision", &isUInt8, isColumnConst, "const UInt8"}, - {"timezone", &isStringOrFixedString, isColumnConst, "const String or FixedString"}, + {"precision", static_cast(&isUInt8), isColumnConst, "const UInt8"}, + {"timezone", static_cast(&isStringOrFixedString), isColumnConst, "const String or FixedString"}, }); UInt64 scale = to_datetime64 ? 
DataTypeDateTime64::default_scale : 0; diff --git a/src/Functions/JSONArrayLength.cpp b/src/Functions/JSONArrayLength.cpp index a82c50360f9..84e87061398 100644 --- a/src/Functions/JSONArrayLength.cpp +++ b/src/Functions/JSONArrayLength.cpp @@ -45,7 +45,7 @@ namespace DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { auto args = FunctionArgumentDescriptors{ - {"json", &isString, nullptr, "String"}, + {"json", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index 25c6c9ef40b..bb39566c342 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -27,7 +27,7 @@ public: static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"URL", &isString, nullptr, "String"}, + {"URL", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index 9a60d4cf989..9f5b0031eeb 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ b/src/Functions/URL/URLPathHierarchy.cpp @@ -25,7 +25,7 @@ public: static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"URL", &isString, nullptr, "String"}, + {"URL", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp index 08da148b43e..ee2eb25ae9d 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -25,7 +25,7 @@ public: static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"URL", &isString, nullptr, "String"}, + {"URL", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index 939622dd9d1..93f349acb06 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -26,7 +26,7 @@ public: static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"URL", &isString, nullptr, "String"}, + {"URL", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index c2a4fee4845..9cb74a7aa62 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -84,8 +84,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"array_1", &isArray, nullptr, "Array"}, - {"array_2", &isArray, nullptr, "Array"}, + {"array_1", static_cast(&isArray), nullptr, "Array"}, + {"array_2", static_cast(&isArray), nullptr, "Array"}, }; validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared>(); diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index 
40344efb077..b08a73b93f3 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -36,8 +36,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"array", &isArray, nullptr, "Array"}, - {"samples", &isUInt, isColumnConst, "const UInt*"}, + {"array", static_cast(&isArray), nullptr, "Array"}, + {"samples", static_cast(&isUInt), isColumnConst, "const UInt*"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/array/arrayShingles.cpp b/src/Functions/array/arrayShingles.cpp index ade1cb862f7..8932482c69c 100644 --- a/src/Functions/array/arrayShingles.cpp +++ b/src/Functions/array/arrayShingles.cpp @@ -28,8 +28,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"array", &isArray, nullptr, "Array"}, - {"length", &isInteger, nullptr, "Integer"} + {"array", static_cast(&isArray), nullptr, "Array"}, + {"length", static_cast(&isInteger), nullptr, "Integer"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index 0194cc4871a..c186c0ca7e6 100644 --- a/src/Functions/arrayStringConcat.cpp +++ b/src/Functions/arrayStringConcat.cpp @@ -151,12 +151,12 @@ public: { FunctionArgumentDescriptors mandatory_args { - {"arr", &isArray, nullptr, "Array"}, + {"arr", static_cast(&isArray), nullptr, "Array"}, }; FunctionArgumentDescriptors optional_args { - {"separator", &isString, isColumnConst, "const String"}, + {"separator", static_cast(&isString), isColumnConst, "const String"}, }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 26eaf4f5613..970e6fd6f75 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -210,10 +210,10 @@ private: FunctionArgumentDescriptors optional_args; if constexpr (IsDataTypeDecimal) - mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"}); + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); if (std::is_same_v || std::is_same_v) - optional_args.push_back({"timezone", &isString, isColumnConst, "const String"}); + optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); diff --git a/src/Functions/countMatches.h b/src/Functions/countMatches.h index e9880e6e93f..fbbb9d017ee 100644 --- a/src/Functions/countMatches.h +++ b/src/Functions/countMatches.h @@ -35,8 +35,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "String or FixedString"}, - {"pattern", &isString, isColumnConst, "constant String"} + {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, + {"pattern", static_cast(&isString), isColumnConst, "constant String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index ad49f32f769..94c915f8e38 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -53,8 +53,8 @@ public: static void checkArguments(const IFunction & func, const 
ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"haystack", &isString, nullptr, "String"}, - {"pattern", &isString, isColumnConst, "const String"} + {"haystack", static_cast(&isString), nullptr, "String"}, + {"pattern", static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index c64c9d6ccef..ac12cad1698 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -71,8 +71,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "const String or const FixedString"}, - {"needle", &isStringOrFixedString, isColumnConst, "const String or const FixedString"}, + {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, + {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index e22938f8565..f62352af0bd 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -45,8 +45,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "const String or const FixedString"}, - {"needle", &isStringOrFixedString, isColumnConst, "const String or const FixedString"}, + {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, + {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 2f6bc6f9903..92403d2e88e 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -54,7 +54,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"query", &isString, nullptr, "String"} + {"query", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index a21d0cc25bf..b98c587d172 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -52,7 +52,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors args{{"days", &isNativeInteger, nullptr, "Integer"}}; + FunctionArgumentDescriptors args{{"days", static_cast(&isNativeInteger), nullptr, "Integer"}}; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 987cf4eb1a9..c7f3c195578 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -82,17 +82,17 @@ public: if (is_year_month_variant) { FunctionArgumentDescriptors args{ - {mandatory_argument_names_year_month_day[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names_year_month_day[1], &isNumber, nullptr, "Number"}, - {mandatory_argument_names_year_month_day[2], &isNumber, nullptr, "Number"} + {mandatory_argument_names_year_month_day[0], 
static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names_year_month_day[1], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names_year_month_day[2], static_cast(&isNumber), nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); } else { FunctionArgumentDescriptors args{ - {mandatory_argument_names_year_dayofyear[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names_year_dayofyear[1], &isNumber, nullptr, "Number"} + {mandatory_argument_names_year_dayofyear[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names_year_dayofyear[1], static_cast(&isNumber), nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); } @@ -189,7 +189,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -344,16 +344,16 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[1], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[2], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[3], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[4], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[5], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[1], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[2], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[3], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[4], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[5], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -425,18 +425,18 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[1], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[2], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[3], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[4], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[5], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[1], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[2], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[3], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[4], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[5], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isNumber, nullptr, "const Number"}, - {optional_argument_names[1], &isNumber, isColumnConst, "const Number"}, - 
{optional_argument_names[2], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isNumber), nullptr, "const Number"}, + {optional_argument_names[1], static_cast(&isNumber), isColumnConst, "const Number"}, + {optional_argument_names[2], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -564,11 +564,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -643,12 +643,12 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isNumber, isColumnConst, "const Number"}, - {optional_argument_names[0], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isNumber), isColumnConst, "const Number"}, + {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 860603dc503..18882177c90 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -489,12 +489,12 @@ namespace DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"time", &isString, nullptr, "String"}, - {"format", &isString, nullptr, "String"} + {"time", static_cast(&isString), nullptr, "String"}, + {"format", static_cast(&isString), nullptr, "String"} }; FunctionArgumentDescriptors optional_args{ - {"timezone", &isString, &isColumnConst, "const String"} + {"timezone", static_cast(&isString), &isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index f6bbd2f96f2..cfb42580cb0 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -47,12 +47,12 @@ public: arguments.size()); FunctionArgumentDescriptors args{ - {"haystack", &isString, nullptr, "String"}, - {"pattern", &isString, isColumnConst, "const String"}, + {"haystack", static_cast(&isString), nullptr, "String"}, + {"pattern", static_cast(&isString), isColumnConst, "const String"}, }; if (arguments.size() == 3) - args.emplace_back(FunctionArgumentDescriptor{"index", &isInteger, nullptr, "Integer"}); + args.emplace_back(FunctionArgumentDescriptor{"index", static_cast(&isInteger), nullptr, "Integer"}); validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index c1b553ac6b3..11a2ca37a3b 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -186,8 +186,8 @@ public: DataTypePtr 
getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"s", &isString, nullptr, "String"}, - {"n", &isInteger, nullptr, "Integer"}, + {"s", static_cast(&isString), nullptr, "String"}, + {"n", static_cast(&isInteger), nullptr, "Integer"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index fbabc801913..618808b64ed 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -42,8 +42,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"time_series", &isArray, nullptr, "Array"}, - {"period", &isNativeUInt, nullptr, "Unsigned Integer"}, + {"time_series", static_cast(&isArray), nullptr, "Array"}, + {"period", static_cast(&isNativeUInt), nullptr, "Unsigned Integer"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index 5bc8edf3a54..da04d3b78d3 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -45,11 +45,11 @@ public: getName(), arguments.size()); - FunctionArgumentDescriptors mandatory_args{{"time_series", &isArray, nullptr, "Array"}}; + FunctionArgumentDescriptors mandatory_args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; FunctionArgumentDescriptors optional_args{ - {"min_percentile", &isFloat, isColumnConst, "Number"}, - {"max_percentile", &isFloat, isColumnConst, "Number"}, - {"k", &isNativeNumber, isColumnConst, "Number"}}; + {"min_percentile", static_cast(&isFloat), isColumnConst, "Number"}, + {"max_percentile", static_cast(&isFloat), isColumnConst, "Number"}, + {"k", static_cast(&isNativeNumber), isColumnConst, "Number"}}; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index c01f6b7f07b..fbaa2b14e64 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -52,7 +52,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors args{{"time_series", &isArray, nullptr, "Array"}}; + FunctionArgumentDescriptors args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared(); diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 6aafa2cb5cf..f2dd1f1c51d 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -47,7 +47,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"value", &isDateTime, nullptr, "DateTime"} + {"value", static_cast(&isDateTime), nullptr, "DateTime"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -91,10 +91,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"value", &isInt64, nullptr, "Int64"} + {"value", static_cast(&isInt64), nullptr, "Int64"} }; FunctionArgumentDescriptors optional_args{ - {"time_zone", &isString, nullptr, "String"} + {"time_zone", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, 
mandatory_args, optional_args); @@ -151,7 +151,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"value", &isDateTime64, nullptr, "DateTime64"} + {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -203,10 +203,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"value", &isInt64, nullptr, "Int64"} + {"value", static_cast(&isInt64), nullptr, "Int64"} }; FunctionArgumentDescriptors optional_args{ - {"time_zone", &isString, nullptr, "String"} + {"time_zone", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 009bc20e065..03dc0d06719 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -45,7 +45,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"n", &isInteger, nullptr, "Integer"} + {"n", static_cast(&isInteger), nullptr, "Integer"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index cd3875e2607..a052f20d6fa 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -98,7 +98,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"sqid", &isString, nullptr, "String"} + {"sqid", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index 48012c1376f..fbca08b0968 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -41,10 +41,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"timestamp", &isStringOrFixedString, nullptr, "String or FixedString"} + {"timestamp", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} }; FunctionArgumentDescriptors optional_args{ - {"time", &isString, nullptr, "String"} + {"time", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index cc2de8df0d4..fc621b272de 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -39,8 +39,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args = { - {"Value", &isNumber, nullptr, "Number"}, - {"precision", &isNativeInteger, &isColumnConst, "const Integer"} + {"Value", static_cast(&isNumber), nullptr, "Number"}, + {"precision", static_cast(&isNativeInteger), &isColumnConst, "const Integer"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); diff --git a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp index c2ee5923c01..3578401a0f8 100644 --- a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp @@ -12,6 +12,7 @@ #include #include +#include "DataTypes/IDataType.h" #include 
#include @@ -35,9 +36,12 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + constexpr auto FORMAT_NAME = "Prometheus"; -static bool isDataTypeMapString(const DataTypePtr & type) +bool isDataTypeMapString(const DataTypePtr & type) { if (!isMap(type)) return false; @@ -45,8 +49,8 @@ static bool isDataTypeMapString(const DataTypePtr & type) return isStringOrFixedString(type_map->getKeyType()) && isStringOrFixedString(type_map->getValueType()); } -template -static void getColumnPos(const Block & header, const String & col_name, Pred pred, ResType & res) +template +void getColumnPos(const Block & header, const String & col_name, bool (*pred)(const DataTypePtr &), ResType & res) { static_assert(std::is_same_v || std::is_same_v>, "Illegal ResType"); @@ -71,7 +75,7 @@ static void getColumnPos(const Block & header, const String & col_name, Pred pre } } -static Float64 tryParseFloat(const String & s) +Float64 tryParseFloat(const String & s) { Float64 t = 0; ReadBufferFromString buf(s); @@ -79,6 +83,8 @@ static Float64 tryParseFloat(const String & s) return t; } +} + PrometheusTextOutputFormat::PrometheusTextOutputFormat( WriteBuffer & out_, const Block & header_, @@ -89,12 +95,12 @@ PrometheusTextOutputFormat::PrometheusTextOutputFormat( { const Block & header = getPort(PortKind::Main).getHeader(); - getColumnPos(header, "name", isStringOrFixedString, pos.name); - getColumnPos(header, "value", isNumber, pos.value); + getColumnPos(header, "name", isStringOrFixedString, pos.name); + getColumnPos(header, "value", isNumber, pos.value); - getColumnPos(header, "help", isStringOrFixedString, pos.help); - getColumnPos(header, "type", isStringOrFixedString, pos.type); - getColumnPos(header, "timestamp", isNumber, pos.timestamp); + getColumnPos(header, "help", isStringOrFixedString, pos.help); + getColumnPos(header, "type", isStringOrFixedString, pos.type); + getColumnPos(header, "timestamp", isNumber, pos.timestamp); getColumnPos(header, "labels", isDataTypeMapString, pos.labels); } From 97607f103142e22367e90601d323b42856ed9f6f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 7 Mar 2024 17:06:45 +0000 Subject: [PATCH 329/356] Fix style --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index fc3bd17fd53..c62641ca05c 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1994,7 +1994,6 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden auto interpreter = std::make_unique(node->toAST(), subquery_context, subquery_context->getViewSource(), options); auto io = interpreter->execute(); - std::cerr << StackTrace().toString() << std::endl; PullingAsyncPipelineExecutor executor(io.pipeline); io.pipeline.setProgressCallback(context->getProgressCallback()); io.pipeline.setProcessListElement(context->getProcessListElement()); From b9d6f4b3ed999e217d0be22f1580b4d19de4e3d9 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 7 Mar 2024 17:42:50 +0000 Subject: [PATCH 330/356] fix deadlock in async inserts via native protocol --- src/Server/TCPHandler.cpp | 4 +++- .../03006_async_insert_deadlock_log.reference | 1 + .../0_stateless/03006_async_insert_deadlock_log.sh | 12 ++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03006_async_insert_deadlock_log.reference create mode 100755 
tests/queries/0_stateless/03006_async_insert_deadlock_log.sh diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d883029408c..a3cdec6a4f3 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -936,6 +936,8 @@ void TCPHandler::processInsertQuery() auto result = processAsyncInsertQuery(*insert_queue); if (result.status == AsynchronousInsertQueue::PushResult::OK) { + /// Reset pipeline because it may hold write lock for some storages. + state.io.pipeline.reset(); if (settings.wait_for_async_insert) { size_t timeout_ms = settings.wait_for_async_insert_timeout.totalMilliseconds(); @@ -968,7 +970,7 @@ void TCPHandler::processInsertQuery() else { PushingPipelineExecutor executor(state.io.pipeline); - run_executor(executor, processed_block); + run_executor(executor, std::move(processed_block)); } sendInsertProfileEvents(); diff --git a/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference b/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh b/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh new file mode 100755 index 00000000000..f01c34cdbda --- /dev/null +++ b/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "CREATE TABLE t_async_insert_deadlock (a UInt64) ENGINE = Log" + +echo '{"a": 1}' | $CLICKHOUSE_CLIENT --async_insert 1 --wait_for_async_insert 1 --query "INSERT INTO t_async_insert_deadlock FORMAT JSONEachRow" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM t_async_insert_deadlock ORDER BY a" +$CLICKHOUSE_CLIENT --query "DROP TABLE t_async_insert_deadlock" From 0f166baf6be838840541fa31031395dce0cdf0ea Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 7 Mar 2024 17:44:27 +0000 Subject: [PATCH 331/356] Fast fix tests. 
--- .../02949_parallel_replicas_in_subquery.sql | 16 ++++++++-------- ...arallel_replicas_joins_and_analyzer.reference | 8 ++++---- ...7_parallel_replicas_joins_and_analyzer.sql.j2 | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql index 9000d37c801..ab6e1532299 100644 --- a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql +++ b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql @@ -8,23 +8,23 @@ INSERT INTO merge_tree_in_subqueries VALUES(5, 'test5', 0); SET max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_for_non_replicated_merge_tree=1; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=0; -- { serverError SUPPORT_IS_DISABLED } -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=1; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; -- { serverError SUPPORT_IS_DISABLED } +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=1; SELECT '---'; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=0; -- { serverError SUPPORT_IS_DISABLED }; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=1; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; SELECT '---'; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=0; -- { serverError SUPPORT_IS_DISABLED }; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=1; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; -- { 
serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; -- IN with tuples is allowed SELECT '---'; -SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=0; -SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_subqueries_for_in=1; +SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; +SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; DROP TABLE IF EXISTS merge_tree_in_subqueries; diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference index edd99058bd9..100e4e500cd 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference @@ -294,7 +294,7 @@ sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y sub4 as (select z, a from tab3 where z != 8), sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) select * from sub5 order by x -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; 0 0 0 0 0 0 1 1 0 0 0 0 3 3 0 0 0 0 @@ -317,7 +317,7 @@ sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y sub4 as (select z, a from tab3 where z != 8), sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) select * from sub5 order by x -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0;-- { echoOn } +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0;-- { echoOn } Expression Sorting Expression @@ -631,7 +631,7 @@ sub3 as 
(select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y sub4 as (select z, a from tab3 where z != 8), sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) select * from sub5 order by x -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; 0 0 0 0 0 0 1 1 0 0 0 0 3 3 0 0 0 0 @@ -654,7 +654,7 @@ sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y sub4 as (select z, a from tab3 where z != 8), sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) select * from sub5 order by x -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; Expression Sorting Expression diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 index e0de8c64950..54505b147a3 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 @@ -153,7 +153,7 @@ sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y sub4 as (select z, a from tab3 where z != 8), sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) select * from sub5 order by x -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; explain description=0 with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), @@ -162,6 +162,6 @@ sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y sub4 as (select z, a from tab3 where z != 8), sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = 
rr.z) select * from sub5 order by x -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_subqueries_for_in=0; +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; {%- endfor %} From 8e3fc1044f20edfbfa270ba34360411e02d669e2 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 7 Mar 2024 20:39:56 +0100 Subject: [PATCH 332/356] Reject INSERT if `async_insert=1` + `deduplicate_blocks_in_dependent_materialized_views=1` (#60888) --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Interpreters/executeQuery.cpp | 16 ++++++++++++++++ src/Server/TCPHandler.cpp | 15 +++++++++++++++ ...uplication_throw_if_async_insert.reference | 0 ...mv_deduplication_throw_if_async_insert.sql | 19 +++++++++++++++++++ 6 files changed, 52 insertions(+) create mode 100644 tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.reference create mode 100644 tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9f22d35bb9e..c8bdb515baf 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -598,6 +598,7 @@ class IColumn; M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index e7b96cee9d3..2f1da7935e6 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -90,6 +90,7 @@ static std::map sett {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, + {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized views cannot work together with async inserts."}, {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index f318f363eda..88021038ebb 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -104,6 +104,7 @@ namespace ErrorCodes extern const int QUERY_WAS_CANCELLED; extern const int INCORRECT_DATA; extern const int SYNTAX_ERROR; + extern const int SUPPORT_IS_DISABLED; extern const int INCORRECT_QUERY; } @@ -1023,6 +1024,21 @@ static std::tuple executeQueryImpl( if (settings.implicit_transaction && settings.throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Async inserts with 'implicit_transaction' are not supported"); + /// Let's agree on terminology and say that a mini-INSERT is an asynchronous INSERT + /// which typically contains not a lot of data inside and a big-INSERT is an INSERT + /// which was formed by concatenating several mini-INSERTs together. + /// In case when the client had to retry some mini-INSERTs then they will be properly deduplicated + /// by the source tables. This functionality is controlled by a setting `async_insert_deduplicate`. + /// But then they will be glued together into a block and pushed through a chain of Materialized Views if any. + /// The process of forming such blocks is not deterministic so each time we retry mini-INSERTs the resulting + /// block may be concatenated differently. + /// That's why deduplication in dependent Materialized Views doesn't make sense in presence of async INSERTs. + if (settings.throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert && + settings.deduplicate_blocks_in_dependent_materialized_views) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Deduplication in dependent materialized views cannot work together with async inserts. 
"\ + "Please disable either `deduplicate_blocks_in_dependent_materialized_views` or `async_insert` setting."); + quota = context->getQuota(); if (quota) { diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d883029408c..02bfd1d8359 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -933,6 +933,21 @@ void TCPHandler::processInsertQuery() if (insert_queue && async_insert_enabled && !insert_query.select) { + /// Let's agree on terminology and say that a mini-INSERT is an asynchronous INSERT + /// which typically contains not a lot of data inside and a big-INSERT is an INSERT + /// which was formed by concatenating several mini-INSERTs together. + /// In case when the client had to retry some mini-INSERTs then they will be properly deduplicated + /// by the source tables. This functionality is controlled by a setting `async_insert_deduplicate`. + /// But then they will be glued together into a block and pushed through a chain of Materialized Views if any. + /// The process of forming such blocks is not deterministic so each time we retry mini-INSERTs the resulting + /// block may be concatenated differently. + /// That's why deduplication in dependent Materialized Views doesn't make sense in presence of async INSERTs. + if (settings.throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert && + settings.deduplicate_blocks_in_dependent_materialized_views) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Deduplication in dependent materialized views cannot work together with async inserts. "\ + "Please disable either `deduplicate_blocks_in_dependent_materialized_views` or `async_insert` setting."); + auto result = processAsyncInsertQuery(*insert_queue); if (result.status == AsynchronousInsertQueue::PushResult::OK) { diff --git a/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.reference b/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql b/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql new file mode 100644 index 00000000000..808317c917e --- /dev/null +++ b/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS 03006_test; + +SET async_insert = 1; +SET deduplicate_blocks_in_dependent_materialized_views = 1; + +CREATE TABLE 03006_test +( + d Date, + value UInt64 +) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO 03006_test VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); -- { serverError SUPPORT_IS_DISABLED } +INSERT INTO 03006_test SETTINGS compatibility='24.1' VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); +INSERT INTO 03006_test SETTINGS async_insert=0 VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); +INSERT INTO 03006_test SETTINGS deduplicate_blocks_in_dependent_materialized_views=0 VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); +INSERT INTO 03006_test SETTINGS throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert=0 VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); + +DROP TABLE IF EXISTS 03006_test; From 5bfb6f08a8cad877950c8776a766e67a13e4f3e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B1=AA=E8=82=A5=E8=82=A5?= Date: Fri, 8 Mar 2024 06:29:15 +0800 Subject: [PATCH 333/356] [Docs] correction arguments ---
docs/en/operations/utilities/clickhouse-local.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 437a5f0fff0..93a3fecf3c6 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -201,12 +201,12 @@ Arguments: - `-S`, `--structure` — table structure for input data. - `--input-format` — input format, `TSV` by default. -- `-f`, `--file` — path to data, `stdin` by default. +- `-F`, `--file` — path to data, `stdin` by default. - `-q`, `--query` — queries to execute with `;` as delimiter. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`. - `--queries-file` - file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--query queries1.sql --query queries2.sql`. Cannot be used simultaneously with `--query`. - `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`. - `-N`, `--table` — table name where to put output data, `table` by default. -- `--format`, `--output-format` — output format, `TSV` by default. +- `-f`, `--format`, `--output-format` — output format, `TSV` by default. - `-d`, `--database` — default database, `_local` by default. - `--stacktrace` — whether to dump debug output in case of exception. - `--echo` — print query before execution. From e7f99280dce94ac94fde78bf215e74d7ce103736 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 Mar 2024 02:10:01 +0100 Subject: [PATCH 334/356] Less contention in the cache, part 1 --- src/Common/ProfileEvents.cpp | 1 + src/Interpreters/Cache/FileCache.cpp | 5 +++++ src/Interpreters/Cache/FileCache.h | 1 + src/Interpreters/Cache/FileSegment.cpp | 7 +++++-- src/Interpreters/Cache/Guards.h | 4 +++- 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 87fcf220ff0..052c059a72d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -475,6 +475,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentUseMicroseconds, "File segment use() time") \ M(FileSegmentRemoveMicroseconds, "File segment remove() time") \ M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ + M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ \ diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index d242544f787..9c705ddc27c 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -188,6 +188,11 @@ CacheGuard::Lock FileCache::lockCache() const return cache_guard.lock(); } +CacheGuard::Lock FileCache::tryLockCache() const +{ + return cache_guard.tryLock(); +} + FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment::Range & range, size_t file_segments_limit) const { /// Given range = [left, right] and 
non-overlapping ordered set of file segments, diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 2de2f347999..5b665ad0271 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -173,6 +173,7 @@ public: void deactivateBackgroundOperations(); CacheGuard::Lock lockCache() const; + CacheGuard::Lock tryLockCache() const; std::vector sync(); diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 8bd89465917..e58402dac03 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -23,6 +23,7 @@ namespace ProfileEvents extern const Event FileSegmentWriteMicroseconds; extern const Event FileSegmentUseMicroseconds; extern const Event FileSegmentHolderCompleteMicroseconds; + extern const Event FileSegmentFailToIncreasePriority; extern const Event FilesystemCacheHoldFileSegments; extern const Event FilesystemCacheUnusedHoldFileSegments; } @@ -965,8 +966,10 @@ void FileSegment::increasePriority() auto it = getQueueIterator(); if (it) { - auto cache_lock = cache->lockCache(); - hits_count = it->increasePriority(cache_lock); + if (auto cache_lock = cache->tryLockCache()) + hits_count = it->increasePriority(cache_lock); + else + ProfileEvents::increment(ProfileEvents::FileSegmentFailToIncreasePriority); } } diff --git a/src/Interpreters/Cache/Guards.h b/src/Interpreters/Cache/Guards.h index 09586b55c61..5729620d82f 100644 --- a/src/Interpreters/Cache/Guards.h +++ b/src/Interpreters/Cache/Guards.h @@ -65,10 +65,12 @@ struct CacheGuard : private boost::noncopyable /// so, we wouldn't be able to pass CacheGuard::Lock to a function which accepts KeyGuard::Lock, for example struct Lock : public std::unique_lock { - explicit Lock(std::mutex & mutex_) : std::unique_lock(mutex_) {} + using Base = std::unique_lock; + using Base::Base; }; Lock lock() { return Lock(mutex); } + Lock tryLock() { return Lock(mutex, std::try_to_lock); } std::mutex mutex; }; From 582a427931336c02ed1971f5c5daf369b6cd49d3 Mon Sep 17 00:00:00 2001 From: lzydmxy <13126752315@163.com> Date: Fri, 8 Mar 2024 13:54:37 +0800 Subject: [PATCH 335/356] Add comment for move connection drain from prepare() to work() --- src/Processors/Sources/RemoteSource.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index c09657ece35..17fdac43a75 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -90,6 +90,9 @@ ISource::Status RemoteSource::prepare() void RemoteSource::work() { + /// Connection drain is a heavy operation that may take a long time. + /// Therefore we move connection drain from prepare() to work(), and drain multiple connections in parallel. 
+ /// See issue: https://github.com/ClickHouse/ClickHouse/issues/60844 if (need_drain) { query_executor->finish(); From efb72ca8f1a9d20b2053ce280a22e10039d57ed1 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 8 Mar 2024 08:41:08 +0100 Subject: [PATCH 336/356] Add tests for fixed issues --- .../0_stateless/03006_buffer_overflow_join.reference | 0 tests/queries/0_stateless/03006_buffer_overflow_join.sql | 6 ++++++ .../03007_column_nullable_uninitialzed_value.reference | 3 +++ .../03007_column_nullable_uninitialzed_value.sql | 1 + 4 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03006_buffer_overflow_join.reference create mode 100644 tests/queries/0_stateless/03006_buffer_overflow_join.sql create mode 100644 tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference create mode 100644 tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql diff --git a/tests/queries/0_stateless/03006_buffer_overflow_join.reference b/tests/queries/0_stateless/03006_buffer_overflow_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_buffer_overflow_join.sql b/tests/queries/0_stateless/03006_buffer_overflow_join.sql new file mode 100644 index 00000000000..8c1fa3cecc0 --- /dev/null +++ b/tests/queries/0_stateless/03006_buffer_overflow_join.sql @@ -0,0 +1,6 @@ +CREATE TABLE 03006_buffer_overflow_l (`a` String, `b` Tuple(String, String)) ENGINE = Memory; +INSERT INTO 03006_buffer_overflow_l SELECT * FROM generateRandom() limit 1000; +CREATE TABLE 03006_buffer_overflow_r (`a` LowCardinality(Nullable(String)), `c` Tuple(LowCardinality(String), LowCardinality(String))) ENGINE = Memory; +INSERT INTO 03006_buffer_overflow_r SELECT * FROM generateRandom() limit 1000; + +SELECT a FROM 03006_buffer_overflow_l RIGHT JOIN 03006_buffer_overflow_r USING (a) ORDER BY a ASC NULLS FIRST FORMAT Null; diff --git a/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference new file mode 100644 index 00000000000..bead7ee1474 --- /dev/null +++ b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference @@ -0,0 +1,3 @@ +\N 1000 + +\N 1000 diff --git a/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql new file mode 100644 index 00000000000..9479044e0e0 --- /dev/null +++ b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql @@ -0,0 +1 @@ +SELECT count(NULL) IGNORE NULLS > avg(toDecimal32(NULL)) IGNORE NULLS, count() FROM numbers(1000) WITH TOTALS SETTINGS allow_experimental_analyzer = 1; From b1dcf3183be1c140051a8f6bbd3a541bb4d8d47c Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Fri, 8 Mar 2024 17:37:03 +0800 Subject: [PATCH 337/356] Update settings.md --- docs/zh/operations/settings/settings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 307159aa5a1..c3b4194ed44 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1087,8 +1087,8 @@ ClickHouse生成异常 - [isNull](../../sql-reference/operators/index.md#operator-is-null) 读取 [null](../../sql-reference/data-types/nullable.md/#finding-null) 子列。 - [isNotNull](../../sql-reference/operators/index.md#is-not-null) 读取 [null](../../sql-reference/data-types/nullable.
md/#finding-null) 子列。 - [count](../../sql-reference/aggregate-functions/reference/count.md) 读取 [null](../../sql-reference/data-types/nullable.md/#finding-null) 子列。 -- [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) 读取 [keys](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。 -- [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) 读取 [values](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。 +- [mapKeys](../../sql-reference/functions/tuple-map-functions.mdx/#mapkeys) 读取 [keys](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。 +- [mapValues](../../sql-reference/functions/tuple-map-functions.mdx/#mapvalues) 读取 [values](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。 可能的值: From 46332f334ccda467abfa673a9e75683a47e24110 Mon Sep 17 00:00:00 2001 From: tomershafir Date: Fri, 8 Mar 2024 15:17:07 +0200 Subject: [PATCH 338/356] rm unused alias set/vector pair --- src/Client/QueryFuzzer.cpp | 16 ---------------- src/Client/QueryFuzzer.h | 4 +--- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 0597a7c1eed..ea2e5e17afd 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -1227,12 +1227,6 @@ void QueryFuzzer::collectFuzzInfoMain(ASTPtr ast) { collectFuzzInfoRecurse(ast); - aliases.clear(); - for (const auto & alias : aliases_set) - { - aliases.push_back(alias); - } - column_like.clear(); for (const auto & [name, value] : column_like_map) { @@ -1285,16 +1279,6 @@ void QueryFuzzer::addColumnLike(ASTPtr ast) void QueryFuzzer::collectFuzzInfoRecurse(ASTPtr ast) { - if (auto * impl = dynamic_cast(ast.get())) - { - if (aliases_set.size() > 1000) - { - aliases_set.clear(); - } - - aliases_set.insert(impl->alias); - } - if (typeid_cast(ast.get())) { addColumnLike(ast); diff --git a/src/Client/QueryFuzzer.h b/src/Client/QueryFuzzer.h index f5465626d96..3bc7b0842d3 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Client/QueryFuzzer.h @@ -50,9 +50,7 @@ struct QueryFuzzer // we are currently fuzzing. We add some part from each new query we are asked // to fuzz, and keep this state between queries, so the fuzzing output becomes // more interesting over time, as the queries mix. - std::unordered_set aliases_set; - std::vector aliases; - + // The maps are used for collection, and the vectors are used for random access. 
std::unordered_map column_like_map; std::vector column_like; From f73a8f2eae72ae37b9d1b5b89fc3bca217536b70 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 8 Mar 2024 18:05:56 +0100 Subject: [PATCH 339/356] More comments for column for system tables (#59016) --- src/Access/Common/QuotaDefs.cpp | 90 ++++++++++++-- src/Access/Common/QuotaDefs.h | 2 + src/Access/Common/RowPolicyDefs.cpp | 9 +- src/Access/Common/RowPolicyDefs.h | 1 + src/Interpreters/AsynchronousInsertLog.cpp | 36 +++--- src/Interpreters/BackupLog.cpp | 36 +++--- src/Interpreters/BlobStorageLog.cpp | 26 ++-- src/Interpreters/CrashLog.cpp | 24 ++-- src/Interpreters/FilesystemCacheLog.cpp | 28 ++--- src/Interpreters/OpenTelemetrySpanLog.cpp | 25 ++-- src/Interpreters/PartLog.cpp | 2 +- src/Interpreters/ProcessorsProfileLog.cpp | 36 +++--- src/Interpreters/QueryLog.cpp | 6 +- src/Interpreters/QueryThreadLog.cpp | 94 +++++++-------- src/Interpreters/QueryViewsLog.cpp | 46 +++---- src/Interpreters/S3QueueLog.cpp | 26 ++-- src/Interpreters/SessionLog.cpp | 47 ++++---- src/Interpreters/TraceLog.cpp | 35 +++--- src/Interpreters/TransactionsInfoLog.cpp | 26 ++-- src/Interpreters/ZooKeeperLog.cpp | 66 +++++----- .../System/StorageSystemCertificates.cpp | 21 ++-- src/Storages/System/StorageSystemColumns.cpp | 47 ++++---- .../StorageSystemDataSkippingIndices.cpp | 20 +-- .../System/StorageSystemDetachedParts.cpp | 24 ++-- .../System/StorageSystemDictionaries.cpp | 2 +- src/Storages/System/StorageSystemDisks.cpp | 26 ++-- .../System/StorageSystemFilesystemCache.cpp | 30 ++--- src/Storages/System/StorageSystemGraphite.cpp | 6 +- .../System/StorageSystemKafkaConsumers.cpp | 4 +- src/Storages/System/StorageSystemMerges.cpp | 10 +- src/Storages/System/StorageSystemModels.cpp | 9 +- .../System/StorageSystemMutations.cpp | 2 +- src/Storages/System/StorageSystemNumbers.cpp | 2 + src/Storages/System/StorageSystemOne.cpp | 2 + .../StorageSystemPartMovesBetweenShards.cpp | 29 +++-- src/Storages/System/StorageSystemParts.cpp | 40 +++--- .../System/StorageSystemPartsColumns.cpp | 101 ++++++++-------- .../System/StorageSystemPrivileges.cpp | 22 ++-- .../System/StorageSystemProcesses.cpp | 58 ++++----- .../System/StorageSystemProjectionParts.cpp | 114 +++++++++--------- .../StorageSystemProjectionPartsColumns.cpp | 96 +++++++-------- .../System/StorageSystemQuotaLimits.cpp | 3 +- .../System/StorageSystemQuotaUsage.cpp | 4 +- .../System/StorageSystemRemoteDataPaths.cpp | 16 +-- src/Storages/System/StorageSystemReplicas.cpp | 76 ++++++------ .../System/StorageSystemRowPolicies.cpp | 4 +- src/Storages/System/StorageSystemS3Queue.cpp | 16 +-- .../System/StorageSystemSettingsChanges.cpp | 4 +- .../StorageSystemSettingsProfileElements.cpp | 2 +- .../System/StorageSystemStackTrace.cpp | 10 +- .../System/StorageSystemStoragePolicies.cpp | 20 +-- src/Storages/System/StorageSystemSymbols.cpp | 6 +- src/Storages/System/StorageSystemTables.cpp | 6 +- .../System/StorageSystemTransactions.cpp | 11 +- .../System/StorageSystemUserDirectories.cpp | 9 +- .../System/StorageSystemUserProcesses.cpp | 8 +- src/Storages/System/StorageSystemUsers.cpp | 8 +- .../System/StorageSystemViewRefreshes.cpp | 16 +-- .../02294_system_certificates.reference | 20 +-- ...26_settings_changes_system_table.reference | 4 +- ..._all_columns_should_have_comment.reference | 0 .../02992_all_columns_should_have_comment.sql | 4 + 62 files changed, 845 insertions(+), 728 deletions(-) create mode 100644 tests/queries/0_stateless/02992_all_columns_should_have_comment.reference create mode 
100644 tests/queries/0_stateless/02992_all_columns_should_have_comment.sql diff --git a/src/Access/Common/QuotaDefs.cpp b/src/Access/Common/QuotaDefs.cpp index 0e9a4d5a365..04c16a562d2 100644 --- a/src/Access/Common/QuotaDefs.cpp +++ b/src/Access/Common/QuotaDefs.cpp @@ -49,71 +49,135 @@ String QuotaTypeInfo::valueToStringWithName(QuotaValue value) const const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type) { - static constexpr auto make_info = [](const char * raw_name_, UInt64 output_denominator_) + static constexpr auto make_info = [](const char * raw_name_, String current_usage_description_, String max_allowed_usage_description_, UInt64 output_denominator_) { String init_name = raw_name_; boost::to_lower(init_name); String init_keyword = raw_name_; boost::replace_all(init_keyword, "_", " "); bool init_output_as_float = (output_denominator_ != 1); - return QuotaTypeInfo{raw_name_, std::move(init_name), std::move(init_keyword), init_output_as_float, output_denominator_}; + return QuotaTypeInfo + { + .raw_name = raw_name_, + .name = std::move(init_name), + .keyword = std::move(init_keyword), + .current_usage_description = std::move(current_usage_description_), + .max_allowed_usage_description = std::move(max_allowed_usage_description_), + .output_as_float = init_output_as_float, + .output_denominator = output_denominator_ + }; }; switch (type) { case QuotaType::QUERIES: { - static const auto info = make_info("QUERIES", 1); + static const auto info = make_info( + "QUERIES", + "The current number of executed queries.", + "The maximum allowed number of queries of all types allowed to be executed.", + 1 + ); return info; } case QuotaType::QUERY_SELECTS: { - static const auto info = make_info("QUERY_SELECTS", 1); + static const auto info = make_info( + "QUERY_SELECTS", + "The current number of executed SELECT queries.", + "The maximum allowed number of SELECT queries allowed to be executed.", + 1 + ); return info; } case QuotaType::QUERY_INSERTS: { - static const auto info = make_info("QUERY_INSERTS", 1); + static const auto info = make_info( + "QUERY_INSERTS", + "The current number of executed INSERT queries.", + "The maximum allowed number of INSERT queries allowed to be executed.", + 1 + ); return info; } case QuotaType::ERRORS: { - static const auto info = make_info("ERRORS", 1); + static const auto info = make_info( + "ERRORS", + "The current number of queries resulted in an error.", + "The maximum number of queries resulted in an error allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::RESULT_ROWS: { - static const auto info = make_info("RESULT_ROWS", 1); + static const auto info = make_info( + "RESULT_ROWS", + "The current total number of rows in the result set of all queries within the current period of time.", + "The maximum total number of rows in the result set of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::RESULT_BYTES: { - static const auto info = make_info("RESULT_BYTES", 1); + static const auto info = make_info( + "RESULT_BYTES", + "The current total number of bytes in the result set of all queries within the current period of time.", + "The maximum total number of bytes in the result set of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::READ_ROWS: { - static const auto info = make_info("READ_ROWS", 1); + static const auto info = make_info( + "READ_ROWS", + "The current total number of rows read during execution of all queries within 
the current period of time.", + "The maximum number of rows to read during execution of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::READ_BYTES: { - static const auto info = make_info("READ_BYTES", 1); + static const auto info = make_info( + "READ_BYTES", + "The current total number of bytes read during execution of all queries within the current period of time.", + "The maximum number of bytes to read during execution of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::EXECUTION_TIME: { - static const auto info = make_info("EXECUTION_TIME", 1000000000 /* execution_time is stored in nanoseconds */); + static const auto info = make_info( + "EXECUTION_TIME", + "The current total amount of time (in nanoseconds) spent to execute queries within the current period of time", + "The maximum amount of time (in nanoseconds) allowed for all queries to execute within the specified period of time", + 1000000000 /* execution_time is stored in nanoseconds */ + ); return info; } case QuotaType::WRITTEN_BYTES: { - static const auto info = make_info("WRITTEN_BYTES", 1); + static const auto info = make_info( + "WRITTEN_BYTES", + "The current total number of bytes written during execution of all queries within the current period of time.", + "The maximum number of bytes to be written during execution of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS: { - static const auto info = make_info("FAILED_SEQUENTIAL_AUTHENTICATIONS", 1); + static const auto info = make_info( + "FAILED_SEQUENTIAL_AUTHENTICATIONS", + "The current number of consecutive authentication failures within the current period of time.", + "The maximum number of consecutive authentication failures allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::MAX: break; diff --git a/src/Access/Common/QuotaDefs.h b/src/Access/Common/QuotaDefs.h index 4f849a72b43..6618f01c8f9 100644 --- a/src/Access/Common/QuotaDefs.h +++ b/src/Access/Common/QuotaDefs.h @@ -33,6 +33,8 @@ struct QuotaTypeInfo const char * const raw_name = ""; const String name; /// Lowercased with underscores, e.g. "result_rows". const String keyword; /// Uppercased with spaces, e.g. "RESULT ROWS". 
+ const String current_usage_description; + const String max_allowed_usage_description; const bool output_as_float = false; const UInt64 output_denominator = 1; String valueToString(QuotaValue value) const; diff --git a/src/Access/Common/RowPolicyDefs.cpp b/src/Access/Common/RowPolicyDefs.cpp index b1f882fe971..a9509b6dd76 100644 --- a/src/Access/Common/RowPolicyDefs.cpp +++ b/src/Access/Common/RowPolicyDefs.cpp @@ -33,7 +33,7 @@ String toString(RowPolicyFilterType type) const RowPolicyFilterTypeInfo & RowPolicyFilterTypeInfo::get(RowPolicyFilterType type_) { - static constexpr auto make_info = [](const char * raw_name_) + static constexpr auto make_info = [](const char * raw_name_, const String & comment_) { String init_name = raw_name_; boost::to_lower(init_name); @@ -41,14 +41,17 @@ const RowPolicyFilterTypeInfo & RowPolicyFilterTypeInfo::get(RowPolicyFilterType String init_command = init_name.substr(0, underscore_pos); boost::to_upper(init_command); bool init_is_check = (std::string_view{init_name}.substr(underscore_pos + 1) == "check"); - return RowPolicyFilterTypeInfo{raw_name_, std::move(init_name), std::move(init_command), init_is_check}; + return RowPolicyFilterTypeInfo{raw_name_, std::move(init_name), std::move(init_command), comment_, init_is_check}; }; switch (type_) { case RowPolicyFilterType::SELECT_FILTER: { - static const auto info = make_info("SELECT_FILTER"); + static const auto info = make_info( + "SELECT_FILTER", + "Expression which is used for filtering in SELECT queries." + ); return info; } #if 0 /// Row-level security for INSERT, UPDATE, DELETE is not implemented yet. diff --git a/src/Access/Common/RowPolicyDefs.h b/src/Access/Common/RowPolicyDefs.h index 7ffc99e1272..bf2f632e98b 100644 --- a/src/Access/Common/RowPolicyDefs.h +++ b/src/Access/Common/RowPolicyDefs.h @@ -52,6 +52,7 @@ struct RowPolicyFilterTypeInfo const char * const raw_name; const String name; /// Lowercased with underscores, e.g. "select_filter". const String command; /// Uppercased without last word, e.g. "SELECT". + const String description; const bool is_check; /// E.g. false for SELECT_FILTER. 
static const RowPolicyFilterTypeInfo & get(RowPolicyFilterType type); }; diff --git a/src/Interpreters/AsynchronousInsertLog.cpp b/src/Interpreters/AsynchronousInsertLog.cpp index 5d851f6b47d..0fc39c77fb4 100644 --- a/src/Interpreters/AsynchronousInsertLog.cpp +++ b/src/Interpreters/AsynchronousInsertLog.cpp @@ -33,26 +33,26 @@ ColumnsDescription AsynchronousInsertLogElement::getColumnsDescription() }); return ColumnsDescription{ - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the async insert happened."}, + {"event_time", std::make_shared(), "The date and time when the async insert finished execution."}, + {"event_time_microseconds", std::make_shared(6), "The date and time when the async insert finished execution with microseconds precision."}, - {"query", std::make_shared()}, - {"database", std::make_shared(std::make_shared())}, - {"table", std::make_shared(std::make_shared())}, - {"format", std::make_shared(std::make_shared())}, - {"query_id", std::make_shared()}, - {"bytes", std::make_shared()}, - {"rows", std::make_shared()}, - {"exception", std::make_shared()}, - {"status", type_status}, - {"data_kind", type_data_kind}, + {"query", std::make_shared(), "Query string."}, + {"database", std::make_shared(std::make_shared()), "The name of the database the table is in."}, + {"table", std::make_shared(std::make_shared()), "Table name."}, + {"format", std::make_shared(std::make_shared()), "Format name."}, + {"query_id", std::make_shared(), "ID of the initial query."}, + {"bytes", std::make_shared(), "Number of inserted bytes."}, + {"rows", std::make_shared(), "Number of inserted rows."}, + {"exception", std::make_shared(), "Exception message."}, + {"status", type_status, "Status of the view. Values: 'Ok' = 1 — Successful insert, 'ParsingError' = 2 — Exception when parsing the data, 'FlushError' = 3 — Exception when flushing the data"}, + {"data_kind", type_data_kind, "The status of the data. 
Value: 'Parsed' and 'Preprocessed'."}, - {"flush_time", std::make_shared()}, - {"flush_time_microseconds", std::make_shared(6)}, - {"flush_query_id", std::make_shared()}, - {"timeout_milliseconds", std::make_shared()}, + {"flush_time", std::make_shared(), "The date and time when the flush happened."}, + {"flush_time_microseconds", std::make_shared(6), "The date and time when the flush happened with microseconds precision."}, + {"flush_query_id", std::make_shared(), "ID of the flush query."}, + {"timeout_milliseconds", std::make_shared(), "The adaptive timeout calculated for this entry."}, }; } diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp index d5b69bc0728..af6c7cf6234 100644 --- a/src/Interpreters/BackupLog.cpp +++ b/src/Interpreters/BackupLog.cpp @@ -22,24 +22,24 @@ ColumnsDescription BackupLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"id", std::make_shared()}, - {"name", std::make_shared()}, - {"base_backup_name", std::make_shared()}, - {"query_id", std::make_shared()}, - {"status", std::make_shared(getBackupStatusEnumValues())}, - {"error", std::make_shared()}, - {"start_time", std::make_shared()}, - {"end_time", std::make_shared()}, - {"num_files", std::make_shared()}, - {"total_size", std::make_shared()}, - {"num_entries", std::make_shared()}, - {"uncompressed_size", std::make_shared()}, - {"compressed_size", std::make_shared()}, - {"files_read", std::make_shared()}, - {"bytes_read", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time_microseconds", std::make_shared(6), "Time of the entry with microseconds precision."}, + {"id", std::make_shared(), "Identifier of the backup or restore operation."}, + {"name", std::make_shared(), "Name of the backup storage (the contents of the FROM or TO clause)."}, + {"base_backup_name", std::make_shared(), "The name of base backup in case incremental one."}, + {"query_id", std::make_shared(), "The ID of a query associated with a backup operation."}, + {"status", std::make_shared(getBackupStatusEnumValues()), "Operation status."}, + {"error", std::make_shared(), "Error message of the failed operation (empty string for successful operations)."}, + {"start_time", std::make_shared(), "Start time of the operation."}, + {"end_time", std::make_shared(), "End time of the operation."}, + {"num_files", std::make_shared(), "Number of files stored in the backup."}, + {"total_size", std::make_shared(), "Total size of files stored in the backup."}, + {"num_entries", std::make_shared(), "Number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder, or the number of files inside the archive if the backup is stored as an archive. It is not the same as num_files if it's an incremental backup or if it contains empty files or duplicates. The following is always true: num_entries <= num_files."}, + {"uncompressed_size", std::make_shared(), "Uncompressed size of the backup."}, + {"compressed_size", std::make_shared(), "Compressed size of the backup. 
If the backup is not stored as an archive it equals to uncompressed_size."}, + {"files_read", std::make_shared(), "Number of files read during the restore operation."}, + {"bytes_read", std::make_shared(), "Total size of files read during the restore operation."}, }; } diff --git a/src/Interpreters/BlobStorageLog.cpp b/src/Interpreters/BlobStorageLog.cpp index 520405374ca..f9d5b0d6790 100644 --- a/src/Interpreters/BlobStorageLog.cpp +++ b/src/Interpreters/BlobStorageLog.cpp @@ -26,23 +26,23 @@ ColumnsDescription BlobStorageLogElement::getColumnsDescription() return ColumnsDescription { - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"event_date", std::make_shared(), "Date of the event."}, + {"event_time", std::make_shared(), "Time of the event."}, + {"event_time_microseconds", std::make_shared(6), "Time of the event with microseconds precision."}, - {"event_type", event_enum_type}, + {"event_type", event_enum_type, "Type of the event. Possible values: 'Upload', 'Delete', 'MultiPartUploadCreate', 'MultiPartUploadWrite', 'MultiPartUploadComplete', 'MultiPartUploadAbort'"}, - {"query_id", std::make_shared()}, - {"thread_id", std::make_shared()}, - {"thread_name", std::make_shared()}, + {"query_id", std::make_shared(), "Identifier of the query associated with the event, if any."}, + {"thread_id", std::make_shared(), "Identifier of the thread performing the operation."}, + {"thread_name", std::make_shared(), "Name of the thread performing the operation."}, - {"disk_name", std::make_shared(std::make_shared())}, - {"bucket", std::make_shared()}, - {"remote_path", std::make_shared()}, - {"local_path", std::make_shared()}, - {"data_size", std::make_shared()}, + {"disk_name", std::make_shared(std::make_shared()), "Name of the associated disk."}, + {"bucket", std::make_shared(), "Name of the bucket."}, + {"remote_path", std::make_shared(), "Path to the remote resource."}, + {"local_path", std::make_shared(), "Path to the metadata file on the local system, which references the remote resource."}, + {"data_size", std::make_shared(), "Size of the data involved in the upload event."}, - {"error", std::make_shared()}, + {"error", std::make_shared(), "Error message associated with the event, if any."}, }; } diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index 4fb81e4bcf7..410ea922429 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -23,18 +23,18 @@ ColumnsDescription CrashLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"timestamp_ns", std::make_shared()}, - {"signal", std::make_shared()}, - {"thread_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"trace", std::make_shared(std::make_shared())}, - {"trace_full", std::make_shared(std::make_shared())}, - {"version", std::make_shared()}, - {"revision", std::make_shared()}, - {"build_id", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "The hostname where the crash occurred."}, + {"event_date", std::make_shared(), "The date of the crash."}, + {"event_time", std::make_shared(), "The time of the crash."}, + {"timestamp_ns", std::make_shared(), "Timestamp of the event with nanoseconds."}, + {"signal", std::make_shared(), "Signal number."}, + {"thread_id", std::make_shared(), "Thread ID."}, + {"query_id", std::make_shared(), "Query 
ID."}, + {"trace", std::make_shared(std::make_shared()), "Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process."}, + {"trace_full", std::make_shared(std::make_shared()), "Stack trace at the moment of crash. Each element contains a called method inside ClickHouse server process."}, + {"version", std::make_shared(), "ClickHouse server version."}, + {"revision", std::make_shared(), "ClickHouse server revision."}, + {"build_id", std::make_shared(), "BuildID that is generated by compiler."}, }; } diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index ccfee49a66f..80fe1c3a8ef 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -38,20 +38,20 @@ ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"query_id", std::make_shared()}, - {"source_file_path", std::make_shared()}, - {"file_segment_range", std::make_shared(types)}, - {"total_requested_range", std::make_shared(types)}, - {"key", std::make_shared()}, - {"offset", std::make_shared()}, - {"size", std::make_shared()}, - {"read_type", std::make_shared()}, - {"read_from_cache_attempted", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"read_buffer_id", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname"}, + {"event_date", std::make_shared(), "Event date"}, + {"event_time", std::make_shared(), "Event time"}, + {"query_id", std::make_shared(), "Id of the query"}, + {"source_file_path", std::make_shared(), "File segment path on filesystem"}, + {"file_segment_range", std::make_shared(types), "File segment range"}, + {"total_requested_range", std::make_shared(types), "Full read range"}, + {"key", std::make_shared(), "File segment key"}, + {"offset", std::make_shared(), "File segment offset"}, + {"size", std::make_shared(), "Read size"}, + {"read_type", std::make_shared(), "Read type: READ_FROM_CACHE, READ_FROM_FS_AND_DOWNLOADED_TO_CACHE, READ_FROM_FS_BYPASSING_CACHE"}, + {"read_from_cache_attempted", std::make_shared(), "Whether reading from cache was attempted"}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected while reading this file segment"}, + {"read_buffer_id", std::make_shared(), "Internal implementation read buffer id"}, }; } diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index fffc1e50da0..aa11749f8a6 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -32,12 +32,17 @@ ColumnsDescription OpenTelemetrySpanLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", low_cardinality_string}, - {"trace_id", std::make_shared()}, - {"span_id", std::make_shared()}, - {"parent_span_id", std::make_shared()}, - {"operation_name", low_cardinality_string}, - {"kind", std::move(span_kind_type)}, + {"hostname", low_cardinality_string, "The hostname where this span was captured."}, + {"trace_id", std::make_shared(), "ID of the trace for executed query."}, + {"span_id", std::make_shared(), "ID of the trace span."}, + {"parent_span_id", std::make_shared(), "ID of the parent trace span."}, + {"operation_name", low_cardinality_string, "The name of the operation."}, + 
{"kind", std::move(span_kind_type), "The SpanKind of the span. " + "INTERNAL — Indicates that the span represents an internal operation within an application. " + "SERVER — Indicates that the span covers server-side handling of a synchronous RPC or other remote request. " + "CLIENT — Indicates that the span describes a request to some remote service. " + "PRODUCER — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. " + "CONSUMER - Indicates that the span describes a child of an asynchronous PRODUCER request."}, // DateTime64 is really unwieldy -- there is no "normal" way to convert // it to an UInt64 count of microseconds, except: // 1) reinterpretAsUInt64(reinterpretAsFixedString(date)), which just @@ -48,10 +53,10 @@ ColumnsDescription OpenTelemetrySpanLogElement::getColumnsDescription() // Also subtraction of two DateTime64 points doesn't work, so you can't // get duration. // It is much less hassle to just use UInt64 of microseconds. - {"start_time_us", std::make_shared()}, - {"finish_time_us", std::make_shared()}, - {"finish_date", std::make_shared()}, - {"attribute", std::make_shared(low_cardinality_string, std::make_shared())}, + {"start_time_us", std::make_shared(), "The start time of the trace span (in microseconds)."}, + {"finish_time_us", std::make_shared(), "The finish time of the trace span (in microseconds)."}, + {"finish_date", std::make_shared(), "The finish date of the trace span."}, + {"attribute", std::make_shared(low_cardinality_string, std::make_shared()), "Attribute depending on the trace span. They are filled in according to the recommendations in the OpenTelemetry standard."}, }; } diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index a7f20a06785..66f933f1afa 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -123,7 +123,7 @@ ColumnsDescription PartLogElement::getColumnsDescription() {"table_uuid", std::make_shared(), "UUID of the table the data part belongs to."}, {"part_name", std::make_shared(), "Name of the data part."}, {"partition_id", std::make_shared(), "ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`."}, - {"partition", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, {"part_type", std::make_shared(), "The type of the part. 
Possible values: Wide and Compact."}, {"disk_name", std::make_shared(), "The disk name data part lies on."}, {"path_on_disk", std::make_shared(), "Absolute path to the folder with data part files."}, diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index 088d193257c..015b4abc712 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -21,26 +21,26 @@ ColumnsDescription ProcessorProfileLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the event happened."}, + {"event_time", std::make_shared(), "The date and time when the event happened."}, + {"event_time_microseconds", std::make_shared(6), "The date and time with microseconds precision when the event happened."}, - {"id", std::make_shared()}, - {"parent_ids", std::make_shared(std::make_shared())}, - {"plan_step", std::make_shared()}, - {"plan_group", std::make_shared()}, + {"id", std::make_shared(), "ID of processor."}, + {"parent_ids", std::make_shared(std::make_shared()), "Parent processors IDs."}, + {"plan_step", std::make_shared(), "ID of the query plan step which created this processor. The value is zero if the processor was not added from any step."}, + {"plan_group", std::make_shared(), "Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result."}, - {"initial_query_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"name", std::make_shared(std::make_shared())}, - {"elapsed_us", std::make_shared()}, - {"input_wait_elapsed_us", std::make_shared()}, - {"output_wait_elapsed_us", std::make_shared()}, - {"input_rows", std::make_shared()}, - {"input_bytes", std::make_shared()}, - {"output_rows", std::make_shared()}, - {"output_bytes", std::make_shared()}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"query_id", std::make_shared(), "ID of the query."}, + {"name", std::make_shared(std::make_shared()), "Name of the processor."}, + {"elapsed_us", std::make_shared(), "Number of microseconds this processor was executed."}, + {"input_wait_elapsed_us", std::make_shared(), "Number of microseconds this processor was waiting for data (from other processor)."}, + {"output_wait_elapsed_us", std::make_shared(), "Number of microseconds this processor was waiting because output port was full."}, + {"input_rows", std::make_shared(), "The number of rows consumed by processor."}, + {"input_bytes", std::make_shared(), "The number of bytes consumed by processor."}, + {"output_rows", std::make_shared(), "The number of rows generated by processor."}, + {"output_bytes", std::make_shared(), "The number of bytes generated by processor."}, }; } diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index ad6e344655b..92f8ddae141 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -134,13 +134,13 @@ ColumnsDescription QueryLogElement::getColumnsDescription() {"used_storages", array_low_cardinality_string, "Canonical names of storages, which 
were used during query execution."}, {"used_table_functions", array_low_cardinality_string, "Canonical names of table functions, which were used during query execution."}, - {"used_row_policies", array_low_cardinality_string}, + {"used_row_policies", array_low_cardinality_string, "The list of row policies names that were used during query execution."}, - {"transaction_id", getTransactionIDDataType()}, + {"transaction_id", getTransactionIDDataType(), "The identifier of the transaction in scope of which this query was executed."}, {"query_cache_usage", std::move(query_cache_usage_datatype), "Usage of the query cache during query execution. Values: 'Unknown' = Status unknown, 'None' = The query result was neither written into nor read from the query cache, 'Write' = The query result was written into the query cache, 'Read' = The query result was read from the query cache."}, - {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared())}, + {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared()), "Metrics for asynchronous reading."}, }; } diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index d153e30a4ce..f50458745b9 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -28,58 +28,58 @@ ColumnsDescription QueryThreadLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", low_cardinality_string}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"query_start_time", std::make_shared()}, - {"query_start_time_microseconds", std::make_shared(6)}, - {"query_duration_ms", std::make_shared()}, + {"hostname", low_cardinality_string, "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the thread has finished execution of the query."}, + {"event_time", std::make_shared(), "The date and time when the thread has finished execution of the query."}, + {"event_time_microseconds", std::make_shared(6), "The date and time when the thread has finished execution of the query with microseconds precision."}, + {"query_start_time", std::make_shared(), "Start time of query execution."}, + {"query_start_time_microseconds", std::make_shared(6), "Start time of query execution with microsecond precision."}, + {"query_duration_ms", std::make_shared(), "Duration of query execution."}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"memory_usage", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, + {"read_rows", std::make_shared(), "Number of read rows."}, + {"read_bytes", std::make_shared(), "Number of read bytes."}, + {"written_rows", std::make_shared(), "For INSERT queries, the number of written rows. For other queries, the column value is 0."}, + {"written_bytes", std::make_shared(), "For INSERT queries, the number of written bytes. 
For other queries, the column value is 0."}, + {"memory_usage", std::make_shared(), "The difference between the amount of allocated and freed memory in context of this thread."}, + {"peak_memory_usage", std::make_shared(), "The maximum difference between the amount of allocated and freed memory in context of this thread."}, - {"thread_name", low_cardinality_string}, - {"thread_id", std::make_shared()}, - {"master_thread_id", std::make_shared()}, - {"current_database", low_cardinality_string}, - {"query", std::make_shared()}, - {"normalized_query_hash", std::make_shared()}, + {"thread_name", low_cardinality_string, "Name of the thread."}, + {"thread_id", std::make_shared(), "Internal thread ID."}, + {"master_thread_id", std::make_shared(), "OS initial ID of initial thread."}, + {"current_database", low_cardinality_string, "Name of the current database."}, + {"query", std::make_shared(), "Query string."}, + {"normalized_query_hash", std::make_shared(), "The hash of normalized query - with wiped constanstans, etc."}, - {"is_initial_query", std::make_shared()}, - {"user", low_cardinality_string}, - {"query_id", std::make_shared()}, - {"address", DataTypeFactory::instance().get("IPv6")}, - {"port", std::make_shared()}, - {"initial_user", low_cardinality_string}, - {"initial_query_id", std::make_shared()}, - {"initial_address", DataTypeFactory::instance().get("IPv6")}, - {"initial_port", std::make_shared()}, - {"initial_query_start_time", std::make_shared()}, - {"initial_query_start_time_microseconds", std::make_shared(6)}, - {"interface", std::make_shared()}, - {"is_secure", std::make_shared()}, - {"os_user", low_cardinality_string}, - {"client_hostname", low_cardinality_string}, - {"client_name", low_cardinality_string}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, - {"http_method", std::make_shared()}, - {"http_user_agent", low_cardinality_string}, - {"http_referer", std::make_shared()}, - {"forwarded_for", std::make_shared()}, - {"quota_key", std::make_shared()}, - {"distributed_depth", std::make_shared()}, + {"is_initial_query", std::make_shared(), "Query type. Possible values: 1 — Query was initiated by the client, 0 — Query was initiated by another query for distributed query execution."}, + {"user", low_cardinality_string, "Name of the user who initiated the current query."}, + {"query_id", std::make_shared(), "ID of the query."}, + {"address", DataTypeFactory::instance().get("IPv6"), "IP address that was used to make the query."}, + {"port", std::make_shared(), "The client port that was used to make the query."}, + {"initial_user", low_cardinality_string, "Name of the user who ran the initial query (for distributed query execution)."}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"initial_address", DataTypeFactory::instance().get("IPv6"), "IP address that the parent query was launched from."}, + {"initial_port", std::make_shared(), "The client port that was used to make the parent query."}, + {"initial_query_start_time", std::make_shared(), "Start time of the initial query execution."}, + {"initial_query_start_time_microseconds", std::make_shared(6), "Start time of the initial query execution "}, + {"interface", std::make_shared(), "Interface that the query was initiated from. 
Possible values: 1 — TCP, 2 — HTTP."}, + {"is_secure", std::make_shared(), "The flag which shows whether the connection was secure."}, + {"os_user", low_cardinality_string, "OSs username who runs clickhouse-client."}, + {"client_hostname", low_cardinality_string, "Hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", low_cardinality_string, "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "Major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "Minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, + {"http_method", std::make_shared(), "HTTP method that initiated the query. Possible values: 0 — The query was launched from the TCP interface, 1 — GET method was used., 2 — POST method was used."}, + {"http_user_agent", low_cardinality_string, "The UserAgent header passed in the HTTP request."}, + {"http_referer", std::make_shared(), "HTTP header `Referer` passed in the HTTP query (contains an absolute or partial address of the page making the query)."}, + {"forwarded_for", std::make_shared(), "HTTP header `X-Forwarded-For` passed in the HTTP query."}, + {"quota_key", std::make_shared(), "The 'quota key' specified in the quotas setting."}, + {"distributed_depth", std::make_shared(), "How many times a query was forwarded between servers."}, - {"revision", std::make_shared()}, + {"revision", std::make_shared(), "ClickHouse revision."}, - {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared())}, + {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared()), "ProfileEvents that measure different metrics for this thread. 
The description of them could be found in the table system.events."}, }; } diff --git a/src/Interpreters/QueryViewsLog.cpp b/src/Interpreters/QueryViewsLog.cpp index c426f2d3cf0..a5441363340 100644 --- a/src/Interpreters/QueryViewsLog.cpp +++ b/src/Interpreters/QueryViewsLog.cpp @@ -35,30 +35,34 @@ ColumnsDescription QueryViewsLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"view_duration_ms", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the last event of the view happened."}, + {"event_time", std::make_shared(), "The date and time when the view finished execution."}, + {"event_time_microseconds", std::make_shared(6), "The date and time when the view finished execution with microseconds precision."}, + {"view_duration_ms", std::make_shared(), "Duration of view execution (sum of its stages) in milliseconds."}, - {"initial_query_id", std::make_shared()}, - {"view_name", std::make_shared()}, - {"view_uuid", std::make_shared()}, - {"view_type", std::move(view_type_datatype)}, - {"view_query", std::make_shared()}, - {"view_target", std::make_shared()}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"view_name", std::make_shared(), "Name of the view."}, + {"view_uuid", std::make_shared(), "UUID of the view."}, + {"view_type", std::move(view_type_datatype), "Type of the view. Values: 'Default' = 1 — Default views. Should not appear in this log, 'Materialized' = 2 — Materialized views, 'Live' = 3 — Live views."}, + {"view_query", std::make_shared(), "The query executed by the view."}, + {"view_target", std::make_shared(), "The name of the view target table."}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + {"read_rows", std::make_shared(), "Number of read rows."}, + {"read_bytes", std::make_shared(), "Number of read bytes."}, + {"written_rows", std::make_shared(), "Number of written rows."}, + {"written_bytes", std::make_shared(), "Number of written bytes."}, + {"peak_memory_usage", std::make_shared(), "The maximum difference between the amount of allocated and freed memory in context of this view."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "ProfileEvents that measure different metrics. The description of them could be found in the table system.events."}, - {"status", std::move(view_status_datatype)}, - {"exception_code", std::make_shared()}, - {"exception", std::make_shared()}, - {"stack_trace", std::make_shared()} + {"status", std::move(view_status_datatype), "Status of the view. Values: " + "'QueryStart' = 1 — Successful start the view execution. 
Should not appear, " + "'QueryFinish' = 2 — Successful end of the view execution, " + "'ExceptionBeforeStart' = 3 — Exception before the start of the view execution., " + "'ExceptionWhileProcessing' = 4 — Exception during the view execution."}, + {"exception_code", std::make_shared(), "Code of an exception."}, + {"exception", std::make_shared(), "Exception message."}, + {"stack_trace", std::make_shared(), "Stack trace. An empty string, if the query was completed successfully."} }; } diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/S3QueueLog.cpp index 3ed58de0f87..ba990a8ac25 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ b/src/Interpreters/S3QueueLog.cpp @@ -25,19 +25,19 @@ ColumnsDescription S3QueueLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"uuid", std::make_shared()}, - {"file_name", std::make_shared()}, - {"rows_processed", std::make_shared()}, - {"status", status_datatype}, - {"processing_start_time", std::make_shared(std::make_shared())}, - {"processing_end_time", std::make_shared(std::make_shared())}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"exception", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname"}, + {"event_date", std::make_shared(), "Event date of writing this log row"}, + {"event_time", std::make_shared(), "Event time of writing this log row"}, + {"database", std::make_shared(), "The name of a database where current S3Queue table lives."}, + {"table", std::make_shared(), "The name of S3Queue table."}, + {"uuid", std::make_shared(), "The UUID of S3Queue table"}, + {"file_name", std::make_shared(), "File name of the processing file"}, + {"rows_processed", std::make_shared(), "Number of processed rows"}, + {"status", status_datatype, "Status of the processing file"}, + {"processing_start_time", std::make_shared(std::make_shared()), "Time of the start of processing the file"}, + {"processing_end_time", std::make_shared(std::make_shared()), "Time of the end of processing the file"}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected while loading this file"}, + {"exception", std::make_shared(), "Exception message if happened"}, }; } diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index a5bc5012292..dc0ac963d0b 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -121,33 +121,36 @@ ColumnsDescription SessionLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", lc_string_datatype}, - {"type", std::move(event_type)}, - {"auth_id", std::make_shared()}, - {"session_id", std::make_shared()}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", lc_string_datatype, "Hostname of the server executing the query."}, + {"type", std::move(event_type), "Login/logout result. Possible values: " + "LoginFailure — Login error. " + "LoginSuccess — Successful login. 
" + "Logout — Logout from the system."}, + {"auth_id", std::make_shared(), "Authentication ID, which is a UUID that is automatically generated each time user logins."}, + {"session_id", std::make_shared(), "Session ID that is passed by client via HTTP interface."}, + {"event_date", std::make_shared(), "Login/logout date."}, + {"event_time", std::make_shared(), "Login/logout time."}, + {"event_time_microseconds", std::make_shared(6), "Login/logout starting time with microseconds precision."}, - {"user", std::make_shared(std::make_shared())}, - {"auth_type", std::make_shared(std::move(identified_with_column))}, + {"user", std::make_shared(std::make_shared()), "User name."}, + {"auth_type", std::make_shared(std::move(identified_with_column)), "The authentication type."}, - {"profiles", std::make_shared(lc_string_datatype)}, - {"roles", std::make_shared(lc_string_datatype)}, - {"settings", std::move(settings_type_column)}, + {"profiles", std::make_shared(lc_string_datatype), "The list of profiles set for all roles and/or users."}, + {"roles", std::make_shared(lc_string_datatype), "The list of roles to which the profile is applied."}, + {"settings", std::move(settings_type_column), "Settings that were changed when the client logged in/out."}, - {"client_address", DataTypeFactory::instance().get("IPv6")}, - {"client_port", std::make_shared()}, - {"interface", std::move(interface_type_column)}, + {"client_address", DataTypeFactory::instance().get("IPv6"), "The IP address that was used to log in/out."}, + {"client_port", std::make_shared(), "The client port that was used to log in/out."}, + {"interface", std::move(interface_type_column), "The interface from which the login was initiated."}, - {"client_hostname", std::make_shared()}, - {"client_name", std::make_shared()}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, + {"client_hostname", std::make_shared(), "The hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", std::make_shared(), "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "The major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "The minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, - {"failure_reason", std::make_shared()}, + {"failure_reason", std::make_shared(), "The exception message containing the reason for the login/logout failure."}, }; } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 26adb0cfc3f..01bedf34f15 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -29,20 +29,27 @@ ColumnsDescription TraceLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"timestamp_ns", std::make_shared()}, - {"revision", std::make_shared()}, - {"trace_type", std::make_shared(trace_values)}, - {"thread_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"trace", 
std::make_shared(std::make_shared())}, - {"size", std::make_shared()}, - {"ptr", std::make_shared()}, - {"event", std::make_shared(std::make_shared())}, - {"increment", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "Date of sampling moment."}, + {"event_time", std::make_shared(), "Timestamp of the sampling moment."}, + {"event_time_microseconds", std::make_shared(6), "Timestamp of the sampling moment with microseconds precision."}, + {"timestamp_ns", std::make_shared(), "Timestamp of the sampling moment in nanoseconds."}, + {"revision", std::make_shared(), "ClickHouse server build revision."}, + {"trace_type", std::make_shared(trace_values), "Trace type: " + "`Real` represents collecting stack traces by wall-clock time. " + "`CPU` represents collecting stack traces by CPU time. " + "`Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark. " + "`MemorySample` represents collecting random allocations and deallocations. " + "`MemoryPeak` represents collecting updates of peak memory usage. " + "`ProfileEvent` represents collecting of increments of profile events." + }, + {"thread_id", std::make_shared(), "Thread identifier."}, + {"query_id", std::make_shared(), "Query identifier that can be used to get details about a query that was running from the query_log system table."}, + {"trace", std::make_shared(std::make_shared()), "Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process."}, + {"size", std::make_shared(), "For trace types Memory, MemorySample or MemoryPeak is the amount of memory allocated, for other trace types is 0."}, + {"ptr", std::make_shared(), "The address of the allocated chunk."}, + {"event", std::make_shared(std::make_shared()), "For trace type ProfileEvent is the name of updated profile event, for other trace types is an empty string."}, + {"increment", std::make_shared(), "For trace type ProfileEvent is the amount of increment of profile event, for other trace types is 0."}, }; } diff --git a/src/Interpreters/TransactionsInfoLog.cpp b/src/Interpreters/TransactionsInfoLog.cpp index 4a413439671..d13b31518d2 100644 --- a/src/Interpreters/TransactionsInfoLog.cpp +++ b/src/Interpreters/TransactionsInfoLog.cpp @@ -34,22 +34,22 @@ ColumnsDescription TransactionsInfoLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"type", std::move(type_enum)}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared(6)}, - {"thread_id", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "The hostname where transaction was executed."}, + {"type", std::move(type_enum), "The type of the transaction. Possible values: Begin, Commit, Rollback, AddPart, LockPart, UnlockPart."}, + {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time", std::make_shared(6), "Time of the entry"}, + {"thread_id", std::make_shared(), "The identifier of a thread."}, /// which thread? 
- {"query_id", std::make_shared()}, - {"tid", getTransactionIDDataType()}, - {"tid_hash", std::make_shared()}, + {"query_id", std::make_shared(), "The ID of a query executed in a scope of transaction."}, + {"tid", getTransactionIDDataType(), "The identifier of a transaction."}, + {"tid_hash", std::make_shared(), "The hash of the identifier."}, - {"csn", std::make_shared()}, + {"csn", std::make_shared(), "The Commit Sequence Number"}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"uuid", std::make_shared()}, - {"part", std::make_shared()}, + {"database", std::make_shared(), "The name of the database the transaction was executed against."}, + {"table", std::make_shared(), "The name of the table the transaction was executed against."}, + {"uuid", std::make_shared(), "The uuid of the table the transaction was executed against."}, + {"part", std::make_shared(), "The name of the part participated in the transaction."}, // ? }; } diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index 9cc31edfe56..6f6d4568064 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -122,49 +122,49 @@ ColumnsDescription ZooKeeperLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"type", std::move(type_enum)}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared(6)}, - {"thread_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"address", DataTypeFactory::instance().get("IPv6")}, - {"port", std::make_shared()}, - {"session_id", std::make_shared()}, - {"duration_ms", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"type", std::move(type_enum), "Event type in the ZooKeeper client. Can have one of the following values: Request — The request has been sent, Response — The response was received, Finalize — The connection is lost, no response was received."}, + {"event_date", std::make_shared(), "The date when the event happened."}, + {"event_time", std::make_shared(6), "The date and time when the event happened."}, + {"thread_id", std::make_shared(), "The ID of the thread executed this request."}, + {"query_id", std::make_shared(), "The ID of a query in scope of which this request was executed."}, + {"address", DataTypeFactory::instance().get("IPv6"), "IP address of ZooKeeper server that was used to make the request."}, + {"port", std::make_shared(), "The port of ZooKeeper server that was used to make the request."}, + {"session_id", std::make_shared(), "The session ID that the ZooKeeper server sets for each connection."}, + {"duration_ms", std::make_shared(), "The time taken by ZooKeeper to execute the request."}, - {"xid", std::make_shared()}, - {"has_watch", std::make_shared()}, - {"op_num", op_num_enum}, - {"path", std::make_shared()}, + {"xid", std::make_shared(), "The ID of the request within the session. This is usually a sequential request number. 
It is the same for the request row and the paired response/finalize row."}, + {"has_watch", std::make_shared(), "Whether the watch has been set for the request."}, + {"op_num", op_num_enum, "The type of request or response."}, + {"path", std::make_shared(), "The path to the ZooKeeper node specified in the request, or an empty string if the request does not require specifying a path."}, - {"data", std::make_shared()}, + {"data", std::make_shared(), "The data written to the ZooKeeper node (for the SET and CREATE requests — what the request wanted to write, for the response to the GET request — what was read) or an empty string."}, - {"is_ephemeral", std::make_shared()}, - {"is_sequential", std::make_shared()}, + {"is_ephemeral", std::make_shared(), "Is the ZooKeeper node being created as an ephemeral node."}, + {"is_sequential", std::make_shared(), "Is the ZooKeeper node being created as a sequential node."}, - {"version", std::make_shared(std::make_shared())}, + {"version", std::make_shared(std::make_shared()), "The version of the ZooKeeper node that the request expects when executing. This is supported for CHECK, SET, REMOVE requests (it is -1 if the request does not check the version, or NULL for other requests that do not support version checking)."}, - {"requests_size", std::make_shared()}, - {"request_idx", std::make_shared()}, + {"requests_size", std::make_shared(), "The number of requests included in the multi request (this is a special request that consists of several consecutive ordinary requests and executes them atomically). All requests included in multi request will have the same xid."}, + {"request_idx", std::make_shared(), "The number of the request included in multi request (for multi request — 0, then in order from 1)."}, - {"zxid", std::make_shared()}, - {"error", std::make_shared(error_enum)}, + {"zxid", std::make_shared(), "ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (0 if the request was not executed/returned an error/the client does not know whether the request was executed)."}, + {"error", std::make_shared(error_enum), "Error code. 
Can have many values, here are just some of them: ZOK — The request was executed successfully, ZCONNECTIONLOSS — The connection was lost, ZOPERATIONTIMEOUT — The request execution timeout has expired, ZSESSIONEXPIRED — The session has expired, NULL — The request is completed."}, - {"watch_type", std::make_shared(watch_type_enum)}, - {"watch_state", std::make_shared(watch_state_enum)}, + {"watch_type", std::make_shared(watch_type_enum), "The type of the watch event (for responses with op_num = Watch), for the remaining responses: NULL."}, + {"watch_state", std::make_shared(watch_state_enum), "The status of the watch event (for responses with op_num = Watch), for the remaining responses: NULL."}, - {"path_created", std::make_shared()}, + {"path_created", std::make_shared(), "The path to the created ZooKeeper node (for responses to the CREATE request), may differ from the path if the node is created as a sequential."}, - {"stat_czxid", std::make_shared()}, - {"stat_mzxid", std::make_shared()}, - {"stat_pzxid", std::make_shared()}, - {"stat_version", std::make_shared()}, - {"stat_cversion", std::make_shared()}, - {"stat_dataLength", std::make_shared()}, - {"stat_numChildren", std::make_shared()}, + {"stat_czxid", std::make_shared(), "The zxid of the change that caused this ZooKeeper node to be created."}, + {"stat_mzxid", std::make_shared(), "The zxid of the change that last modified this ZooKeeper node."}, + {"stat_pzxid", std::make_shared(), "The transaction ID of the change that last modified children of this ZooKeeper node."}, + {"stat_version", std::make_shared(), "The number of changes to the data of this ZooKeeper node."}, + {"stat_cversion", std::make_shared(), "The number of changes to the children of this ZooKeeper node."}, + {"stat_dataLength", std::make_shared(), "The length of the data field of this ZooKeeper node."}, + {"stat_numChildren", std::make_shared(), "The number of children of this ZooKeeper node."}, - {"children", std::make_shared(std::make_shared())}, + {"children", std::make_shared(std::make_shared()), "The list of child ZooKeeper nodes (for responses to LIST request)."}, }; } diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp index 0e4c5648b74..5ec683e1784 100644 --- a/src/Storages/System/StorageSystemCertificates.cpp +++ b/src/Storages/System/StorageSystemCertificates.cpp @@ -19,19 +19,18 @@ namespace DB ColumnsDescription StorageSystemCertificates::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - {"version", std::make_shared>()}, - {"serial_number", std::make_shared(std::make_shared())}, - {"signature_algo", std::make_shared(std::make_shared())}, - {"issuer", std::make_shared(std::make_shared())}, - {"not_before", std::make_shared(std::make_shared())}, - {"not_after", std::make_shared(std::make_shared())}, - {"subject", std::make_shared(std::make_shared())}, - {"pkey_algo", std::make_shared(std::make_shared())}, - {"path", std::make_shared()}, - {"default", std::make_shared>()} + {"version", std::make_shared>(), "Version of the certificate. 
Values are 0 for v1, 1 for v2, 2 for v3."}, + {"serial_number", std::make_shared(std::make_shared()), "Serial Number of the certificate assigned by the issuer."}, + {"signature_algo", std::make_shared(std::make_shared()), "Signature Algorithm - an algorithm used by the issuer to sign this certificate."}, + {"issuer", std::make_shared(std::make_shared()), "Issuer - an unique identifier for the Certificate Authority issuing this certificate."}, + {"not_before", std::make_shared(std::make_shared()), "The beginning of the time window when this certificate is valid."}, + {"not_after", std::make_shared(std::make_shared()), "The end of the time window when this certificate is valid."}, + {"subject", std::make_shared(std::make_shared()), "Subject - identifies the owner of the public key."}, + {"pkey_algo", std::make_shared(std::make_shared()), "Public Key Algorithm defines the algorithm the public key can be used with."}, + {"path", std::make_shared(), "Path to the file or directory containing this certificate."}, + {"default", std::make_shared>(), "Certificate is in the default certificate location."} }; } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 6bc1208a6a9..5c96c6502af 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -31,27 +31,32 @@ StorageSystemColumns::StorageSystemColumns(const StorageID & table_id_) StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "name", std::make_shared() }, - { "type", std::make_shared() }, - { "position", std::make_shared() }, - { "default_kind", std::make_shared() }, - { "default_expression", std::make_shared() }, - { "data_compressed_bytes", std::make_shared() }, - { "data_uncompressed_bytes", std::make_shared() }, - { "marks_bytes", std::make_shared() }, - { "comment", std::make_shared() }, - { "is_in_partition_key", std::make_shared() }, - { "is_in_sorting_key", std::make_shared() }, - { "is_in_primary_key", std::make_shared() }, - { "is_in_sampling_key", std::make_shared() }, - { "compression_codec", std::make_shared() }, - { "character_octet_length", std::make_shared(std::make_shared()) }, - { "numeric_precision", std::make_shared(std::make_shared()) }, - { "numeric_precision_radix", std::make_shared(std::make_shared()) }, - { "numeric_scale", std::make_shared(std::make_shared()) }, - { "datetime_precision", std::make_shared(std::make_shared()) }, + { "database", std::make_shared(), "Database name."}, + { "table", std::make_shared(), "Table name."}, + { "name", std::make_shared(), "Column name."}, + { "type", std::make_shared(), "Column type."}, + { "position", std::make_shared(), "Ordinal position of a column in a table starting with 1."}, + { "default_kind", std::make_shared(), "Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined."}, + { "default_expression", std::make_shared(), "Expression for the default value, or an empty string if it is not defined."}, + { "data_compressed_bytes", std::make_shared(), "The size of compressed data, in bytes."}, + { "data_uncompressed_bytes", std::make_shared(), "The size of decompressed data, in bytes."}, + { "marks_bytes", std::make_shared(), "The size of marks, in bytes."}, + { "comment", std::make_shared(), "Comment on the column, or an empty string if it is not defined."}, + { "is_in_partition_key", 
std::make_shared(), "Flag that indicates whether the column is in the partition expression."}, + { "is_in_sorting_key", std::make_shared(), "Flag that indicates whether the column is in the sorting key expression."}, + { "is_in_primary_key", std::make_shared(), "Flag that indicates whether the column is in the primary key expression."}, + { "is_in_sampling_key", std::make_shared(), "Flag that indicates whether the column is in the sampling key expression."}, + { "compression_codec", std::make_shared(), "Compression codec name."}, + { "character_octet_length", std::make_shared(std::make_shared()), + "Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for FixedString data type. Otherwise, the NULL value is returned."}, + { "numeric_precision", std::make_shared(std::make_shared()), + "Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for Decimal types. Otherwise, the NULL value is returned."}, + { "numeric_precision_radix", std::make_shared(std::make_shared()), + "The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for Decimal types. Otherwise, the NULL value is returned."}, + { "numeric_scale", std::make_shared(std::make_shared()), + "The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for Decimal types. Otherwise, the NULL value is returned."}, + { "datetime_precision", std::make_shared(std::make_shared()), + "Decimal precision of DateTime64 data type. For other data types, the NULL value is returned."}, })); setInMemoryMetadata(storage_metadata); diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 2fa74ef23e6..ff782647c79 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -26,16 +26,16 @@ StorageSystemDataSkippingIndices::StorageSystemDataSkippingIndices(const Storage StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "name", std::make_shared() }, - { "type", std::make_shared() }, - { "type_full", std::make_shared() }, - { "expr", std::make_shared() }, - { "granularity", std::make_shared() }, - { "data_compressed_bytes", std::make_shared() }, - { "data_uncompressed_bytes", std::make_shared() }, - { "marks", std::make_shared()} + { "database", std::make_shared(), "Database name."}, + { "table", std::make_shared(), "Table name."}, + { "name", std::make_shared(), "Index name."}, + { "type", std::make_shared(), "Index type."}, + { "type_full", std::make_shared(), "Index type expression from create statement."}, + { "expr", std::make_shared(), "Expression for the index calculation."}, + { "granularity", std::make_shared(), "The number of granules in the block."}, + { "data_compressed_bytes", std::make_shared(), "The size of compressed data, in bytes."}, + { "data_uncompressed_bytes", std::make_shared(), "The size of decompressed data, in bytes."}, + { "marks", std::make_shared(), "The size of marks, in bytes."} })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp 
b/src/Storages/System/StorageSystemDetachedParts.cpp index fa74093a5a5..1eb79744022 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -271,18 +271,18 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription{{ - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"partition_id", std::make_shared(std::make_shared())}, - {"name", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"modification_time",std::make_shared()}, - {"disk", std::make_shared()}, - {"path", std::make_shared()}, - {"reason", std::make_shared(std::make_shared())}, - {"min_block_number", std::make_shared(std::make_shared())}, - {"max_block_number", std::make_shared(std::make_shared())}, - {"level", std::make_shared(std::make_shared())}, + {"database", std::make_shared(), "The name of the database this part belongs to."}, + {"table", std::make_shared(), "The name of the table this part belongs to."}, + {"partition_id", std::make_shared(std::make_shared()), "The identifier of the partition this part belongs to."}, + {"name", std::make_shared(), "The name of the part."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"modification_time",std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time when detach happened."}, + {"disk", std::make_shared(), "The name of the disk that stores this data part."}, + {"path", std::make_shared(), "The path of the disk to the file of this data part."}, + {"reason", std::make_shared(std::make_shared()), "The explanation why this part was detached."}, + {"min_block_number", std::make_shared(std::make_shared()), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(std::make_shared()), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(std::make_shared()), "Depth of the merge tree. 
Zero means that the current part was created by insert rather than by merging other parts."}, }}); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index a19741f92d1..8b528b4a298 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -82,7 +82,7 @@ ColumnsDescription StorageSystemDictionaries::getColumnsDescription() {"attribute.names", std::make_shared(std::make_shared()), "Array of attribute names provided by the dictionary."}, {"attribute.types", std::make_shared(std::make_shared()), "Corresponding array of attribute types provided by the dictionary."}, {"bytes_allocated", std::make_shared(), "Amount of RAM allocated for the dictionary."}, - {"hierarchical_index_bytes_allocated", std::make_shared(), ""}, + {"hierarchical_index_bytes_allocated", std::make_shared(), "Amount of RAM allocated for hierarchical index."}, {"query_count", std::make_shared(), "Number of queries since the dictionary was loaded or since the last successful reboot."}, {"hit_rate", std::make_shared(), "For cache dictionaries, the percentage of uses for which the value was in the cache."}, {"found_rate", std::make_shared(), "The percentage of uses for which the value was found."}, diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 30d64156b22..b010eff2fe3 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -18,19 +18,19 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - {"name", std::make_shared()}, - {"path", std::make_shared()}, - {"free_space", std::make_shared()}, - {"total_space", std::make_shared()}, - {"unreserved_space", std::make_shared()}, - {"keep_free_space", std::make_shared()}, - {"type", std::make_shared()}, - {"is_encrypted", std::make_shared()}, - {"is_read_only", std::make_shared()}, - {"is_write_once", std::make_shared()}, - {"is_remote", std::make_shared()}, - {"is_broken", std::make_shared()}, - {"cache_path", std::make_shared()}, + {"name", std::make_shared(), "Name of a disk in the server configuration."}, + {"path", std::make_shared(), "Path to the mount point in the file system."}, + {"free_space", std::make_shared(), "Free space on disk in bytes."}, + {"total_space", std::make_shared(), "Disk volume in bytes."}, + {"unreserved_space", std::make_shared(), "Free space which is not taken by reservations (free_space minus the size of reservations taken by merges, inserts, and other disk write operations currently running)."}, + {"keep_free_space", std::make_shared(), "Amount of disk space that should stay free on disk in bytes. Defined in the keep_free_space_bytes parameter of disk configuration."}, + {"type", std::make_shared(), "The disk type which tells where this disk stores the data - RAM, local drive or remote storage."}, + {"is_encrypted", std::make_shared(), "Flag which shows whether this disk encrypts the underlying data."}, + {"is_read_only", std::make_shared(), "Flag which indicates that you can only perform read operations with this disk."}, + {"is_write_once", std::make_shared(), "Flag which indicates if disk is write-once. 
Which means that it does support BACKUP to this disk, but does not support INSERT into MergeTree table on this disk."}, + {"is_remote", std::make_shared(), "Flag which indicates whether operations with this disk involve network interaction."}, + {"is_broken", std::make_shared(), "Flag which indicates if disk is broken. Broken disks will have 0 space and cannot be used."}, + {"cache_path", std::make_shared(), "The path to the cache directory on local drive in case when the disk supports caching."}, })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index 53cd76e4219..cfb388bc232 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -17,21 +17,21 @@ ColumnsDescription StorageSystemFilesystemCache::getColumnsDescription() /// TODO: Fill in all the comments. return ColumnsDescription { - {"cache_name", std::make_shared()}, - {"cache_base_path", std::make_shared()}, - {"cache_path", std::make_shared()}, - {"key", std::make_shared()}, - {"file_segment_range_begin", std::make_shared()}, - {"file_segment_range_end", std::make_shared()}, - {"size", std::make_shared()}, - {"state", std::make_shared()}, - {"cache_hits", std::make_shared()}, - {"references", std::make_shared()}, - {"downloaded_size", std::make_shared()}, - {"kind", std::make_shared()}, - {"unbound", std::make_shared>()}, - {"user_id", std::make_shared()}, - {"file_size", std::make_shared(std::make_shared())}, + {"cache_name", std::make_shared(), "Name of the cache object. Can be used in `SYSTEM DESCRIBE FILESYSTEM CACHE `, `SYSTEM DROP FILESYSTEM CACHE ` commands"}, + {"cache_base_path", std::make_shared(), "Path to the base directory where all cache files (of a cache identified by `cache_name`) are stored."}, + {"cache_path", std::make_shared(), "Path to a particular cache file, corresponding to a file segment in a source file"}, + {"key", std::make_shared(), "Cache key of the file segment"}, + {"file_segment_range_begin", std::make_shared(), "Offset corresponding to the beginning of the file segment range"}, + {"file_segment_range_end", std::make_shared(), "Offset corresponding to the (inclusive) end of the file segment range"}, + {"size", std::make_shared(), "Size of the file segment"}, + {"state", std::make_shared(), "File segment state (DOWNLOADED, DOWNLOADING, PARTIALLY_DOWNLOADED, ...)"}, + {"cache_hits", std::make_shared(), "Number of cache hits of corresponding file segment. 
Value 1 means that nobody uses it at the moment (the only existing reference is in cache storage itself)"}, + {"downloaded_size", std::make_shared(), "Downloaded size of the file segment"}, + {"kind", std::make_shared(), "File segment kind (used to distinguish between file segments added as a part of 'Temporary data in cache')"}, + {"unbound", std::make_shared>(), "Internal implementation flag"}, + {"user_id", std::make_shared(), "User id of the user which created the file segment"}, + {"file_size", std::make_shared(std::make_shared()), "File size of the file to which current file segment belongs"}, }; } diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index a638a08fac7..eaa386763c2 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -11,7 +11,11 @@ ColumnsDescription StorageSystemGraphite::getColumnsDescription() return ColumnsDescription { {"config_name", std::make_shared(), "graphite_rollup parameter name."}, - {"rule_type", std::make_shared(), ""}, + {"rule_type", std::make_shared(), + "The rule type. Possible values: RuleTypeAll = 0 - default, with regex, compatible with old scheme; " + "RuleTypePlain = 1 - plain metrics, with regex, compatible with old scheme; " + "RuleTypeTagged = 2 - tagged metrics, with regex, compatible with old scheme; " + "RuleTypeTagList = 3 - tagged metrics, with regex (converted to RuleTypeTagged from string like 'retention=10min ; env=(staging|prod)')"}, {"regexp", std::make_shared(), "A pattern for the metric name."}, {"function", std::make_shared(), "The name of the aggregating function."}, {"age", std::make_shared(), "The minimum age of the data in seconds."}, diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index 6c9b1681c8b..86713632339 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -41,8 +41,8 @@ ColumnsDescription StorageSystemKafkaConsumers::getColumnsDescription() {"last_rebalance_time", std::make_shared(), "Timestamp of the most recent Kafka rebalance."}, {"num_rebalance_revocations", std::make_shared(), "Number of times the consumer was revoked its partitions."}, {"num_rebalance_assignments", std::make_shared(), "Number of times the consumer was assigned to Kafka cluster."}, - {"is_currently_used", std::make_shared(), "Consumer is in use."}, - {"last_used", std::make_shared(6)}, + {"is_currently_used", std::make_shared(), "The flag which shows whether the consumer is in use."}, + {"last_used", std::make_shared(6), "The last time this consumer was in use."}, {"rdkafka_stat", std::make_shared(), "Library internal statistic. 
Set statistics_interval_ms to 0 disable, default is 3000 (once in three seconds)."}, }; } diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index fac653e524e..0fca5dc84a2 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -16,12 +16,12 @@ ColumnsDescription StorageSystemMerges::getColumnsDescription() {"elapsed", std::make_shared(), "The time elapsed (in seconds) since the merge started."}, {"progress", std::make_shared(), "The percentage of completed work from 0 to 1."}, {"num_parts", std::make_shared(), "The number of parts to be merged."}, - {"source_part_names", std::make_shared(std::make_shared()), ""}, + {"source_part_names", std::make_shared(std::make_shared()), "The list of source parts names."}, {"result_part_name", std::make_shared(), "The name of the part that will be formed as the result of merging."}, - {"source_part_paths", std::make_shared(std::make_shared()), ""}, - {"result_part_path", std::make_shared(), ""}, - {"partition_id", std::make_shared()}, - {"partition", std::make_shared()}, + {"source_part_paths", std::make_shared(std::make_shared()), "The list of paths for each source part."}, + {"result_part_path", std::make_shared(), "The path of the part that will be formed as the result of merging."}, + {"partition_id", std::make_shared(), "The identifier of the partition where the merge is happening."}, + {"partition", std::make_shared(), "The name of the partition"}, {"is_mutation", std::make_shared(), "1 if this process is a part mutation."}, {"total_size_bytes_compressed", std::make_shared(), "The total size of the compressed data in the merged chunks."}, {"total_size_bytes_uncompressed", std::make_shared(), "The total size of compressed data in the merged chunks."}, diff --git a/src/Storages/System/StorageSystemModels.cpp b/src/Storages/System/StorageSystemModels.cpp index e715238ddd4..8960d0625ba 100644 --- a/src/Storages/System/StorageSystemModels.cpp +++ b/src/Storages/System/StorageSystemModels.cpp @@ -13,13 +13,12 @@ namespace DB ColumnsDescription StorageSystemModels::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - { "model_path", std::make_shared() }, - { "type", std::make_shared() }, - { "loading_start_time", std::make_shared() }, - { "loading_duration", std::make_shared() }, + { "model_path", std::make_shared(), "Path to trained model."}, + { "type", std::make_shared(), "Model type. Now catboost only."}, + { "loading_start_time", std::make_shared(), "The time when the loading of the model started."}, + { "loading_duration", std::make_shared(), "How much time did it take to load the model."}, }; } diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index 7d263d9468d..60b80e0b0ad 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -38,7 +38,7 @@ ColumnsDescription StorageSystemMutations::getColumnsDescription() "1 if the mutation is completed, " "0 if the mutation is still in process. 
" }, - { "is_killed", std::make_shared() }, + { "is_killed", std::make_shared(), "Only available in ClickHouse Cloud."}, { "latest_failed_part", std::make_shared(), "The name of the most recent part that could not be mutated."}, { "latest_fail_time", std::make_shared(), "The date and time of the most recent part mutation failure."}, { "latest_fail_reason", std::make_shared(), "The exception message that caused the most recent part mutation failure."}, diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index c5d8b307368..10898f79d10 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -19,6 +19,8 @@ StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool mult : IStorage(table_id), multithreaded(multithreaded_), limit(limit_), offset(offset_) { StorageInMemoryMetadata storage_metadata; + /// This column doesn't have a comment, because otherwise it will be added to all the tables which were created via + /// CREATE TABLE test as numbers(5) storage_metadata.setColumns(ColumnsDescription({{"number", std::make_shared()}})); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 3091ffdb51a..936d55e61a0 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -15,6 +15,8 @@ StorageSystemOne::StorageSystemOne(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; + /// This column doesn't have a comment, because otherwise it will be added to all tables created via: + /// CREATE TABLE test (dummy UInt8) ENGINE = Distributed(`default`, `system.one`) storage_metadata.setColumns(ColumnsDescription({{"dummy", std::make_shared()}})); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp index 9dd2ba0b156..9cba92bca12 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp @@ -18,28 +18,27 @@ namespace DB ColumnsDescription StorageSystemPartMovesBetweenShards::getColumnsDescription() { - /// TODO: Fill in all the comments return ColumnsDescription { /// Table properties. - { "database", std::make_shared() }, - { "table", std::make_shared() }, + { "database", std::make_shared(), "The name of the database where move is performed."}, + { "table", std::make_shared(), "The name of the table where move is performed."}, /// Constant element properties. - { "task_name", std::make_shared() }, - { "task_uuid", std::make_shared() }, - { "create_time", std::make_shared() }, - { "part_name", std::make_shared() }, - { "part_uuid", std::make_shared() }, - { "to_shard", std::make_shared() }, - { "dst_part_name", std::make_shared() }, + { "task_name", std::make_shared(), "The name of the moving task."}, + { "task_uuid", std::make_shared(), "The identifier of the moving task."}, + { "create_time", std::make_shared(), "The time when the task was created."}, + { "part_name", std::make_shared(), "The name of the part which is in a process of moving."}, + { "part_uuid", std::make_shared(), "The UUID of the part which is in a process of moving."}, + { "to_shard", std::make_shared(), "The name of the destination shard."}, + { "dst_part_name", std::make_shared(), "The result part name."}, /// Processing status of item. 
- { "update_time", std::make_shared() }, - { "state", std::make_shared() }, - { "rollback", std::make_shared() }, - { "num_tries", std::make_shared() }, - { "last_exception", std::make_shared() }, + { "update_time", std::make_shared(), "The last time update was performed."}, + { "state", std::make_shared(), "The current state of the move."}, + { "rollback", std::make_shared(), "The flag which indicated whether the operation was rolled back."}, + { "num_tries", std::make_shared(), "The number of tries to complete the operation."}, + { "last_exception", std::make_shared(), "The last exception name if any."}, }; } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index b6e4ee4161e..1b800fd64a9 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -94,33 +94,33 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"move_ttl_info.min", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the minimum key value for a TTL MOVE rule."}, {"move_ttl_info.max", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the maximum key value for a TTL MOVE rule."}, - {"default_compression_codec", std::make_shared()}, + {"default_compression_codec", std::make_shared(), "The name of the codec used to compress this data part (in case when there is no explicit codec for columns)."}, - {"recompression_ttl_info.expression", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.min", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.max", std::make_shared(std::make_shared())}, + {"recompression_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"recompression_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"recompression_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"group_by_ttl_info.expression", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.min", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.max", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"group_by_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"group_by_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.max", std::make_shared(std::make_shared())}, + {"rows_where_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"rows_where_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. 
Used to understand whether we have at least one row with expired TTL."}, + {"rows_where_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"projections", std::make_shared(std::make_shared())}, + {"projections", std::make_shared(std::make_shared()), "The list of projection names calculated for this part."}, - {"visible", std::make_shared()}, - {"creation_tid", getTransactionIDDataType()}, - {"removal_tid_lock", std::make_shared()}, - {"removal_tid", getTransactionIDDataType()}, - {"creation_csn", std::make_shared()}, - {"removal_csn", std::make_shared()}, + {"visible", std::make_shared(), "Flag which indicated whether this part is visible for SELECT queries."}, + {"creation_tid", getTransactionIDDataType(), "ID of transaction that has created/is trying to create this object."}, + {"removal_tid_lock", std::make_shared(), "Hash of removal_tid, used to lock an object for removal."}, + {"removal_tid", getTransactionIDDataType(), "ID of transaction that has removed/is trying to remove this object"}, + {"creation_csn", std::make_shared(), "CSN of transaction that has created this object"}, + {"removal_csn", std::make_shared(), "CSN of transaction that has removed this object"}, - {"has_lightweight_delete", std::make_shared()}, + {"has_lightweight_delete", std::make_shared(), "The flag which indicated whether the part has lightweight delete mask."}, - {"last_removal_attempt_time", std::make_shared()}, - {"removal_state", std::make_shared()}, + {"last_removal_attempt_time", std::make_shared(), "The last time the server tried to delete this part."}, + {"removal_state", std::make_shared(), "The current state of part removal process."}, } ) { diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 833a5e1ec16..f34b0e0cfda 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -22,59 +22,60 @@ namespace DB StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, - {"part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, + {"name", std::make_shared(), "Name of the data part."}, + {"uuid", std::make_shared(), "The parts UUID."}, + {"part_type", std::make_shared(), "The data part storing format. 
" + "Possible values: Wide — Each column is stored in a separate file in a filesystem, Compact — All columns are stored in one file in a filesystem."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging."}, + {"marks", std::make_shared(), "The number of marks. To get the approximate number of rows in a data part, multiply marks by the index granularity (usually 8192) (this hint does not work for adaptive granularity)."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"marks_bytes", std::make_shared(),"The size of the file with marks."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value for the Date column if that is included in the partition key."}, + {"max_date", std::make_shared(), "The maximum value for the Date column if that is included in the partition key."}, + {"min_time", std::make_shared(), "The minimum value for the DateTime column if that is included in the partition key."}, + {"max_time", std::make_shared(), "The maximum value for the DateTime column if that is included in the partition key."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. 
Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"column", std::make_shared()}, - {"type", std::make_shared()}, - {"column_position", std::make_shared()}, - {"default_kind", std::make_shared()}, - {"default_expression", std::make_shared()}, - {"column_bytes_on_disk", std::make_shared()}, - {"column_data_compressed_bytes", std::make_shared()}, - {"column_data_uncompressed_bytes", std::make_shared()}, - {"column_marks_bytes", std::make_shared()}, - {"column_modification_time", std::make_shared(std::make_shared())}, + {"column", std::make_shared(), "Name of the column."}, + {"type", std::make_shared(), "Column type."}, + {"column_position", std::make_shared(), "Ordinal position of a column in a table starting with 1."}, + {"default_kind", std::make_shared(), "Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined."}, + {"default_expression", std::make_shared(), "Expression for the default value, or an empty string if it is not defined."}, + {"column_bytes_on_disk", std::make_shared(), "Total size of the column in bytes."}, + {"column_data_compressed_bytes", std::make_shared(), "Total size of the compressed data in the column, in bytes."}, + {"column_data_uncompressed_bytes", std::make_shared(), "Total size of the decompressed data in the column, in bytes."}, + {"column_marks_bytes", std::make_shared(), "The size of the marks for column, in bytes."}, + {"column_modification_time", std::make_shared(std::make_shared()), "The last time the column was modified."}, - {"serialization_kind", std::make_shared()}, - {"substreams", std::make_shared(std::make_shared())}, - {"filenames", std::make_shared(std::make_shared())}, - {"subcolumns.names", std::make_shared(std::make_shared())}, - {"subcolumns.types", std::make_shared(std::make_shared())}, - {"subcolumns.serializations", std::make_shared(std::make_shared())}, - {"subcolumns.bytes_on_disk", std::make_shared(std::make_shared())}, - {"subcolumns.data_compressed_bytes", std::make_shared(std::make_shared())}, - {"subcolumns.data_uncompressed_bytes", std::make_shared(std::make_shared())}, - {"subcolumns.marks_bytes", std::make_shared(std::make_shared())}, + {"serialization_kind", std::make_shared(), "Kind of serialization of a column"}, + {"substreams", std::make_shared(std::make_shared()), "Names of substreams to which column is serialized"}, + {"filenames", std::make_shared(std::make_shared()), "Names of files for each substream of a column respectively"}, + {"subcolumns.names", 
std::make_shared(std::make_shared()), "Names of subcolumns of a column"}, + {"subcolumns.types", std::make_shared(std::make_shared()), "Types of subcolumns of a column"}, + {"subcolumns.serializations", std::make_shared(std::make_shared()), "Kinds of serialization of subcolumns of a column"}, + {"subcolumns.bytes_on_disk", std::make_shared(std::make_shared()), "Sizes in bytes for each subcolumn"}, + {"subcolumns.data_compressed_bytes", std::make_shared(std::make_shared()), "Sizes of the compressed data for each subcolumn, in bytes"}, + {"subcolumns.data_uncompressed_bytes", std::make_shared(std::make_shared()), "Sizes of the decompressed data for each subcolumn, in bytes"}, + {"subcolumns.marks_bytes", std::make_shared(std::make_shared()), "Sizes of the marks for each subcolumn of a column, in bytes"}, } ) { diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index 4d749ab0815..58dcf62115e 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -68,13 +68,21 @@ const std::vector> & StorageSystemPrivileges::getAccess ColumnsDescription StorageSystemPrivileges::getColumnsDescription() { - /// TODO: Fill in all the comments. - return ColumnsDescription - { - {"privilege", std::make_shared(getAccessTypeEnumValues())}, - {"aliases", std::make_shared(std::make_shared())}, - {"level", std::make_shared(std::make_shared(getLevelEnumValues()))}, - {"parent_group", std::make_shared(std::make_shared(getAccessTypeEnumValues()))}, + return ColumnsDescription{ + {"privilege", + std::make_shared(getAccessTypeEnumValues()), + "Name of a privilege which can be used in the GRANT command."}, + {"aliases", + std::make_shared(std::make_shared()), + "List of aliases which can be used instead of the name of the privilege."}, + {"level", + std::make_shared(std::make_shared(getLevelEnumValues())), + "Level of the privilege. GLOBAL privileges can be granted only globally (ON *.*), DATABASE privileges can be granted " + "on a specific database (ON .*) or globally (ON *.*), TABLE privileges can be granted either on a specific table or " + "on a specific database or globally, and COLUMN privileges can be granted like TABLE privileges but also allow to specify columns."}, + {"parent_group", std::make_shared(std::make_shared(getAccessTypeEnumValues())), + "Parent privilege - if the parent privilege is granted then all its children privileges are considered as granted too." + }, }; } diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index b6fd1aabd45..bef98e59687 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -21,35 +21,35 @@ ColumnsDescription StorageSystemProcesses::getColumnsDescription() { auto description = ColumnsDescription { - {"is_initial_query", std::make_shared()}, + {"is_initial_query", std::make_shared(), "Whether this query comes directly from user or was issued by ClickHouse server in a scope of distributed query execution."}, {"user", std::make_shared(), "The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the default user. The field contains the username for a specific query, not for a query that this query initiated."}, {"query_id", std::make_shared(), "Query ID, if defined."}, - {"address", DataTypeFactory::instance().get("IPv6"), "The IP address the request was made from. 
The same for distributed processing. To track where a distributed query was originally made from, look at system.processes on the query requestor server."}, - {"port", std::make_shared()}, + {"address", DataTypeFactory::instance().get("IPv6"), "The IP address the query was made from. The same for distributed processing. To track where a distributed query was originally made from, look at system.processes on the query requestor server."}, + {"port", std::make_shared(), "The client port the query was made from."}, - {"initial_user", std::make_shared()}, - {"initial_query_id", std::make_shared()}, - {"initial_address", DataTypeFactory::instance().get("IPv6")}, - {"initial_port", std::make_shared()}, + {"initial_user", std::make_shared(), "Name of the user who ran the initial query (for distributed query execution)."}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"initial_address", DataTypeFactory::instance().get("IPv6"), "IP address that the parent query was launched from."}, + {"initial_port", std::make_shared(), "The client port that was used to make the parent query."}, - {"interface", std::make_shared()}, + {"interface", std::make_shared(), "The interface which was used to send the query. TCP = 1, HTTP = 2, GRPC = 3, MYSQL = 4, POSTGRESQL = 5, LOCAL = 6, TCP_INTERSERVER = 7."}, - {"os_user", std::make_shared()}, - {"client_hostname", std::make_shared()}, - {"client_name", std::make_shared()}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, + {"os_user", std::make_shared(), "Operating system username who runs clickhouse-client."}, + {"client_hostname", std::make_shared(), "Hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", std::make_shared(), "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "Major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "Minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, - {"http_method", std::make_shared()}, - {"http_user_agent", std::make_shared()}, - {"http_referer", std::make_shared()}, - {"forwarded_for", std::make_shared()}, + {"http_method", std::make_shared(), "HTTP method that initiated the query. Possible values: 0 — The query was launched from the TCP interface. 1 — GET method was used. 
2 — POST method was used."}, + {"http_user_agent", std::make_shared(), "HTTP header UserAgent passed in the HTTP query."}, + {"http_referer", std::make_shared(), "HTTP header Referer passed in the HTTP query (contains an absolute or partial address of the page making the query)."}, + {"forwarded_for", std::make_shared(), "HTTP header X-Forwarded-For passed in the HTTP query."}, - {"quota_key", std::make_shared()}, - {"distributed_depth", std::make_shared()}, + {"quota_key", std::make_shared(), "The quota key specified in the quotas setting (see keyed)."}, + {"distributed_depth", std::make_shared(), "The number of times query was retransmitted between server nodes internally."}, {"elapsed", std::make_shared(), "The time in seconds since request execution started."}, {"is_cancelled", std::make_shared(), "Was query cancelled."}, @@ -57,18 +57,18 @@ ColumnsDescription StorageSystemProcesses::getColumnsDescription() {"read_rows", std::make_shared(), "The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, {"read_bytes", std::make_shared(), "The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, {"total_rows_approx", std::make_shared(), "The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known."}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, + {"written_rows", std::make_shared(), "The amount of rows written to the storage."}, + {"written_bytes", std::make_shared(), "The amount of bytes written to the storage."}, {"memory_usage", std::make_shared(), "Amount of RAM the query uses. It might not include some types of dedicated memory"}, - {"peak_memory_usage", std::make_shared()}, + {"peak_memory_usage", std::make_shared(), "The current peak of memory usage."}, {"query", std::make_shared(), "The query text. 
For INSERT, it does not include the data to insert."}, - {"query_kind", std::make_shared()}, + {"query_kind", std::make_shared(), "The type of the query - SELECT, INSERT, etc."}, - {"thread_ids", std::make_shared(std::make_shared())}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"Settings", std::make_shared(std::make_shared(), std::make_shared())}, + {"thread_ids", std::make_shared(std::make_shared()), "The list of identifiers of all threads which executed this query."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "ProfileEvents calculated for this query."}, + {"Settings", std::make_shared(std::make_shared(), std::make_shared()), "The list of modified user-level settings."}, - {"current_database", std::make_shared()}, + {"current_database", std::make_shared(), "The name of the current database."}, }; description.setAliases({ diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 016705f4e66..4bdcea67313 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -19,71 +19,71 @@ namespace DB StorageSystemProjectionParts::StorageSystemProjectionParts(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"part_type", std::make_shared()}, - {"parent_name", std::make_shared()}, - {"parent_uuid", std::make_shared()}, - {"parent_part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"parent_marks", std::make_shared()}, - {"parent_rows", std::make_shared()}, - {"parent_bytes_on_disk", std::make_shared()}, - {"parent_data_compressed_bytes", std::make_shared()}, - {"parent_data_uncompressed_bytes", std::make_shared()}, - {"parent_marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, - {"is_frozen", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, + {"name", std::make_shared(), "Name of the data part."}, + {"part_type", std::make_shared(), "The data part storing format. Possible Values: Wide (a file per column) and Compact (a single file for all columns)."}, + {"parent_name", std::make_shared(), "The name of the source (parent) data part."}, + {"parent_uuid", std::make_shared(), "The UUID of the source (parent) data part."}, + {"parent_part_type", std::make_shared(), "The source (parent) data part storing format."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's about to be deleted. 
Inactive data parts appear after merging and mutating operations."}, + {"marks", std::make_shared(), "The number of marks. To get the approximate number of rows in a data part, multiply marks by the index granularity (usually 8192) (this hint does not work for adaptive granularity)."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"marks_bytes", std::make_shared(), "The size of the file with marks."}, + {"parent_marks", std::make_shared(), "The number of marks in the source (parent) part."}, + {"parent_rows", std::make_shared(), "The number of rows in the source (parent) part."}, + {"parent_bytes_on_disk", std::make_shared(), "Total size of all the source (parent) data part files in bytes."}, + {"parent_data_compressed_bytes", std::make_shared(), "Total size of compressed data in the source (parent) data part."}, + {"parent_data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the source (parent) data part."}, + {"parent_marks_bytes", std::make_shared(), "The size of the file with marks in the source (parent) data part."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value of the date key in the data part."}, + {"max_date", std::make_shared(), "The maximum value of the date key in the data part."}, + {"min_time", std::make_shared(), "The minimum value of the date and time key in the data part."}, + {"max_time", std::make_shared(), "The maximum value of the date and time key in the data part."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, + {"is_frozen", std::make_shared(), "Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist. 
"}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"hash_of_all_files", std::make_shared()}, - {"hash_of_uncompressed_files", std::make_shared()}, - {"uncompressed_hash_of_compressed_files", std::make_shared()}, + {"hash_of_all_files", std::make_shared(), "sipHash128 of compressed files."}, + {"hash_of_uncompressed_files", std::make_shared(), "sipHash128 of uncompressed files (files with marks, index file etc.)."}, + {"uncompressed_hash_of_compressed_files", std::make_shared(), "sipHash128 of data in the compressed files as if they were uncompressed."}, - {"delete_ttl_info_min", std::make_shared()}, - {"delete_ttl_info_max", std::make_shared()}, + {"delete_ttl_info_min", std::make_shared(), "The minimum value of the date and time key for TTL DELETE rule."}, + {"delete_ttl_info_max", std::make_shared(), "The maximum value of the date and time key for TTL DELETE rule."}, - {"move_ttl_info.expression", std::make_shared(std::make_shared())}, - {"move_ttl_info.min", std::make_shared(std::make_shared())}, - {"move_ttl_info.max", std::make_shared(std::make_shared())}, + {"move_ttl_info.expression", std::make_shared(std::make_shared()), "Array of expressions. Each expression defines a TTL MOVE rule."}, + {"move_ttl_info.min", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the minimum key value for a TTL MOVE rule."}, + {"move_ttl_info.max", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the maximum key value for a TTL MOVE rule."}, - {"default_compression_codec", std::make_shared()}, + {"default_compression_codec", std::make_shared(), "The name of the codec used to compress this data part (in case when there is no explicit codec for columns)."}, - {"recompression_ttl_info.expression", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.min", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.max", std::make_shared(std::make_shared())}, + {"recompression_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"recompression_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"recompression_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"group_by_ttl_info.expression", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.min", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.max", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"group_by_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. 
Used to understand whether we have at least one row with expired TTL."}, + {"group_by_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.max", std::make_shared(std::make_shared())} + {"rows_where_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"rows_where_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"rows_where_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, } ) { diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index 2ff25f86366..9521605688d 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -19,56 +19,56 @@ namespace DB StorageSystemProjectionPartsColumns::StorageSystemProjectionPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"part_type", std::make_shared()}, - {"parent_name", std::make_shared()}, - {"parent_uuid", std::make_shared()}, - {"parent_part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"parent_marks", std::make_shared()}, - {"parent_rows", std::make_shared()}, - {"parent_bytes_on_disk", std::make_shared()}, - {"parent_data_compressed_bytes", std::make_shared()}, - {"parent_data_uncompressed_bytes", std::make_shared()}, - {"parent_marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, + {"partition", std::make_shared(), "The partition name. 
"}, + {"name", std::make_shared(), "Name of the data part."}, + {"part_type", std::make_shared(), "The data part storing format."}, + {"parent_name", std::make_shared(), "The name of the source (parent) data part."}, + {"parent_uuid", std::make_shared(), "The UUID of the source (parent) data part."}, + {"parent_part_type", std::make_shared(), "The source (parent) data part storing format."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active"}, + {"marks", std::make_shared(), "The number of marks."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"marks_bytes", std::make_shared(), "The size of the file with marks."}, + {"parent_marks", std::make_shared(), "The number of marks in the source (parent) part."}, + {"parent_rows", std::make_shared(), "The number of rows in the source (parent) part."}, + {"parent_bytes_on_disk", std::make_shared(), "Total size of all the source (parent) data part files in bytes."}, + {"parent_data_compressed_bytes", std::make_shared(), "Total size of compressed data in the source (parent) data part."}, + {"parent_data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the source (parent) data part."}, + {"parent_marks_bytes", std::make_shared(), "The size of the file with marks in the source (parent) data part."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value for the Date column if that is included in the partition key."}, + {"max_date", std::make_shared(), "The maximum value for the Date column if that is included in the partition key."}, + {"min_time", std::make_shared(), "The minimum value for the DateTime column if that is included in the partition key."}, + {"max_time", std::make_shared(), "The maximum value for the DateTime column if that is included in the partition key."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. 
Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"column", std::make_shared()}, - {"type", std::make_shared()}, - {"column_position", std::make_shared()}, - {"default_kind", std::make_shared()}, - {"default_expression", std::make_shared()}, - {"column_bytes_on_disk", std::make_shared()}, - {"column_data_compressed_bytes", std::make_shared()}, - {"column_data_uncompressed_bytes", std::make_shared()}, - {"column_marks_bytes", std::make_shared()}, - {"column_modification_time", std::make_shared(std::make_shared())}, + {"column", std::make_shared(), "Name of the column."}, + {"type", std::make_shared(), "Column type."}, + {"column_position", std::make_shared(), "Ordinal position of a column in a table starting with 1."}, + {"default_kind", std::make_shared(), "Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined."}, + {"default_expression", std::make_shared(), "Expression for the default value, or an empty string if it is not defined."}, + {"column_bytes_on_disk", std::make_shared(), "Total size of the column in bytes."}, + {"column_data_compressed_bytes", std::make_shared(), "Total size of compressed data in the column, in bytes."}, + {"column_data_uncompressed_bytes", std::make_shared(), "Total size of the decompressed data in the column, in bytes."}, + {"column_marks_bytes", std::make_shared(), "The size of the column with marks, in bytes."}, + {"column_modification_time", std::make_shared(std::make_shared()), "The last time the column was modified."}, } ) { diff --git a/src/Storages/System/StorageSystemQuotaLimits.cpp b/src/Storages/System/StorageSystemQuotaLimits.cpp index dba449d4f1d..65acfba0c1b 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.cpp +++ b/src/Storages/System/StorageSystemQuotaLimits.cpp @@ -68,7 +68,8 @@ ColumnsDescription StorageSystemQuotaLimits::getColumnsDescription() data_type = std::make_shared(); else data_type = std::make_shared(); - result.add({column_name, std::make_shared(data_type)}); + + result.add({column_name, std::make_shared(data_type), type_info.max_allowed_usage_description}); } return result; diff --git a/src/Storages/System/StorageSystemQuotaUsage.cpp b/src/Storages/System/StorageSystemQuotaUsage.cpp index 2df36aee240..da9c174b0d3 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.cpp +++ b/src/Storages/System/StorageSystemQuotaUsage.cpp @@ -81,8 +81,8 @@ ColumnsDescription StorageSystemQuotaUsage::getColumnsDescriptionImpl(bool add_c data_type = std::make_shared(); else data_type = 
std::make_shared(); - description.add({column_name, std::make_shared(data_type)}); - description.add({String("max_") + column_name, std::make_shared(data_type)}); + description.add({column_name, std::make_shared(data_type), type_info.current_usage_description}); + description.add({String("max_") + column_name, std::make_shared(data_type), type_info.max_allowed_usage_description}); } return description; diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 55002d6d00f..7cc1951fc05 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -20,14 +20,14 @@ StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & tab StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, - {"cache_base_path", std::make_shared()}, - {"local_path", std::make_shared()}, - {"remote_path", std::make_shared()}, - {"size", std::make_shared()}, - {"common_prefix_for_blobs", std::make_shared()}, - {"cache_paths", std::make_shared(std::make_shared())}, + {"disk_name", std::make_shared(), "Disk name."}, + {"path", std::make_shared(), "Disk path."}, + {"cache_base_path", std::make_shared(), "Base directory of cache files."}, + {"local_path", std::make_shared(), "Path of ClickHouse file, also used as metadata path."}, + {"remote_path", std::make_shared(), "Blob path in object storage with which the ClickHouse file is associated."}, + {"size", std::make_shared(), "Size of the file (compressed)."}, + {"common_prefix_for_blobs", std::make_shared(), "Common prefix for blobs in object storage."}, + {"cache_paths", std::make_shared(std::make_shared()), "Cache files for corresponding blob."}, })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 26dead8cb01..af8d67cbc21 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -196,41 +196,47 @@ StorageSystemReplicas::StorageSystemReplicas(const StorageID & table_id_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription({ - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "engine", std::make_shared() }, - { "is_leader", std::make_shared() }, - { "can_become_leader", std::make_shared() }, - { "is_readonly", std::make_shared() }, - { "is_session_expired", std::make_shared() }, - { "future_parts", std::make_shared() }, - { "parts_to_check", std::make_shared() }, - { "zookeeper_name", std::make_shared() }, - { "zookeeper_path", std::make_shared() }, - { "replica_name", std::make_shared() }, - { "replica_path", std::make_shared() }, - { "columns_version", std::make_shared() }, - { "queue_size", std::make_shared() }, - { "inserts_in_queue", std::make_shared() }, - { "merges_in_queue", std::make_shared() }, - { "part_mutations_in_queue", std::make_shared() }, - { "queue_oldest_time", std::make_shared() }, - { "inserts_oldest_time", std::make_shared() }, - { "merges_oldest_time", std::make_shared() }, - { "part_mutations_oldest_time", std::make_shared() }, - { "oldest_part_to_get", std::make_shared() }, - { "oldest_part_to_merge_to", std::make_shared() }, - { "oldest_part_to_mutate_to", std::make_shared() }, - { "log_max_index", std::make_shared() }, - { "log_pointer", std::make_shared() }, - { 
"last_queue_update", std::make_shared() }, - { "absolute_delay", std::make_shared() }, - { "total_replicas", std::make_shared() }, - { "active_replicas", std::make_shared() }, - { "lost_part_count", std::make_shared() }, - { "last_queue_update_exception", std::make_shared() }, - { "zookeeper_exception", std::make_shared() }, - { "replica_is_active", std::make_shared(std::make_shared(), std::make_shared()) } + { "database", std::make_shared(), "Database name."}, + { "table", std::make_shared(), "Table name."}, + { "engine", std::make_shared(), "Table engine name."}, + { "is_leader", std::make_shared(), "Whether the replica is the leader. Multiple replicas can be leaders at the same time. " + "A replica can be prevented from becoming a leader using the merge_tree setting replicated_can_become_leader. " + "The leaders are responsible for scheduling background merges. " + "Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader."}, + { "can_become_leader", std::make_shared(), "Whether the replica can be a leader."}, + { "is_readonly", std::make_shared(), "Whether the replica is in read-only mode. This mode is turned on if the config does not have sections with ClickHouse Keeper, " + "if an unknown error occurred when reinitializing sessions in ClickHouse Keeper, and during session reinitialization in ClickHouse Keeper."}, + { "is_session_expired", std::make_shared(), "Whether the session with ClickHouse Keeper has expired. Basically the same as `is_readonly`."}, + { "future_parts", std::make_shared(), "The number of data parts that will appear as the result of INSERTs or merges that haven't been done yet."}, + { "parts_to_check", std::make_shared(), "The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged."}, + { "zookeeper_name", std::make_shared(), "The name of the [Zoo]Keeper cluster (possibly auxiliary one) where the table's metadata is stored."}, + { "zookeeper_path", std::make_shared(), "Path to table data in ClickHouse Keeper."}, + { "replica_name", std::make_shared(), "Replica name in ClickHouse Keeper. Different replicas of the same table have different names."}, + { "replica_path", std::make_shared(), "Path to replica data in ClickHouse Keeper. The same as concatenating 'zookeeper_path/replicas/replica_path'."}, + { "columns_version", std::make_shared(), "Version number of the table structure. Indicates how many times ALTER was performed. " + "If replicas have different versions, it means some replicas haven't made all of the ALTERs yet."}, + { "queue_size", std::make_shared(), "Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with future_parts."}, + { "inserts_in_queue", std::make_shared(), "Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong."}, + { "merges_in_queue", std::make_shared(), "The number of merges waiting to be made. 
Sometimes merges are lengthy, so this value may be greater than zero for a long time."}, + { "part_mutations_in_queue", std::make_shared(), "The number of mutations waiting to be made."}, + { "queue_oldest_time", std::make_shared(), "If `queue_size` is greater than 0, shows when the oldest operation was added to the queue."}, + { "inserts_oldest_time", std::make_shared(), "See `queue_oldest_time`."}, + { "merges_oldest_time", std::make_shared(), "See `queue_oldest_time`."}, + { "part_mutations_oldest_time", std::make_shared(), "See `queue_oldest_time`."}, + { "oldest_part_to_get", std::make_shared(), "The name of the part to fetch from other replicas obtained from the oldest GET_PARTS entry in the replication queue."}, + { "oldest_part_to_merge_to", std::make_shared(), "The result part name to merge to obtained from the oldest MERGE_PARTS entry in the replication queue."}, + { "oldest_part_to_mutate_to", std::make_shared(), "The result part name to mutate to obtained from the oldest MUTATE_PARTS entry in the replication queue."}, + { "log_max_index", std::make_shared(), "Maximum entry number in the log of general activity."}, + { "log_pointer", std::make_shared(), "Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. " + "If log_pointer is much smaller than log_max_index, something is wrong."}, + { "last_queue_update", std::make_shared(), "When the queue was updated last time."}, + { "absolute_delay", std::make_shared(), "How big a lag in seconds the current replica has."}, + { "total_replicas", std::make_shared(), "The total number of known replicas of this table."}, + { "active_replicas", std::make_shared(), "The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas)."}, + { "lost_part_count", std::make_shared(), "The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase."}, + { "last_queue_update_exception", std::make_shared(), "When the queue contains broken entries. 
Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions."}, + { "zookeeper_exception", std::make_shared(), "The last exception message, got if the error happened when fetching the info from ClickHouse Keeper."}, + { "replica_is_active", std::make_shared(std::make_shared(), std::make_shared()), "Map between replica name and is replica active."} })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index ea819e88993..93c5ba60a7f 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -38,8 +38,8 @@ ColumnsDescription StorageSystemRowPolicies::getColumnsDescription() for (auto filter_type : collections::range(RowPolicyFilterType::MAX)) { - const String & column_name = RowPolicyFilterTypeInfo::get(filter_type).name; - description.add({column_name, std::make_shared(std::make_shared())}); + const auto & filter_type_info = RowPolicyFilterTypeInfo::get(filter_type); + description.add({filter_type_info.name, std::make_shared(std::make_shared()), filter_type_info.description}); } description.add({"is_restrictive", std::make_shared(), diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 557f0fd1208..a6bb7da2b6e 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -25,14 +25,14 @@ ColumnsDescription StorageSystemS3Queue::getColumnsDescription() /// TODO: Fill in all the comments return ColumnsDescription { - {"zookeeper_path", std::make_shared()}, - {"file_name", std::make_shared()}, - {"rows_processed", std::make_shared()}, - {"status", std::make_shared()}, - {"processing_start_time", std::make_shared(std::make_shared())}, - {"processing_end_time", std::make_shared(std::make_shared())}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"exception", std::make_shared()}, + {"zookeeper_path", std::make_shared(), "Path in zookeeper to S3Queue metadata"}, + {"file_name", std::make_shared(), "File name of a file which is being processed by S3Queue"}, + {"rows_processed", std::make_shared(), "Currently processed number of rows"}, + {"status", std::make_shared(), "Status of processing: Processed, Processing, Failed"}, + {"processing_start_time", std::make_shared(std::make_shared()), "Time at which processing of the file started"}, + {"processing_end_time", std::make_shared(std::make_shared()), "Time at which processing of the file ended"}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected during processing of the file"}, + {"exception", std::make_shared(), "Exception which happened during processing"}, }; } diff --git a/src/Storages/System/StorageSystemSettingsChanges.cpp b/src/Storages/System/StorageSystemSettingsChanges.cpp index ecac76b73a3..de47ec52031 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.cpp +++ b/src/Storages/System/StorageSystemSettingsChanges.cpp @@ -12,7 +12,7 @@ ColumnsDescription StorageSystemSettingsChanges::getColumnsDescription() /// TODO: Fill in all the comments return ColumnsDescription { - {"version", std::make_shared()}, + {"version", std::make_shared(), "The ClickHouse server version."}, {"changes", std::make_shared(std::make_shared( DataTypes{ @@ -20,7 +20,7 @@ ColumnsDescription 
StorageSystemSettingsChanges::getColumnsDescription() std::make_shared(), std::make_shared(), std::make_shared()}, - Names{"name", "previous_value", "new_value", "reason"}))}, + Names{"name", "previous_value", "new_value", "reason"})), "The list of changes in settings which changed the behaviour of ClickHouse."}, }; } diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.cpp b/src/Storages/System/StorageSystemSettingsProfileElements.cpp index 6ac5d13a249..2af3e6dfd05 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.cpp +++ b/src/Storages/System/StorageSystemSettingsProfileElements.cpp @@ -42,7 +42,7 @@ ColumnsDescription StorageSystemSettingsProfileElements::getColumnsDescription() {"value", std::make_shared(std::make_shared()), "Setting value."}, {"min", std::make_shared(std::make_shared()), "The minimum value of the setting. NULL if not set."}, {"max", std::make_shared(std::make_shared()), "The maximum value of the setting. NULL if not set."}, - {"writability", std::make_shared(std::make_shared(getSettingConstraintWritabilityEnumValues()))}, + {"writability", std::make_shared(std::make_shared(getSettingConstraintWritabilityEnumValues())), "The property which shows whether a setting can be changed or not."}, {"inherit_profile", std::make_shared(std::make_shared()), "A parent profile for this setting profile. NULL if not set. " "Setting profile will inherit all the settings' values and constraints (min, max, readonly) from its parent profiles." diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 74864bb50e1..ba7433fb9ae 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -507,11 +507,11 @@ StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription({ - { "thread_name", std::make_shared() }, - { "thread_id", std::make_shared() }, - { "query_id", std::make_shared() }, - { "trace", std::make_shared(std::make_shared()) }, - }, { /* aliases */ })); + {"thread_name", std::make_shared(), "The name of the thread."}, + {"thread_id", std::make_shared(), "The thread identifier"}, + {"query_id", std::make_shared(), "The ID of the query this thread belongs to."}, + {"trace", std::make_shared(std::make_shared()), "The stacktrace of this thread. 
Basically just an array of addresses."}, + })); setInMemoryMetadata(storage_metadata); notification_pipe.open(); diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index 39e99884e1d..21251136f7d 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -37,16 +37,16 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const StorageID & tab StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns( ColumnsDescription({ - {"policy_name", std::make_shared()}, - {"volume_name", std::make_shared()}, - {"volume_priority", std::make_shared()}, - {"disks", std::make_shared(std::make_shared())}, - {"volume_type", std::make_shared(getTypeEnumValues())}, - {"max_data_part_size", std::make_shared()}, - {"move_factor", std::make_shared()}, - {"prefer_not_to_merge", std::make_shared()}, - {"perform_ttl_move_on_insert", std::make_shared()}, - {"load_balancing", std::make_shared(getTypeEnumValues())} + {"policy_name", std::make_shared(), "The name of the storage policy."}, + {"volume_name", std::make_shared(), "The name of the volume."}, + {"volume_priority", std::make_shared(), "The priority of the volume."}, + {"disks", std::make_shared(std::make_shared()), "The list of all disk names which are a part of this storage policy."}, + {"volume_type", std::make_shared(getTypeEnumValues()), "The type of the volume - JBOD or a single disk."}, + {"max_data_part_size", std::make_shared(), "The maximum size of a part that can be stored on any of the volume's disks."}, + {"move_factor", std::make_shared(), "When the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1)."}, + {"prefer_not_to_merge", std::make_shared(), "You should not use this setting. Disables merging of data parts on this volume (this is harmful and leads to performance degradation)."}, + {"perform_ttl_move_on_insert", std::make_shared(), "Disables TTL move on data part INSERT. By default (if enabled) if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule."}, + {"load_balancing", std::make_shared(getTypeEnumValues()), "Policy for disk balancing, `round_robin` or `least_used`."} })); // TODO: Add string column with custom volume-type-specific options setInMemoryMetadata(storage_metadata); diff --git a/src/Storages/System/StorageSystemSymbols.cpp b/src/Storages/System/StorageSystemSymbols.cpp index 56195544448..6b313407e61 100644 --- a/src/Storages/System/StorageSystemSymbols.cpp +++ b/src/Storages/System/StorageSystemSymbols.cpp @@ -22,9 +22,9 @@ StorageSystemSymbols::StorageSystemSymbols(const StorageID & table_id_) StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - {"symbol", std::make_shared()}, - {"address_begin", std::make_shared()}, - {"address_end", std::make_shared()}, + {"symbol", std::make_shared(), "Symbol name in the binary. It is mangled. 
You can apply demangle(symbol) to obtain a readable name."}, + {"address_begin", std::make_shared(), "Start address of the symbol in the binary."}, + {"address_end", std::make_shared(), "End address of the symbol in the binary."}, })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 6904eba8ab5..639c1455b83 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -67,9 +67,9 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) "Total number of uncompressed bytes, if it's possible to quickly determine the exact number " "of bytes from the part checksums for the table on storage, otherwise NULL (does not take underlying storage (if any) into account)." }, - {"parts", std::make_shared(std::make_shared())}, - {"active_parts", std::make_shared(std::make_shared())}, - {"total_marks", std::make_shared(std::make_shared())}, + {"parts", std::make_shared(std::make_shared()), "The total number of parts in this table."}, + {"active_parts", std::make_shared(std::make_shared()), "The number of active parts in this table."}, + {"total_marks", std::make_shared(std::make_shared()), "The total number of marks in all parts in this table."}, {"lifetime_rows", std::make_shared(std::make_shared()), "Total number of rows INSERTed since server start (only for Buffer tables)." }, diff --git a/src/Storages/System/StorageSystemTransactions.cpp b/src/Storages/System/StorageSystemTransactions.cpp index edc3739e713..295d93edf7d 100644 --- a/src/Storages/System/StorageSystemTransactions.cpp +++ b/src/Storages/System/StorageSystemTransactions.cpp @@ -23,14 +23,13 @@ static DataTypePtr getStateEnumType() ColumnsDescription StorageSystemTransactions::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - {"tid", getTransactionIDDataType()}, - {"tid_hash", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"is_readonly", std::make_shared()}, - {"state", getStateEnumType()}, + {"tid", getTransactionIDDataType(), "The identifier of the transaction."}, + {"tid_hash", std::make_shared(), "The hash of the identifier."}, + {"elapsed", std::make_shared(), "The amount of time the transaction is being processed."}, + {"is_readonly", std::make_shared(), "The flag which shows whether the transaction has executed any write operation."}, + {"state", getStateEnumType(), "The state of the transaction. Possible values: RUNNING, COMMITTING, COMMITTED, ROLLED_BACK."}, }; } diff --git a/src/Storages/System/StorageSystemUserDirectories.cpp b/src/Storages/System/StorageSystemUserDirectories.cpp index 7b6c1144ae1..1b3469e7597 100644 --- a/src/Storages/System/StorageSystemUserDirectories.cpp +++ b/src/Storages/System/StorageSystemUserDirectories.cpp @@ -11,13 +11,12 @@ namespace DB { ColumnsDescription StorageSystemUserDirectories::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - {"name", std::make_shared()}, - {"type", std::make_shared()}, - {"params", std::make_shared()}, - {"precedence", std::make_shared()}, + {"name", std::make_shared(), "The name of the directory."}, + {"type", std::make_shared(), "The type of the access storage e.g. users.xml or replicated or memory etc."}, + {"params", std::make_shared(), "JSON with the parameters of the access storage."}, + {"precedence", std::make_shared(), "The order in which this directory is declared in the config. 
The same order is used when ClickHouse tries to find a user or role."}, }; } diff --git a/src/Storages/System/StorageSystemUserProcesses.cpp b/src/Storages/System/StorageSystemUserProcesses.cpp index d36129aea63..65fbeedf406 100644 --- a/src/Storages/System/StorageSystemUserProcesses.cpp +++ b/src/Storages/System/StorageSystemUserProcesses.cpp @@ -18,10 +18,10 @@ ColumnsDescription StorageSystemUserProcesses::getColumnsDescription() { auto description = ColumnsDescription { - {"user", std::make_shared()}, - {"memory_usage", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + {"user", std::make_shared(), "User name."}, + {"memory_usage", std::make_shared(), "Sum of RAM used by all processes of the user. It might not include some types of dedicated memory. See the max_memory_usage setting."}, + {"peak_memory_usage", std::make_shared(), "The peak of memory usage of the user. It can be reset when no queries are run for the user."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Summary of ProfileEvents that measure different metrics for the user. The description of them could be found in the table system.events"}, }; description.setAliases({ diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 4734aeaaa82..0c34f04844d 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -69,10 +69,10 @@ ColumnsDescription StorageSystemUsers::getColumnsDescription() {"default_roles_except", std::make_shared(std::make_shared()), "All the granted roles set as default excepting of the listed ones." }, - {"grantees_any", std::make_shared()}, - {"grantees_list", std::make_shared(std::make_shared())}, - {"grantees_except", std::make_shared(std::make_shared())}, - {"default_database", std::make_shared()}, + {"grantees_any", std::make_shared(), "The flag that indicates whether a user with any grant option can grant it to anyone."}, + {"grantees_list", std::make_shared(std::make_shared()), "The list of users or roles to which this user is allowed to grant options to."}, + {"grantees_except", std::make_shared(std::make_shared()), "The list of users or roles to which this user is forbidden from granting options to."}, + {"default_database", std::make_shared(), "The name of the default database for this user."}, }; } diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index a0ed1d3c3d3..30539ed6b6a 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -36,15 +36,15 @@ ColumnsDescription StorageSystemViewRefreshes::getColumnsDescription() }, {"refresh_count", std::make_shared(), "Number of successful refreshes since last server restart or table creation."}, {"progress", std::make_shared(), "Progress of the current refresh, between 0 and 1."}, - {"elapsed", std::make_shared()}, - {"read_rows", std::make_shared(), "Number of rows read by the current refresh so far."}, - {"read_bytes", std::make_shared()}, + {"elapsed", std::make_shared(), "The amount of nanoseconds the current refresh took."}, + {"read_rows", std::make_shared(), "Number of rows read during the current refresh."}, + {"read_bytes", std::make_shared(), "Number of bytes read during the current refresh."}, {"total_rows", std::make_shared(), "Estimated total number of rows that need to be read by the 
current refresh."}, - {"total_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"result_rows", std::make_shared()}, - {"result_bytes", std::make_shared()}, + {"total_bytes", std::make_shared(), "Estimated total number of bytes that need to be read by the current refresh."}, + {"written_rows", std::make_shared(), "Number of rows written during the current refresh."}, + {"written_bytes", std::make_shared(), "Number of bytes written during the current refresh."}, + {"result_rows", std::make_shared(), "Estimated total number of rows in the result set of the SELECT query."}, + {"result_bytes", std::make_shared(), "Estimated total number of bytes in the result set of the SELECT query."}, }; } diff --git a/tests/queries/0_stateless/02294_system_certificates.reference b/tests/queries/0_stateless/02294_system_certificates.reference index 4655f996c40..432df9110a2 100644 --- a/tests/queries/0_stateless/02294_system_certificates.reference +++ b/tests/queries/0_stateless/02294_system_certificates.reference @@ -1,10 +1,10 @@ -version Int32 -serial_number Nullable(String) -signature_algo Nullable(String) -issuer Nullable(String) -not_before Nullable(String) -not_after Nullable(String) -subject Nullable(String) -pkey_algo Nullable(String) -path String -default UInt8 +version Int32 Version of the certificate. Values are 0 for v1, 1 for v2, 2 for v3. +serial_number Nullable(String) Serial Number of the certificate assigned by the issuer. +signature_algo Nullable(String) Signature Algorithm - an algorithm used by the issuer to sign this certificate. +issuer Nullable(String) Issuer - an unique identifier for the Certificate Authority issuing this certificate. +not_before Nullable(String) The beginning of the time window when this certificate is valid. +not_after Nullable(String) The end of the time window when this certificate is valid. +subject Nullable(String) Subject - identifies the owner of the public key. +pkey_algo Nullable(String) Public Key Algorithm defines the algorithm the public key can be used with. +path String Path to the file or directory containing this certificate. +default UInt8 Certificate is in the default certificate location. diff --git a/tests/queries/0_stateless/02326_settings_changes_system_table.reference b/tests/queries/0_stateless/02326_settings_changes_system_table.reference index 1c8c4fa1880..946b2727d30 100644 --- a/tests/queries/0_stateless/02326_settings_changes_system_table.reference +++ b/tests/queries/0_stateless/02326_settings_changes_system_table.reference @@ -1,3 +1,3 @@ -version String -changes Array(Tuple(\n name String,\n previous_value String,\n new_value String,\n reason String)) +version String The ClickHouse server version. +changes Array(Tuple(\n name String,\n previous_value String,\n new_value String,\n reason String)) The list of changes in settings which changed the behaviour of ClickHouse. 
22.5 [('memory_overcommit_ratio_denominator','0','1073741824','Enable memory overcommit feature by default'),('memory_overcommit_ratio_denominator_for_user','0','1073741824','Enable memory overcommit feature by default')] diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.reference b/tests/queries/0_stateless/02992_all_columns_should_have_comment.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql new file mode 100644 index 00000000000..b984c12e285 --- /dev/null +++ b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql @@ -0,0 +1,4 @@ +SYSTEM FLUSH LOGS; +SELECT 'Column ' || name || ' from table ' || concat(database, '.', table) || ' should have a comment' +FROM system.columns +WHERE (database = 'system') AND (comment = '') AND (table NOT ILIKE '%_log_%') AND (table NOT IN ('numbers', 'numbers_mt', 'one')) AND (default_kind != 'ALIAS'); From b5289bf7abfe6367f88f0bfab77f39a4eae28438 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 8 Mar 2024 11:19:48 -0800 Subject: [PATCH 340/356] Reduce flakiness of 02932_refreshable_materialized_views (#60771) * Reduce flakiness of 02932_refreshable_materialized_views * Fix --- .../02932_refreshable_materialized_views.reference | 6 +++--- .../02932_refreshable_materialized_views.sh | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.reference b/tests/queries/0_stateless/02932_refreshable_materialized_views.reference index b52d0847ff9..aa76806da9d 100644 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.reference +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.reference @@ -1,8 +1,8 @@ <1: created view> a [] 1 -CREATE MATERIALIZED VIEW default.a\nREFRESH AFTER 1 SECOND\n(\n `x` UInt64\n)\nENGINE = Memory\nAS SELECT number AS x\nFROM numbers(2)\nUNION ALL\nSELECT rand64() AS x +CREATE MATERIALIZED VIEW default.a\nREFRESH AFTER 2 SECOND\n(\n `x` UInt64\n)\nENGINE = Memory\nAS SELECT number AS x\nFROM numbers(2)\nUNION ALL\nSELECT rand64() AS x <2: refreshed> 3 1 1 -<3: time difference at least> 500 -<4: next refresh in> 1 +<3: time difference at least> 1000 +<4: next refresh in> 2 <4.5: altered> Scheduled Finished 2052-01-01 00:00:00 CREATE MATERIALIZED VIEW default.a\nREFRESH EVERY 2 YEAR\n(\n `x` Int16\n)\nENGINE = Memory\nAS SELECT x * 2 AS x\nFROM default.src <5: no refresh> 3 diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh index 8daea063fc5..89942e25b67 100755 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT -nq "create view refreshes as select * from system.view_refre # Basic refreshing. $CLICKHOUSE_CLIENT -nq " create materialized view a - refresh after 1 second + refresh after 2 second engine Memory empty as select number as x from numbers(2) union all select rand64() as x" @@ -29,6 +29,7 @@ while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $ do sleep 0.1 done +start_time="`$CLICKHOUSE_CLIENT -nq "select reinterpret(now64(), 'Int64')"`" # Check table contents. 
$CLICKHOUSE_CLIENT -nq "select '<2: refreshed>', count(), sum(x=0), sum(x=1) from a" # Wait for table contents to change. @@ -39,7 +40,6 @@ do [ "$res2" == "$res1" ] || break sleep 0.1 done -time2="`$CLICKHOUSE_CLIENT -nq "select reinterpret(now64(), 'Int64')"`" # Wait for another change. while : do @@ -47,11 +47,11 @@ do [ "$res3" == "$res2" ] || break sleep 0.1 done -# Check that the two changes were at least 500ms apart, in particular that we're not refreshing +# Check that the two changes were at least 1 second apart, in particular that we're not refreshing # like crazy. This is potentially flaky, but we need at least one test that uses non-mocked timer # to make sure the clock+timer code works at all. If it turns out flaky, increase refresh period above. $CLICKHOUSE_CLIENT -nq " - select '<3: time difference at least>', min2(reinterpret(now64(), 'Int64') - $time2, 500); + select '<3: time difference at least>', min2(reinterpret(now64(), 'Int64') - $start_time, 1000); select '<4: next refresh in>', next_refresh_time-last_refresh_time from refreshes;" # Create a source table from which views will read. @@ -61,7 +61,7 @@ $CLICKHOUSE_CLIENT -nq " # Switch to fake clock, change refresh schedule, change query. $CLICKHOUSE_CLIENT -nq " system test view a set fake time '2050-01-01 00:00:01';" -while [ "`$CLICKHOUSE_CLIENT -nq "select status, last_refresh_time, next_refresh_time from refreshes -- $LINENO" | xargs`" != 'Scheduled 2050-01-01 00:00:01 2050-01-01 00:00:02' ] +while [ "`$CLICKHOUSE_CLIENT -nq "select status, last_refresh_time, next_refresh_time from refreshes -- $LINENO" | xargs`" != 'Scheduled 2050-01-01 00:00:01 2050-01-01 00:00:03' ] do sleep 0.1 done From 6b4867f74656b0d090fda38432f2e4e8efe9106f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 8 Mar 2024 21:27:56 +0100 Subject: [PATCH 341/356] Debug fuzzer failures (#61062) --- docker/test/fuzzer/run-fuzzer.sh | 7 +++---- src/Parsers/ASTAlterQuery.cpp | 1 + src/Parsers/ASTCheckQuery.h | 1 + src/Parsers/ASTCreateIndexQuery.cpp | 1 + src/Parsers/ASTCreateQuery.cpp | 2 ++ src/Parsers/ASTDeleteQuery.cpp | 1 + src/Parsers/ASTDropIndexQuery.cpp | 1 + src/Parsers/ASTDropQuery.cpp | 1 + src/Parsers/ASTInsertQuery.cpp | 1 + src/Parsers/ASTOptimizeQuery.cpp | 1 + src/Parsers/ASTQueryWithTableAndOutput.h | 2 ++ src/Parsers/ASTRenameQuery.h | 2 ++ src/Parsers/ASTSystemQuery.cpp | 1 + src/Parsers/ASTUndropQuery.cpp | 1 + src/Parsers/ASTWatchQuery.h | 1 + 15 files changed, 20 insertions(+), 4 deletions(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 2834a632631..dc1b35b3a21 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -343,10 +343,9 @@ quit # which is confusing. task_exit_code=$fuzzer_exit_code echo "failure" > status.txt - { rg -ao "Found error:.*" fuzzer.log \ - || rg -ao "Exception:.*" fuzzer.log \ - || echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \ - | tail -1 > description.txt + echo "Achtung!" > description.txt + echo "Fuzzer went wrong with error code: ($fuzzer_exit_code). Its process died somehow when the server stayed alive. The server log probably won't tell you much so try to find information in other files." 
>>description.txt + { rg -ao "Found error:.*" fuzzer.log || rg -ao "Exception:.*" fuzzer.log; } | tail -1 >>description.txt fi if test -f core.*; then diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index a93ad1d1746..f104e715452 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -634,6 +634,7 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } else if (alter_object == AlterObjectType::DATABASE && database) diff --git a/src/Parsers/ASTCheckQuery.h b/src/Parsers/ASTCheckQuery.h index 5e9032e77fb..eca08b2b094 100644 --- a/src/Parsers/ASTCheckQuery.h +++ b/src/Parsers/ASTCheckQuery.h @@ -53,6 +53,7 @@ protected: settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTCreateIndexQuery.cpp b/src/Parsers/ASTCreateIndexQuery.cpp index 84493ff22f1..f6def3ed85c 100644 --- a/src/Parsers/ASTCreateIndexQuery.cpp +++ b/src/Parsers/ASTCreateIndexQuery.cpp @@ -52,6 +52,7 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 1315ea5784c..de5eb40837f 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -337,6 +337,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); if (uuid != UUIDHelpers::Nil) @@ -370,6 +371,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); if (uuid != UUIDHelpers::Nil) diff --git a/src/Parsers/ASTDeleteQuery.cpp b/src/Parsers/ASTDeleteQuery.cpp index 1e8303dac62..67f3a85c9a5 100644 --- a/src/Parsers/ASTDeleteQuery.cpp +++ b/src/Parsers/ASTDeleteQuery.cpp @@ -40,6 +40,7 @@ void ASTDeleteQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); formatOnCluster(settings); diff --git a/src/Parsers/ASTDropIndexQuery.cpp b/src/Parsers/ASTDropIndexQuery.cpp index 1a1481ea27f..1109f32f019 100644 --- a/src/Parsers/ASTDropIndexQuery.cpp +++ b/src/Parsers/ASTDropIndexQuery.cpp @@ -47,6 +47,7 @@ void ASTDropIndexQuery::formatQueryImpl(const FormatSettings & settings, FormatS settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp index f41f28f745d..ca47ceccb85 100644 --- a/src/Parsers/ASTDropQuery.cpp +++ b/src/Parsers/ASTDropQuery.cpp @@ -76,6 +76,7 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index eb4bcce6578..72a569fe047 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -74,6 +74,7 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTOptimizeQuery.cpp b/src/Parsers/ASTOptimizeQuery.cpp index d9406a9eca0..397a37586fc 100644 
--- a/src/Parsers/ASTOptimizeQuery.cpp +++ b/src/Parsers/ASTOptimizeQuery.cpp @@ -15,6 +15,7 @@ void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatSt settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); formatOnCluster(settings); diff --git a/src/Parsers/ASTQueryWithTableAndOutput.h b/src/Parsers/ASTQueryWithTableAndOutput.h index 1b8621fb63b..29dbd30e54b 100644 --- a/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/src/Parsers/ASTQueryWithTableAndOutput.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -61,6 +62,7 @@ protected: settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } }; diff --git a/src/Parsers/ASTRenameQuery.h b/src/Parsers/ASTRenameQuery.h index 901f0121a28..73d12be094a 100644 --- a/src/Parsers/ASTRenameQuery.h +++ b/src/Parsers/ASTRenameQuery.h @@ -127,6 +127,7 @@ protected: settings.ostr << '.'; } + chassert(it->from.table); it->from.table->formatImpl(settings, state, frame); settings.ostr << (settings.hilite ? hilite_keyword : "") << (exchange ? " AND " : " TO ") << (settings.hilite ? hilite_none : ""); @@ -137,6 +138,7 @@ protected: settings.ostr << '.'; } + chassert(it->to.table); it->to.table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 63311a70e42..e2ebaee8438 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -114,6 +114,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); return settings.ostr; }; diff --git a/src/Parsers/ASTUndropQuery.cpp b/src/Parsers/ASTUndropQuery.cpp index ab96ca4711e..7212e264c0e 100644 --- a/src/Parsers/ASTUndropQuery.cpp +++ b/src/Parsers/ASTUndropQuery.cpp @@ -36,6 +36,7 @@ void ASTUndropQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTWatchQuery.h b/src/Parsers/ASTWatchQuery.h index 80b675f513f..a5b76c07605 100644 --- a/src/Parsers/ASTWatchQuery.h +++ b/src/Parsers/ASTWatchQuery.h @@ -52,6 +52,7 @@ protected: settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); if (is_watch_events) From 47b308d2346ef2b87d478d8ee29ee439097df000 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 08:42:33 +0100 Subject: [PATCH 342/356] Simplify bridges --- .../library-bridge/LibraryBridgeHandlers.cpp | 3 -- programs/odbc-bridge/ColumnInfoHandler.cpp | 2 - .../odbc-bridge/IdentifierQuoteHandler.cpp | 3 -- programs/odbc-bridge/ODBCBlockInputStream.cpp | 50 ++++++------------- .../odbc-bridge/ODBCBlockOutputStream.cpp | 1 - 5 files changed, 16 insertions(+), 43 deletions(-) diff --git a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index ab146f458df..26d887cfc98 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -1,6 +1,5 @@ #include "LibraryBridgeHandlers.h" -#include "CatBoostLibraryHandler.h" #include "CatBoostLibraryHandlerFactory.h" #include "Common/ProfileEvents.h" #include "ExternalDictionaryLibraryHandler.h" @@ -11,10 +10,8 @@ #include #include #include -#include #include #include -#include #include #include #include diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp 
b/programs/odbc-bridge/ColumnInfoHandler.cpp index 774883657b7..4cb15de3b2c 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -8,13 +8,11 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include #include "getIdentifierQuote.h" diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index a23efb112de..cf5acdc4534 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -7,13 +7,10 @@ #include #include #include -#include -#include #include #include #include #include -#include #include "getIdentifierQuote.h" #include "validateODBCConnectionString.h" #include "ODBCPooledConnectionFactory.h" diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index c46144c3dc8..2cccc66a033 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -3,12 +3,8 @@ #include #include #include -#include -#include -#include #include #include -#include #include @@ -54,21 +50,7 @@ Chunk ODBCSource::generate() const auto & sample = description.sample_block.getByPosition(idx); if (!result.is_null(idx)) - { - bool is_nullable = description.types[idx].second; - - if (is_nullable) - { - ColumnNullable & column_nullable = assert_cast(*columns[idx]); - const auto & data_type = assert_cast(*sample.type); - insertValue(column_nullable.getNestedColumn(), data_type.getNestedType(), description.types[idx].first, result, idx); - column_nullable.getNullMapData().emplace_back(0); - } - else - { - insertValue(*columns[idx], sample.type, description.types[idx].first, result, idx); - } - } + insertValue(*columns[idx], sample.type, description.types[idx].first, result, idx); else insertDefaultValue(*columns[idx], *sample.column); } @@ -87,49 +69,49 @@ void ODBCSource::insertValue( switch (type) { case ValueType::vtUInt8: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtUInt16: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtUInt32: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtUInt64: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtInt8: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtInt16: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtInt32: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtInt64: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtFloat32: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtFloat64: - assert_cast(column).insertValue(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtFixedString:[[fallthrough]]; case ValueType::vtEnum8: case ValueType::vtEnum16: case ValueType::vtString: - assert_cast(column).insert(row.get(idx)); + column.insert(row.get(idx)); break; case ValueType::vtUUID: { auto value = row.get(idx); - assert_cast(column).insert(parse(value.data(), value.size())); + column.insert(parse(value.data(), value.size())); break; } case ValueType::vtDate: - 
assert_cast(column).insertValue(UInt16{LocalDate{row.get(idx)}.getDayNum()}); + column.insert(UInt16{LocalDate{row.get(idx)}.getDayNum()}); break; case ValueType::vtDateTime: { @@ -139,7 +121,7 @@ void ODBCSource::insertValue( readDateTimeText(time, in, assert_cast(data_type.get())->getTimeZone()); if (time < 0) time = 0; - assert_cast(column).insertValue(static_cast(time)); + column.insert(static_cast(time)); break; } case ValueType::vtDateTime64: @@ -149,7 +131,7 @@ void ODBCSource::insertValue( DateTime64 time = 0; const auto * datetime_type = assert_cast(data_type.get()); readDateTime64Text(time, datetime_type->getScale(), in, datetime_type->getTimeZone()); - assert_cast(column).insertValue(time); + column.insert(time); break; } case ValueType::vtDecimal32: [[fallthrough]]; diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index 87c09d1e757..37b70023169 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -1,6 +1,5 @@ #include "ODBCBlockOutputStream.h" -#include #include #include #include From 7f061ba89c5d598132c034cce980be714e9ef32c Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Sat, 9 Mar 2024 13:43:37 +0000 Subject: [PATCH 343/356] CI: add wf class in ci_config #do_not_test --- tests/ci/ci.py | 2 +- tests/ci/ci_config.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 6b754787d5e..1eec9a6771b 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1111,7 +1111,7 @@ def _configure_jobs( digests: Dict[str, str] = {} print("::group::Job Digests") - for job in CI_CONFIG.job_generator(): + for job in CI_CONFIG.job_generator(pr_info.head_ref): digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job)) digests[job] = digest print(f" job [{job.rjust(50)}] has digest [{digest}]") diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 44dea116cbe..df8bfb1c2a8 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -11,6 +11,14 @@ from ci_utils import WithIter from integration_test_images import IMAGES +class WorkFlows(metaclass=WithIter): + PULL_REQUEST = "PULL_REQUEST" + MASTER = "MASTER" + BACKPORT = "BACKPORT" + RELEASE = "RELEASE" + SYNC = "SYNC" + + class CIStages(metaclass=WithIter): NA = "UNKNOWN" BUILDS_1 = "Builds_1" @@ -694,10 +702,11 @@ class CIConfig: ), f"Invalid check_name or CI_CONFIG outdated, config not found for [{check_name}]" return res # type: ignore - def job_generator(self) -> Iterable[str]: + def job_generator(self, branch: str) -> Iterable[str]: """ traverses all check names in CI pipeline """ + assert branch for config in ( self.other_jobs_configs, self.build_config, From 4af16043b86a6e72b3f13579a938d271fd0b77dd Mon Sep 17 00:00:00 2001 From: tomershafir Date: Sat, 9 Mar 2024 21:27:28 +0200 Subject: [PATCH 344/356] improve randomness by erase buf elem when full --- src/Client/QueryFuzzer.cpp | 49 +++++++++++++++++++------------------- src/Client/QueryFuzzer.h | 6 ++--- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index ea2e5e17afd..d23b5159854 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -232,7 +232,7 @@ ASTPtr QueryFuzzer::getRandomColumnLike() return nullptr; } - ASTPtr new_ast = column_like[fuzz_rand() % column_like.size()]->clone(); + ASTPtr new_ast = column_like[fuzz_rand() % column_like.size()].second->clone(); new_ast->setAlias(""); 
return new_ast; @@ -272,7 +272,7 @@ void QueryFuzzer::replaceWithTableLike(ASTPtr & ast) return; } - ASTPtr new_ast = table_like[fuzz_rand() % table_like.size()]->clone(); + ASTPtr new_ast = table_like[fuzz_rand() % table_like.size()].second->clone(); std::string old_alias = ast->tryGetAlias(); new_ast->setAlias(old_alias); @@ -1214,51 +1214,46 @@ void QueryFuzzer::fuzz(ASTPtr & ast) } } +#define AST_FUZZER_PART_TYPE_CAP 1000 + /* * This functions collects various parts of query that we can then substitute * to a query being fuzzed. - * - * TODO: we just stop remembering new parts after our corpus reaches certain size. - * This is boring, should implement a random replacement of existing parst with - * small probability. Do this after we add this fuzzer to CI and fix all the - * problems it can routinely find even in this boring version. */ void QueryFuzzer::collectFuzzInfoMain(ASTPtr ast) { collectFuzzInfoRecurse(ast); - - column_like.clear(); - for (const auto & [name, value] : column_like_map) - { - column_like.push_back(value); - } - - table_like.clear(); - for (const auto & [name, value] : table_like_map) - { - table_like.push_back(value); - } } void QueryFuzzer::addTableLike(ASTPtr ast) { - if (table_like_map.size() > 1000) + if (table_like_map.size() > AST_FUZZER_PART_TYPE_CAP) { - table_like_map.clear(); + const auto iter = std::next(table_like.begin(), fuzz_rand() % table_like.size()); + const auto ast_del = *iter; + table_like.erase(iter); + table_like_map.erase(ast_del.first); } const auto name = ast->formatForErrorMessage(); if (name.size() < 200) { - table_like_map.insert({name, ast}); + const auto res = table_like_map.insert({name, ast}); + if (res.second) + { + table_like.push_back({name, ast}); + } } } void QueryFuzzer::addColumnLike(ASTPtr ast) { - if (column_like_map.size() > 1000) + if (column_like_map.size() > AST_FUZZER_PART_TYPE_CAP) { - column_like_map.clear(); + const auto iter = std::next(column_like.begin(), fuzz_rand() % column_like.size()); + const auto ast_del = *iter; + column_like.erase(iter); + column_like_map.erase(ast_del.first); } const auto name = ast->formatForErrorMessage(); @@ -1273,7 +1268,11 @@ void QueryFuzzer::addColumnLike(ASTPtr ast) } if (name.size() < 200) { - column_like_map.insert({name, ast}); + const auto res = column_like_map.insert({name, ast}); + if (res.second) + { + column_like.push_back({name, ast}); + } } } diff --git a/src/Client/QueryFuzzer.h b/src/Client/QueryFuzzer.h index 3bc7b0842d3..6165e589cae 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Client/QueryFuzzer.h @@ -50,12 +50,12 @@ struct QueryFuzzer // we are currently fuzzing. We add some part from each new query we are asked // to fuzz, and keep this state between queries, so the fuzzing output becomes // more interesting over time, as the queries mix. - // The maps are used for collection, and the vectors are used for random access. + // The hash tables are used for collection, and the vectors are used for random access. std::unordered_map column_like_map; - std::vector column_like; + std::vector> column_like; std::unordered_map table_like_map; - std::vector table_like; + std::vector> table_like; // Some debug fields for detecting problematic ASTs with loops. // These are reset for each fuzzMain call. 
From 681bd28c877a365b8f18f6325c00e697ee83baef Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Sat, 9 Mar 2024 20:38:23 +0000 Subject: [PATCH 345/356] CI: make style check fast #do_not_test --- docker/test/style/Dockerfile | 1 - tests/ci/style_check.py | 41 ++++++++++++++----- utils/check-style/check-pylint | 15 +++++++ utils/check-style/check-style | 10 ----- utils/check-style/check_cpp_docs.sh | 34 +++++++++++++++ utils/check-style/check_py.sh | 17 ++++++++ .../process_style_check_result.py | 7 ++-- 7 files changed, 100 insertions(+), 25 deletions(-) create mode 100755 utils/check-style/check-pylint create mode 100755 utils/check-style/check_cpp_docs.sh create mode 100755 utils/check-style/check_py.sh rename {docker/test/style => utils/check-style}/process_style_check_result.py (96%) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 122f558bab2..b4ffcfb597c 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -60,5 +60,4 @@ RUN arch=${TARGETARCH:-amd64} \ COPY run.sh / -COPY process_style_check_result.py / CMD ["/bin/bash", "/run.sh"] diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index d004f3ed215..d0565e136d3 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import argparse +from concurrent.futures import ProcessPoolExecutor import csv import logging import os @@ -119,7 +120,7 @@ def checkout_last_ref(pr_info: PRInfo) -> None: def main(): logging.basicConfig(level=logging.INFO) logging.getLogger("git_helper").setLevel(logging.DEBUG) - args = parse_args() + # args = parse_args() stopwatch = Stopwatch() @@ -127,28 +128,46 @@ def main(): temp_path = Path(TEMP_PATH) temp_path.mkdir(parents=True, exist_ok=True) - pr_info = PRInfo() + # pr_info = PRInfo() IMAGE_NAME = "clickhouse/style-test" image = pull_image(get_docker_image(IMAGE_NAME)) - cmd = ( + cmd_1 = ( f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " - f"{image}" + f"--entrypoint= -w/ClickHouse/utils/check-style " + f"{image} ./check_cpp_docs.sh" ) + cmd_2 = ( + f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " + f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " + f"--entrypoint= -w/ClickHouse/utils/check-style " + f"{image} ./check_py.sh" + ) + logging.info("Is going to run the command: %s", cmd_1) + logging.info("Is going to run the command: %s", cmd_2) - if args.push: - checkout_head(pr_info) + with ProcessPoolExecutor(max_workers=2) as executor: + # Submit commands for execution in parallel + future1 = executor.submit(subprocess.run, cmd_1, shell=True) + future2 = executor.submit(subprocess.run, cmd_2, shell=True) + # Wait for both commands to complete + _ = future1.result() + _ = future2.result() + + # if args.push: + # checkout_head(pr_info) - logging.info("Is going to run the command: %s", cmd) subprocess.check_call( - cmd, + f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} " + f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || " + f'echo -e "failure\tCannot parse results" > {temp_path}/check_status.tsv', shell=True, ) - if args.push: - commit_push_staged(pr_info) - checkout_last_ref(pr_info) + # if args.push: + # commit_push_staged(pr_info) + # checkout_last_ref(pr_info) state, description, test_results, additional_files = process_result(temp_path) diff --git 
a/utils/check-style/check-pylint b/utils/check-style/check-pylint new file mode 100755 index 00000000000..7959a414023 --- /dev/null +++ b/utils/check-style/check-pylint @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + + +LC_ALL="en_US.UTF-8" +ROOT_PATH=$(git rev-parse --show-toplevel) + +function xargs-pylint { + # $1 is number maximum arguments per pylint process + sort | awk '$2=="text/x-script.python" {print $1}' | \ + xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n +} + +find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50 +# Beware, there lambdas are checked. All of them contain `app`, and it causes brain-cucumber-zalgo +find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1 diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 3a5d0c053ea..d7387c3f843 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -152,16 +152,6 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | grep -vP $EXCLUDE_DIRS | xargs xmllint --noout --nonet -function xargs-pylint { - # $1 is number maximum arguments per pylint process - sort | awk '$2=="text/x-script.python" {print $1}' | \ - xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n -} - -find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50 -# Beware, there lambdas are checked. All of them contain `app`, and it causes brain-cucumber-zalgo -find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1 - find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | xargs yamllint --config-file=$ROOT_PATH/.yamllint diff --git a/utils/check-style/check_cpp_docs.sh b/utils/check-style/check_cpp_docs.sh new file mode 100755 index 00000000000..7ad3cede758 --- /dev/null +++ b/utils/check-style/check_cpp_docs.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# yaml check is not the best one + +cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv + +# FIXME: 30 sec to wait +# echo "Check duplicates" | ts +# ./check-duplicate-includes.sh |& tee /test_output/duplicate_includes_output.txt + +echo "Check style" | ts +./check-style -n |& tee /test_output/style_output.txt +echo "Check typos" | ts +./check-typos |& tee /test_output/typos_output.txt +echo "Check docs spelling" | ts +./check-doc-aspell |& tee /test_output/docs_spelling_output.txt +echo "Check whitespaces" | ts +./check-whitespaces -n |& tee /test_output/whitespaces_output.txt +echo "Check workflows" | ts +./check-workflows |& tee /test_output/workflows_output.txt +echo "Check submodules" | ts +./check-submodules |& tee /test_output/submodules_output.txt +echo "Check style. 
Done" | ts + +# FIXME: 6 min to wait +# echo "Check shell scripts with shellcheck" | ts +# ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt + + +# FIXME: move out +# /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +# echo "Check help for changelog generator works" | ts +# cd ../changelog || exit 1 +# ./changelog.py -h 2>/dev/null 1>&2 diff --git a/utils/check-style/check_py.sh b/utils/check-style/check_py.sh new file mode 100755 index 00000000000..48c02013734 --- /dev/null +++ b/utils/check-style/check_py.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# yaml check is not the best one + +cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv + +# FIXME: 1 min to wait + head checkout +# echo "Check python formatting with black" | ts +# ./check-black -n |& tee /test_output/black_output.txt + +echo "Check pylint" | ts +./check-pylint -n |& tee /test_output/pylint_output.txt +echo "Check pylint. Done" | ts + +echo "Check python type hinting with mypy" | ts +./check-mypy -n |& tee /test_output/mypy_output.txt +echo "Check python type hinting with mypy. Done" | ts diff --git a/docker/test/style/process_style_check_result.py b/utils/check-style/process_style_check_result.py similarity index 96% rename from docker/test/style/process_style_check_result.py rename to utils/check-style/process_style_check_result.py index bc06df1af31..7980c01dd37 100755 --- a/docker/test/style/process_style_check_result.py +++ b/utils/check-style/process_style_check_result.py @@ -13,10 +13,11 @@ def process_result(result_folder): description = "" test_results = [] checks = ( - "duplicate includes", - "shellcheck", + #"duplicate includes", + #"shellcheck", "style", - "black", + "pylint", + #"black", "mypy", "typos", "whitespaces", From 453eb259842f3181d5f9329e0e82e4c1e66f18b0 Mon Sep 17 00:00:00 2001 From: tomershafir Date: Sun, 10 Mar 2024 08:42:54 +0200 Subject: [PATCH 346/356] rm trailing whitespace --- src/Client/QueryFuzzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index d23b5159854..7f1dce4b29a 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -1232,7 +1232,7 @@ void QueryFuzzer::addTableLike(ASTPtr ast) const auto iter = std::next(table_like.begin(), fuzz_rand() % table_like.size()); const auto ast_del = *iter; table_like.erase(iter); - table_like_map.erase(ast_del.first); + table_like_map.erase(ast_del.first); } const auto name = ast->formatForErrorMessage(); From 9ddec346a03108198fea011c1113495050ac628b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 09:02:34 +0100 Subject: [PATCH 347/356] Miscellaneous --- programs/odbc-bridge/ODBCBlockInputStream.cpp | 8 +- src/Storages/MergeTree/MergeTask.cpp | 4 +- .../test_odbc_interaction/test_exiled.py | 111 ------------------ 3 files changed, 6 insertions(+), 117 deletions(-) delete mode 100644 tests/integration/test_odbc_interaction/test_exiled.py diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 2cccc66a033..4fd9b4cca45 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -98,7 +98,7 @@ void ODBCSource::insertValue( case ValueType::vtFloat64: column.insert(row.get(idx)); break; - case ValueType::vtFixedString:[[fallthrough]]; + case ValueType::vtFixedString: case ValueType::vtEnum8: case ValueType::vtEnum16: 
case ValueType::vtString: @@ -134,9 +134,9 @@ void ODBCSource::insertValue( column.insert(time); break; } - case ValueType::vtDecimal32: [[fallthrough]]; - case ValueType::vtDecimal64: [[fallthrough]]; - case ValueType::vtDecimal128: [[fallthrough]]; + case ValueType::vtDecimal32: + case ValueType::vtDecimal64: + case ValueType::vtDecimal128: case ValueType::vtDecimal256: { auto value = row.get(idx); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index aa38198334e..06b6927bc43 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -296,7 +296,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() switch (global_ctx->chosen_merge_algorithm) { - case MergeAlgorithm::Horizontal : + case MergeAlgorithm::Horizontal: { global_ctx->merging_columns = global_ctx->storage_columns; global_ctx->merging_column_names = global_ctx->all_column_names; @@ -304,7 +304,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->gathering_column_names.clear(); break; } - case MergeAlgorithm::Vertical : + case MergeAlgorithm::Vertical: { ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->createRawStream(); ctx->rows_sources_write_buf = std::make_unique(*ctx->rows_sources_uncompressed_write_buf); diff --git a/tests/integration/test_odbc_interaction/test_exiled.py b/tests/integration/test_odbc_interaction/test_exiled.py deleted file mode 100644 index bdc819b8221..00000000000 --- a/tests/integration/test_odbc_interaction/test_exiled.py +++ /dev/null @@ -1,111 +0,0 @@ -import time -import logging -import pytest - -from helpers.cluster import ClickHouseCluster, assert_eq_with_retry -from test_odbc_interaction.test import ( - create_mysql_db, - create_mysql_table, - get_mysql_conn, - skip_test_msan, -) - - -cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", - with_odbc_drivers=True, - main_configs=["configs/openssl.xml", "configs/odbc_logging.xml"], - stay_alive=True, - dictionaries=["configs/dictionaries/sqlite3_odbc_hashed_dictionary.xml"], -) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - sqlite_db = node1.odbc_drivers["SQLite3"]["Database"] - logging.debug(f"sqlite data received: {sqlite_db}") - node1.exec_in_container( - [ - "sqlite3", - sqlite_db, - "CREATE TABLE t2(id INTEGER PRIMARY KEY ASC, X INTEGER, Y, Z);", - ], - privileged=True, - user="root", - ) - - node1.exec_in_container( - ["sqlite3", sqlite_db, "INSERT INTO t2 values(1, 1, 2, 3);"], - privileged=True, - user="root", - ) - - node1.query("SYSTEM RELOAD DICTIONARY sqlite3_odbc_hashed") - - yield cluster - except Exception as ex: - logging.exception(ex) - raise ex - finally: - cluster.shutdown() - - -# This test kills ClickHouse server and ODBC bridge and in worst scenario -# may cause group test crashes. Thus, this test is executed in a separate "module" -# with separate environment. -def test_bridge_dies_with_parent(started_cluster): - skip_test_msan(node1) - - if node1.is_built_with_address_sanitizer(): - # TODO: Leak sanitizer falsely reports about a leak of 16 bytes in clickhouse-odbc-bridge in this test and - # that's linked somehow with that we have replaced getauxval() in glibc-compatibility. - # The leak sanitizer calls getauxval() for its own purposes, and our replaced version doesn't seem to be equivalent in that case. 
- pytest.skip( - "Leak sanitizer falsely reports about a leak of 16 bytes in clickhouse-odbc-bridge" - ) - - assert_eq_with_retry( - node1, "select dictGetUInt8('sqlite3_odbc_hashed', 'Z', toUInt64(1))", "3" - ) - - clickhouse_pid = node1.get_process_pid("clickhouse server") - bridge_pid = node1.get_process_pid("odbc-bridge") - assert clickhouse_pid is not None - assert bridge_pid is not None - - try: - node1.exec_in_container( - ["kill", str(clickhouse_pid)], privileged=True, user="root" - ) - except: - pass - - for _ in range(30): - time.sleep(1) - clickhouse_pid = node1.get_process_pid("clickhouse server") - if clickhouse_pid is None: - break - - for _ in range(30): - time.sleep(1) # just for sure, that odbc-bridge caught signal - bridge_pid = node1.get_process_pid("odbc-bridge") - if bridge_pid is None: - break - - if bridge_pid: - out = node1.exec_in_container( - ["gdb", "-p", str(bridge_pid), "--ex", "thread apply all bt", "--ex", "q"], - privileged=True, - user="root", - ) - logging.debug(f"Bridge is running, gdb output:\n{out}") - - try: - assert clickhouse_pid is None - assert bridge_pid is None - finally: - node1.start_clickhouse(20) From 1a2d403f7b7dc16676d1b550dd346c06503c550c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 09:16:21 +0100 Subject: [PATCH 348/356] Miscellaneae --- programs/odbc-bridge/ODBCBlockInputStream.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 4fd9b4cca45..934639ae604 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -118,7 +118,8 @@ void ODBCSource::insertValue( auto value = row.get(idx); ReadBufferFromString in(value); time_t time = 0; - readDateTimeText(time, in, assert_cast(data_type.get())->getTimeZone()); + const DataTypeDateTime & datetime_type = assert_cast(*data_type); + readDateTimeText(time, in, datetime_type.getTimeZone()); if (time < 0) time = 0; column.insert(static_cast(time)); @@ -129,8 +130,8 @@ void ODBCSource::insertValue( auto value = row.get(idx); ReadBufferFromString in(value); DateTime64 time = 0; - const auto * datetime_type = assert_cast(data_type.get()); - readDateTime64Text(time, datetime_type->getScale(), in, datetime_type->getTimeZone()); + const DataTypeDateTime64 & datetime_type = assert_cast(*data_type); + readDateTime64Text(time, datetime_type.getScale(), in, datetime_type.getTimeZone()); column.insert(time); break; } From 157adefc70d8152f744b23152309cbf03b00c990 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 09:17:29 +0100 Subject: [PATCH 349/356] Fix error --- programs/odbc-bridge/ODBCBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 934639ae604..59a5deac960 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -50,7 +50,7 @@ Chunk ODBCSource::generate() const auto & sample = description.sample_block.getByPosition(idx); if (!result.is_null(idx)) - insertValue(*columns[idx], sample.type, description.types[idx].first, result, idx); + insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); else insertDefaultValue(*columns[idx], *sample.column); } From 3b8a8c9649b00d360c5d6169dc769c5371dd44a2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 
10 Mar 2024 09:28:30 +0100 Subject: [PATCH 350/356] Better gitignore --- docker/test/stateless/.gitignore | 1 + programs/server/.gitignore | 1 + 2 files changed, 2 insertions(+) create mode 100644 docker/test/stateless/.gitignore diff --git a/docker/test/stateless/.gitignore b/docker/test/stateless/.gitignore new file mode 100644 index 00000000000..928fed26d6d --- /dev/null +++ b/docker/test/stateless/.gitignore @@ -0,0 +1 @@ +/minio_data diff --git a/programs/server/.gitignore b/programs/server/.gitignore index ddc480e4b29..34a774bde9d 100644 --- a/programs/server/.gitignore +++ b/programs/server/.gitignore @@ -2,6 +2,7 @@ /metadata_dropped /data /store +/disks /access /flags /dictionaries_lib From d8e3fa6dc107a4b43edb091a3688b1672a6c2f70 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 09:35:57 +0100 Subject: [PATCH 351/356] Fix something strange --- src/Interpreters/fuzzers/execute_query_fuzzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp index 6f84a60f2af..edff202d547 100644 --- a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp +++ b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp @@ -3,7 +3,7 @@ #include #include "Processors/Executors/PullingPipelineExecutor.h" -#include +#include #include #include #include From 5c41727725eb1066ff2d86fc5da3e272e03155c1 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sun, 3 Mar 2024 14:22:40 +0100 Subject: [PATCH 352/356] http connections pools --- .../Net/include/Poco/Net/HTTPChunkedStream.h | 4 + .../Net/include/Poco/Net/HTTPClientSession.h | 24 +- .../include/Poco/Net/HTTPFixedLengthStream.h | 4 + base/poco/Net/include/Poco/Net/HTTPSession.h | 24 +- base/poco/Net/include/Poco/Net/HTTPStream.h | 2 + base/poco/Net/src/HTTPChunkedStream.cpp | 4 +- base/poco/Net/src/HTTPClientSession.cpp | 23 +- base/poco/Net/src/HTTPFixedLengthStream.cpp | 6 + programs/server/Server.cpp | 18 + .../static-files-disk-uploader.cpp | 6 +- src/Access/Common/AccessType.h | 3 +- src/Access/HTTPAuthClient.h | 2 +- .../CatBoostLibraryBridgeHelper.cpp | 104 +- .../ExternalDictionaryLibraryBridgeHelper.cpp | 38 +- src/BridgeHelper/XDBCBridgeHelper.h | 26 +- src/Client/Connection.cpp | 16 + src/Common/CurrentMetrics.cpp | 12 + src/Common/DNSResolver.cpp | 3 +- src/Common/HTTPConnectionPool.cpp | 865 ++++++++++++++++ src/Common/HTTPConnectionPool.h | 91 ++ src/Common/HostResolvePool.cpp | 293 ++++++ src/Common/HostResolvePool.h | 218 ++++ src/Common/ProfileEvents.cpp | 29 + src/Common/ProxyConfiguration.h | 12 +- src/Common/ProxyListConfigurationResolver.cpp | 2 - .../RemoteProxyConfigurationResolver.cpp | 2 +- src/Common/tests/gtest_connection_pool.cpp | 558 +++++++++++ src/Common/tests/gtest_resolve_pool.cpp | 278 ++++++ src/Core/ServerSettings.h | 11 +- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 3 +- src/Dictionaries/HTTPDictionarySource.cpp | 91 +- src/Dictionaries/XDBCDictionarySource.cpp | 15 +- src/Dictionaries/XDBCDictionarySource.h | 2 +- src/Disks/IO/ReadBufferFromWebServer.cpp | 81 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 4 - .../ObjectStorages/Web/WebObjectStorage.cpp | 35 +- src/Functions/sqid.cpp | 2 +- src/IO/ConnectionTimeouts.cpp | 15 + src/IO/ConnectionTimeouts.h | 4 + src/IO/HTTPCommon.cpp | 379 +------ src/IO/HTTPCommon.h | 48 +- src/IO/LimitReadBuffer.cpp | 25 +- src/IO/LimitReadBuffer.h | 18 +- src/IO/MMapReadBufferFromFileDescriptor.cpp | 2 +- 
src/IO/MMapReadBufferFromFileDescriptor.h | 2 +- src/IO/ReadBuffer.h | 12 +- src/IO/ReadBufferFromFileDescriptor.cpp | 4 +- src/IO/ReadBufferFromFileDescriptor.h | 4 +- src/IO/ReadBufferFromIStream.cpp | 40 +- src/IO/ReadBufferFromS3.cpp | 100 +- src/IO/ReadBufferFromS3.h | 6 +- src/IO/ReadSettings.h | 2 +- src/IO/ReadWriteBufferFromHTTP.cpp | 945 +++++++----------- src/IO/ReadWriteBufferFromHTTP.h | 426 ++++---- src/IO/S3/PocoHTTPClient.cpp | 63 +- src/IO/S3/PocoHTTPClient.h | 17 +- src/IO/SeekableReadBuffer.h | 2 +- src/IO/{S3 => }/SessionAwareIOStream.h | 2 +- src/IO/WriteBufferFromHTTP.cpp | 6 +- src/IO/WriteBufferFromHTTP.h | 3 +- src/IO/copyData.cpp | 2 +- src/Interpreters/Context.cpp | 13 - src/Interpreters/Context.h | 4 - src/Interpreters/DDLTask.cpp | 5 + src/Interpreters/InterpreterSystemQuery.cpp | 9 + src/Parsers/ASTSystemQuery.cpp | 1 + src/Parsers/ASTSystemQuery.h | 1 + .../Formats/Impl/AvroRowInputFormat.cpp | 4 +- src/Storages/MergeTree/DataPartsExchange.cpp | 25 +- src/Storages/MergeTree/DataPartsExchange.h | 10 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageS3.cpp | 4 +- src/Storages/StorageURL.cpp | 49 +- src/TableFunctions/ITableFunctionXDBC.cpp | 17 +- .../test_cancel_backup.py | 2 +- .../configs/setting.xml | 1 + .../configs/storage_conf.xml | 1 + .../test_disk_over_web_server/test.py | 2 +- tests/integration/test_dns_cache/test.py | 12 +- tests/integration/test_http_failover/test.py | 6 +- .../test_redirect_url_storage/test.py | 2 +- .../test_s3_table_functions/test.py | 1 + .../s3_mocks/unstable_server.py | 2 +- .../0_stateless/00646_url_engine.python | 13 +- .../01271_show_privileges.reference | 1 + .../0_stateless/01293_show_settings.reference | 1 + .../0_stateless/02205_HTTP_user_agent.python | 13 +- .../0_stateless/02233_HTTP_ranged.python | 25 +- ...rofile_events_from_query_log_and_client.sh | 2 +- ...89_reading_from_s3_with_connection_pool.sh | 6 +- .../02833_url_without_path_encoding.sh | 4 +- 92 files changed, 3507 insertions(+), 1771 deletions(-) create mode 100644 src/Common/HTTPConnectionPool.cpp create mode 100644 src/Common/HTTPConnectionPool.h create mode 100644 src/Common/HostResolvePool.cpp create mode 100644 src/Common/HostResolvePool.h create mode 100644 src/Common/tests/gtest_connection_pool.cpp create mode 100644 src/Common/tests/gtest_resolve_pool.cpp rename src/IO/{S3 => }/SessionAwareIOStream.h (97%) diff --git a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h index 5f4729c9278..a6576aa561d 100644 --- a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h @@ -45,6 +45,8 @@ namespace Net ~HTTPChunkedStreamBuf(); void close(); + bool isComplete() const { return _chunk == std::char_traits::eof(); } + protected: int readFromDevice(char * buffer, std::streamsize length); int writeToDevice(const char * buffer, std::streamsize length); @@ -68,6 +70,8 @@ namespace Net ~HTTPChunkedIOS(); HTTPChunkedStreamBuf * rdbuf(); + bool isComplete() const { return _buf.isComplete(); } + protected: HTTPChunkedStreamBuf _buf; }; diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h index 7c0caa1c18b..1cef988566c 100644 --- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h @@ -210,7 +210,7 @@ namespace Net void setKeepAliveTimeout(const Poco::Timespan & timeout); /// Sets the connection timeout for HTTP connections. 
- const Poco::Timespan & getKeepAliveTimeout() const; + Poco::Timespan getKeepAliveTimeout() const; /// Returns the connection timeout for HTTP connections. virtual std::ostream & sendRequest(HTTPRequest & request); @@ -275,7 +275,7 @@ namespace Net /// This method should only be called if the request contains /// a "Expect: 100-continue" header. - void flushRequest(); + virtual void flushRequest(); /// Flushes the request stream. /// /// Normally this method does not need to be called. @@ -283,7 +283,7 @@ namespace Net /// fully sent if receiveResponse() is not called, e.g., /// because the underlying socket will be detached. - void reset(); + virtual void reset(); /// Resets the session and closes the socket. /// /// The next request will initiate a new connection, @@ -303,6 +303,9 @@ namespace Net /// Returns true if the proxy should be bypassed /// for the current host. + const Poco::Timestamp & getLastRequest() const; + /// Returns time when connection has been used last time + protected: enum { @@ -338,6 +341,10 @@ namespace Net /// Calls proxyConnect() and attaches the resulting StreamSocket /// to the HTTPClientSession. + void setLastRequest(Poco::Timestamp time); + + void assign(HTTPClientSession & session); + HTTPSessionFactory _proxySessionFactory; /// Factory to create HTTPClientSession to proxy. private: @@ -433,11 +440,20 @@ namespace Net } - inline const Poco::Timespan & HTTPClientSession::getKeepAliveTimeout() const + inline Poco::Timespan HTTPClientSession::getKeepAliveTimeout() const { return _keepAliveTimeout; } + inline const Poco::Timestamp & HTTPClientSession::getLastRequest() const + { + return _lastRequest; + } + + inline void HTTPClientSession::setLastRequest(Poco::Timestamp time) + { + _lastRequest = time; + } } } // namespace Poco::Net diff --git a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h index 2f4df102605..17fa47cfa9b 100644 --- a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h @@ -48,6 +48,8 @@ namespace Net HTTPFixedLengthStreamBuf(HTTPSession & session, ContentLength length, openmode mode); ~HTTPFixedLengthStreamBuf(); + bool isComplete() const; + protected: int readFromDevice(char * buffer, std::streamsize length); int writeToDevice(const char * buffer, std::streamsize length); @@ -67,6 +69,8 @@ namespace Net ~HTTPFixedLengthIOS(); HTTPFixedLengthStreamBuf * rdbuf(); + bool isComplete() const { return _buf.isComplete(); } + protected: HTTPFixedLengthStreamBuf _buf; }; diff --git a/base/poco/Net/include/Poco/Net/HTTPSession.h b/base/poco/Net/include/Poco/Net/HTTPSession.h index 934b34be5d5..cac14f479db 100644 --- a/base/poco/Net/include/Poco/Net/HTTPSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPSession.h @@ -64,6 +64,15 @@ namespace Net Poco::Timespan getTimeout() const; /// Returns the timeout for the HTTP session. + Poco::Timespan getConnectionTimeout() const; + /// Returns connection timeout for the HTTP session. + + Poco::Timespan getSendTimeout() const; + /// Returns send timeout for the HTTP session. + + Poco::Timespan getReceiveTimeout() const; + /// Returns receive timeout for the HTTP session. + bool connected() const; /// Returns true if the underlying socket is connected. 
@@ -217,12 +226,25 @@ namespace Net return _keepAlive; } - inline Poco::Timespan HTTPSession::getTimeout() const { return _receiveTimeout; } + inline Poco::Timespan HTTPSession::getConnectionTimeout() const + { + return _connectionTimeout; + } + + inline Poco::Timespan HTTPSession::getSendTimeout() const + { + return _sendTimeout; + } + + inline Poco::Timespan HTTPSession::getReceiveTimeout() const + { + return _receiveTimeout; + } inline StreamSocket & HTTPSession::socket() { diff --git a/base/poco/Net/include/Poco/Net/HTTPStream.h b/base/poco/Net/include/Poco/Net/HTTPStream.h index 48502347b2c..a00a861880f 100644 --- a/base/poco/Net/include/Poco/Net/HTTPStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPStream.h @@ -63,6 +63,8 @@ namespace Net ~HTTPIOS(); HTTPStreamBuf * rdbuf(); + bool isComplete() const { return false; } + protected: HTTPStreamBuf _buf; }; diff --git a/base/poco/Net/src/HTTPChunkedStream.cpp b/base/poco/Net/src/HTTPChunkedStream.cpp index 376e3f55492..16ed1e71c31 100644 --- a/base/poco/Net/src/HTTPChunkedStream.cpp +++ b/base/poco/Net/src/HTTPChunkedStream.cpp @@ -49,10 +49,12 @@ HTTPChunkedStreamBuf::~HTTPChunkedStreamBuf() void HTTPChunkedStreamBuf::close() { - if (_mode & std::ios::out) + if (_mode & std::ios::out && _chunk != std::char_traits::eof()) { sync(); _session.write("0\r\n\r\n", 5); + + _chunk = std::char_traits::eof(); } } diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index 2282cca682b..33a3dcc4901 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -227,7 +227,7 @@ void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout) std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) { _pRequestStream = 0; - _pResponseStream = 0; + _pResponseStream = 0; clearException(); _responseReceived = false; @@ -501,5 +501,26 @@ bool HTTPClientSession::bypassProxy() const else return false; } +void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session) +{ + poco_assert (this != &session); + + if (session.buffered()) + throw Poco::LogicException("assign a session with not empty buffered data"); + + if (buffered()) + throw Poco::LogicException("assign to a session with not empty buffered data"); + + attachSocket(session.detachSocket()); + setLastRequest(session.getLastRequest()); + setResolvedHost(session.getResolvedHost()); + setKeepAlive(session.getKeepAlive()); + + setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout()); + setKeepAliveTimeout(session.getKeepAliveTimeout()); + setProxyConfig(session.getProxyConfig()); + + session.reset(); +} } } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPFixedLengthStream.cpp b/base/poco/Net/src/HTTPFixedLengthStream.cpp index fd77ff71cd9..837e5723c57 100644 --- a/base/poco/Net/src/HTTPFixedLengthStream.cpp +++ b/base/poco/Net/src/HTTPFixedLengthStream.cpp @@ -43,6 +43,12 @@ HTTPFixedLengthStreamBuf::~HTTPFixedLengthStreamBuf() } +bool HTTPFixedLengthStreamBuf::isComplete() const +{ + return _count == _length; +} + + int HTTPFixedLengthStreamBuf::readFromDevice(char* buffer, std::streamsize length) { int n = 0; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f2c5dcc0f6d..7636f221ab5 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -1547,6 +1548,23 @@ try 
FileCacheFactory::instance().updateSettingsFromConfig(*config); + HTTPConnectionPools::instance().setLimits( + HTTPConnectionPools::Limits{ + new_server_settings.disk_connections_soft_limit, + new_server_settings.disk_connections_warn_limit, + new_server_settings.disk_connections_store_limit, + }, + HTTPConnectionPools::Limits{ + new_server_settings.storage_connections_soft_limit, + new_server_settings.storage_connections_warn_limit, + new_server_settings.storage_connections_store_limit, + }, + HTTPConnectionPools::Limits{ + new_server_settings.http_connections_soft_limit, + new_server_settings.http_connections_warn_limit, + new_server_settings.http_connections_store_limit, + }); + ProfileEvents::increment(ProfileEvents::MainConfigLoads); /// Must be the last. diff --git a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp index d54a2d2da6e..dfe68c819fc 100644 --- a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp +++ b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp @@ -65,7 +65,7 @@ void processFile(const fs::path & file_path, const fs::path & dst_path, bool tes /// test mode for integration tests. if (test_mode) - dst_buf = std::make_shared(Poco::URI(dst_file_path), Poco::Net::HTTPRequest::HTTP_PUT); + dst_buf = std::make_shared(HTTPConnectionGroupType::HTTP, Poco::URI(dst_file_path), Poco::Net::HTTPRequest::HTTP_PUT); else dst_buf = std::make_shared(dst_file_path); @@ -88,7 +88,7 @@ void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_ { dst_path /= "store"; auto files_root = dst_path / prefix; - root_meta = std::make_shared(Poco::URI(files_root / ".index"), Poco::Net::HTTPRequest::HTTP_PUT); + root_meta = std::make_shared(HTTPConnectionGroupType::HTTP, Poco::URI(files_root / ".index"), Poco::Net::HTTPRequest::HTTP_PUT); } else { @@ -112,7 +112,7 @@ void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_ if (test_mode) { auto files_root = dst_path / prefix; - directory_meta = std::make_shared(Poco::URI(dst_path / directory_prefix / ".index"), Poco::Net::HTTPRequest::HTTP_PUT); + directory_meta = std::make_shared(HTTPConnectionGroupType::HTTP, Poco::URI(dst_path / directory_prefix / ".index"), Poco::Net::HTTPRequest::HTTP_PUT); } else { diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 87f96ca48be..6095f8ce6d3 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -154,7 +154,8 @@ enum class AccessType M(SET_DEFINER, "", USER_NAME, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ - M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \ + M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \ + M(SYSTEM_DROP_CONNECTIONS_CACHE, "SYSTEM DROP CONNECTIONS CACHE, DROP CONNECTIONS CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_MARK_CACHE, "SYSTEM DROP MARK, DROP MARK CACHE, DROP MARKS", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_UNCOMPRESSED_CACHE, "SYSTEM DROP UNCOMPRESSED, DROP UNCOMPRESSED CACHE, DROP UNCOMPRESSED", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_MMAP_CACHE, "SYSTEM DROP MMAP, DROP MMAP CACHE, DROP MMAP", GLOBAL, SYSTEM_DROP_CACHE) \ diff --git a/src/Access/HTTPAuthClient.h b/src/Access/HTTPAuthClient.h index caefe869005..a8b56cf05a7 100644 --- a/src/Access/HTTPAuthClient.h +++ b/src/Access/HTTPAuthClient.h @@ -36,7 +36,7 @@ public: 
Result authenticateRequest(Poco::Net::HTTPRequest & request) const { - auto session = makeHTTPSession(uri, timeouts); + auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, uri, timeouts); Poco::Net::HTTPResponse response; auto milliseconds_to_wait = retry_initial_backoff_ms; diff --git a/src/BridgeHelper/CatBoostLibraryBridgeHelper.cpp b/src/BridgeHelper/CatBoostLibraryBridgeHelper.cpp index 4db1eb720ac..bb9d890a691 100644 --- a/src/BridgeHelper/CatBoostLibraryBridgeHelper.cpp +++ b/src/BridgeHelper/CatBoostLibraryBridgeHelper.cpp @@ -58,8 +58,12 @@ bool CatBoostLibraryBridgeHelper::bridgeHandShake() String result; try { - ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts, credentials); - readString(result, buf); + auto buf = BuilderRWBufferFromHTTP(getPingURI()) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withTimeouts(http_timeouts) + .create(credentials); + + readString(result, *buf); } catch (...) { @@ -79,29 +83,29 @@ ExternalModelInfos CatBoostLibraryBridgeHelper::listModels() { startBridgeSync(); - ReadWriteBufferFromHTTP buf( - createRequestURI(CATBOOST_LIST_METHOD), - Poco::Net::HTTPRequest::HTTP_POST, - [](std::ostream &) {}, - http_timeouts, credentials); + auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_LIST_METHOD)) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(http_timeouts) + .create(credentials); ExternalModelInfos result; UInt64 num_rows; - readIntBinary(num_rows, buf); + readIntBinary(num_rows, *buf); for (UInt64 i = 0; i < num_rows; ++i) { ExternalModelInfo info; - readStringBinary(info.model_path, buf); - readStringBinary(info.model_type, buf); + readStringBinary(info.model_path, *buf); + readStringBinary(info.model_type, *buf); UInt64 t; - readIntBinary(t, buf); + readIntBinary(t, *buf); info.loading_start_time = std::chrono::system_clock::from_time_t(t); - readIntBinary(t, buf); + readIntBinary(t, *buf); info.loading_duration = std::chrono::milliseconds(t); result.push_back(info); @@ -116,17 +120,19 @@ void CatBoostLibraryBridgeHelper::removeModel() assert(model_path); - ReadWriteBufferFromHTTP buf( - createRequestURI(CATBOOST_REMOVEMODEL_METHOD), - Poco::Net::HTTPRequest::HTTP_POST, - [this](std::ostream & os) - { - os << "model_path=" << escapeForFileName(*model_path); - }, - http_timeouts, credentials); + auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_REMOVEMODEL_METHOD)) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(http_timeouts) + .withOutCallback( + [this](std::ostream & os) + { + os << "model_path=" << escapeForFileName(*model_path); + }) + .create(credentials); String result; - readStringBinary(result, buf); + readStringBinary(result, *buf); assert(result == "1"); } @@ -134,14 +140,14 @@ void CatBoostLibraryBridgeHelper::removeAllModels() { startBridgeSync(); - ReadWriteBufferFromHTTP buf( - createRequestURI(CATBOOST_REMOVEALLMODELS_METHOD), - Poco::Net::HTTPRequest::HTTP_POST, - [](std::ostream &){}, - http_timeouts, credentials); + auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_REMOVEALLMODELS_METHOD)) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(http_timeouts) + .create(credentials); String result; - readStringBinary(result, buf); + readStringBinary(result, *buf); assert(result == "1"); } @@ -151,18 +157,20 @@ size_t 
CatBoostLibraryBridgeHelper::getTreeCount() assert(model_path && library_path); - ReadWriteBufferFromHTTP buf( - createRequestURI(CATBOOST_GETTREECOUNT_METHOD), - Poco::Net::HTTPRequest::HTTP_POST, - [this](std::ostream & os) - { - os << "library_path=" << escapeForFileName(*library_path) << "&"; - os << "model_path=" << escapeForFileName(*model_path); - }, - http_timeouts, credentials); + auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_GETTREECOUNT_METHOD)) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(http_timeouts) + .withOutCallback( + [this](std::ostream & os) + { + os << "library_path=" << escapeForFileName(*library_path) << "&"; + os << "model_path=" << escapeForFileName(*model_path); + }) + .create(credentials); size_t result; - readIntBinary(result, buf); + readIntBinary(result, *buf); return result; } @@ -177,17 +185,19 @@ ColumnPtr CatBoostLibraryBridgeHelper::evaluate(const ColumnsWithTypeAndName & c assert(model_path); - ReadWriteBufferFromHTTP buf( - createRequestURI(CATBOOST_LIB_EVALUATE_METHOD), - Poco::Net::HTTPRequest::HTTP_POST, - [this, serialized = string_write_buf.str()](std::ostream & os) - { - os << "model_path=" << escapeForFileName(*model_path) << "&"; - os << "data=" << escapeForFileName(serialized); - }, - http_timeouts, credentials); + auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_LIB_EVALUATE_METHOD)) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(http_timeouts) + .withOutCallback( + [this, serialized = string_write_buf.str()](std::ostream & os) + { + os << "model_path=" << escapeForFileName(*model_path) << "&"; + os << "data=" << escapeForFileName(serialized); + }) + .create(credentials); - NativeReader deserializer(buf, /*server_revision*/ 0); + NativeReader deserializer(*buf, /*server_revision*/ 0); Block block_read = deserializer.read(); return block_read.getColumns()[0]; diff --git a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.cpp b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.cpp index fcb8ebd1f22..a37d2bd396f 100644 --- a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.cpp +++ b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.cpp @@ -71,8 +71,12 @@ bool ExternalDictionaryLibraryBridgeHelper::bridgeHandShake() String result; try { - ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts, credentials); - readString(result, buf); + auto buf = BuilderRWBufferFromHTTP(getPingURI()) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withTimeouts(http_timeouts) + .create(credentials); + + readString(result, *buf); } catch (...) 
{ @@ -247,30 +251,28 @@ QueryPipeline ExternalDictionaryLibraryBridgeHelper::loadKeys(const Block & requ bool ExternalDictionaryLibraryBridgeHelper::executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback) const { - ReadWriteBufferFromHTTP buf( - uri, - Poco::Net::HTTPRequest::HTTP_POST, - std::move(out_stream_callback), - http_timeouts, credentials); + auto buf = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(http_timeouts) + .withOutCallback(std::move(out_stream_callback)) + .create(credentials); bool res; - readBoolText(res, buf); + readBoolText(res, *buf); return res; } QueryPipeline ExternalDictionaryLibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback) { - auto read_buf_ptr = std::make_unique( - uri, - Poco::Net::HTTPRequest::HTTP_POST, - std::move(out_stream_callback), - http_timeouts, - credentials, - 0, - DBMS_DEFAULT_BUFFER_SIZE, - getContext()->getReadSettings(), - HTTPHeaderEntries{}); + auto read_buf_ptr = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withSettings(getContext()->getReadSettings()) + .withTimeouts(http_timeouts) + .withOutCallback(std::move(out_stream_callback)) + .create(credentials); auto source = FormatFactory::instance().getInput(ExternalDictionaryLibraryBridgeHelper::DEFAULT_FORMAT, *read_buf_ptr, sample_block, getContext(), DEFAULT_BLOCK_SIZE); source->addBuffer(std::move(read_buf_ptr)); diff --git a/src/BridgeHelper/XDBCBridgeHelper.h b/src/BridgeHelper/XDBCBridgeHelper.h index d208b8ddab0..838be42357a 100644 --- a/src/BridgeHelper/XDBCBridgeHelper.h +++ b/src/BridgeHelper/XDBCBridgeHelper.h @@ -97,8 +97,12 @@ protected: { try { - ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, getHTTPTimeouts(), credentials); - return checkString(PING_OK_ANSWER, buf); + auto buf = BuilderRWBufferFromHTTP(getPingURI()) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withTimeouts(getHTTPTimeouts()) + .create(credentials); + + return checkString(PING_OK_ANSWER, *buf); } catch (...) 
{ @@ -198,10 +202,14 @@ protected: uri.addQueryParameter("connection_string", getConnectionString()); uri.addQueryParameter("use_connection_pooling", toString(use_connection_pooling)); - ReadWriteBufferFromHTTP buf(uri, Poco::Net::HTTPRequest::HTTP_POST, {}, getHTTPTimeouts(), credentials); + auto buf = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(getHTTPTimeouts()) + .create(credentials); - bool res; - readBoolText(res, buf); + bool res = false; + readBoolText(res, *buf); is_schema_allowed = res; } @@ -220,10 +228,14 @@ protected: uri.addQueryParameter("connection_string", getConnectionString()); uri.addQueryParameter("use_connection_pooling", toString(use_connection_pooling)); - ReadWriteBufferFromHTTP buf(uri, Poco::Net::HTTPRequest::HTTP_POST, {}, getHTTPTimeouts(), credentials); + auto buf = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(getHTTPTimeouts()) + .create(credentials); std::string character; - readStringBinary(character, buf); + readStringBinary(character, *buf); if (character.length() > 1) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Failed to parse quoting style from '{}' for service {}", character, BridgeHelperMixin::serviceAlias()); diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 4848f4c7ee5..a11a1243957 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -153,6 +153,12 @@ void Connection::connect(const ConnectionTimeouts & timeouts) current_resolved_address = *it; break; } + catch (DB::NetException &) + { + if (++it == addresses.end()) + throw; + continue; + } catch (Poco::Net::NetException &) { if (++it == addresses.end()) @@ -199,6 +205,16 @@ void Connection::connect(const ConnectionTimeouts & timeouts) LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.", server_name, server_version_major, server_version_minor, server_version_patch); } + catch (DB::NetException & e) + { + disconnect(); + + /// Remove this possible stale entry from cache + DNSResolver::instance().removeHostFromCache(host); + + /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. 
+ throw NetException(ErrorCodes::NETWORK_ERROR, "{} ({})", e.displayText(), getDescription()); + } catch (Poco::Net::NetException & e) { disconnect(); diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 82da4c4bbad..f43481f665b 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -275,6 +275,18 @@ M(DistrCacheReadRequests, "Number of executed Read requests to Distributed Cache") \ M(DistrCacheWriteRequests, "Number of executed Write requests to Distributed Cache") \ M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache") + \ + M(StorageConnectionsStored, "Total count of sessions stored in the session pool for storages") \ + M(StorageConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for storages") \ + \ + M(DiskConnectionsStored, "Total count of sessions stored in the session pool for disks") \ + M(DiskConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for disks") \ + \ + M(HTTPConnectionsStored, "Total count of sessions stored in the session pool for http hosts") \ + M(HTTPConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for http hosts") \ + \ + M(AddressesActive, "Total count of addresses which are used for creation connections with connection pools") \ + #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 5b5f5369d5e..e36e1483da8 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -1,6 +1,7 @@ #include "DNSResolver.h" #include #include +#include #include #include #include @@ -108,7 +109,7 @@ DNSResolver::IPAddresses hostByName(const std::string & host) if (addresses.empty()) { ProfileEvents::increment(ProfileEvents::DNSError); - throw Exception(ErrorCodes::DNS_ERROR, "Not found address of host: {}", host); + throw DB::NetException(ErrorCodes::DNS_ERROR, "Not found address of host: {}", host); } return addresses; diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp new file mode 100644 index 00000000000..18ffef34091 --- /dev/null +++ b/src/Common/HTTPConnectionPool.cpp @@ -0,0 +1,865 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "config.h" + +#if USE_SSL +#include +#endif + + +namespace ProfileEvents +{ + extern const Event StorageConnectionsCreated; + extern const Event StorageConnectionsReused; + extern const Event StorageConnectionsReset; + extern const Event StorageConnectionsPreserved; + extern const Event StorageConnectionsExpired; + extern const Event StorageConnectionsErrors; + extern const Event StorageConnectionsElapsedMicroseconds; + + extern const Event DiskConnectionsCreated; + extern const Event DiskConnectionsReused; + extern const Event DiskConnectionsReset; + extern const Event DiskConnectionsPreserved; + extern const Event DiskConnectionsExpired; + extern const Event DiskConnectionsErrors; + extern const Event DiskConnectionsElapsedMicroseconds; + + extern const Event HTTPConnectionsCreated; + extern const Event HTTPConnectionsReused; + extern const Event HTTPConnectionsReset; + extern const Event HTTPConnectionsPreserved; + extern const Event HTTPConnectionsExpired; + extern const Event 
HTTPConnectionsErrors; + extern const Event HTTPConnectionsElapsedMicroseconds; +} + + +namespace CurrentMetrics +{ + extern const Metric StorageConnectionsStored; + extern const Metric StorageConnectionsTotal; + + extern const Metric DiskConnectionsStored; + extern const Metric DiskConnectionsTotal; + + extern const Metric HTTPConnectionsStored; + extern const Metric HTTPConnectionsTotal; +} + +namespace +{ + Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration) + { + Poco::Net::HTTPClientSession::ProxyConfig poco_proxy_config; + + poco_proxy_config.host = proxy_configuration.host; + poco_proxy_config.port = proxy_configuration.port; + poco_proxy_config.protocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.protocol); + poco_proxy_config.tunnel = proxy_configuration.tunneling; + poco_proxy_config.originalRequestProtocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol); + + return poco_proxy_config; + } + + + size_t roundUp(size_t x, size_t rounding) + { + chassert(rounding > 0); + return (x + (rounding - 1)) / rounding * rounding; + } + + + Poco::Timespan divide(const Poco::Timespan span, int divisor) + { + return Poco::Timespan(Poco::Timestamp::TimeDiff(span.totalMicroseconds() / divisor)); + } +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; + extern const int UNSUPPORTED_URI_SCHEME; +} + + +IHTTPConnectionPoolForEndpoint::Metrics getMetricsForStorageConnectionPool() +{ + return IHTTPConnectionPoolForEndpoint::Metrics{ + .created = ProfileEvents::StorageConnectionsCreated, + .reused = ProfileEvents::StorageConnectionsReused, + .reset = ProfileEvents::StorageConnectionsReset, + .preserved = ProfileEvents::StorageConnectionsPreserved, + .expired = ProfileEvents::StorageConnectionsExpired, + .errors = ProfileEvents::StorageConnectionsErrors, + .elapsed_microseconds = ProfileEvents::StorageConnectionsElapsedMicroseconds, + .stored_count = CurrentMetrics::StorageConnectionsStored, + .active_count = CurrentMetrics::StorageConnectionsTotal, + }; +} + + +IHTTPConnectionPoolForEndpoint::Metrics getMetricsForDiskConnectionPool() +{ + return IHTTPConnectionPoolForEndpoint::Metrics{ + .created = ProfileEvents::DiskConnectionsCreated, + .reused = ProfileEvents::DiskConnectionsReused, + .reset = ProfileEvents::DiskConnectionsReset, + .preserved = ProfileEvents::DiskConnectionsPreserved, + .expired = ProfileEvents::DiskConnectionsExpired, + .errors = ProfileEvents::DiskConnectionsErrors, + .elapsed_microseconds = ProfileEvents::DiskConnectionsElapsedMicroseconds, + .stored_count = CurrentMetrics::DiskConnectionsStored, + .active_count = CurrentMetrics::DiskConnectionsTotal, + }; +} + + +IHTTPConnectionPoolForEndpoint::Metrics getMetricsForHTTPConnectionPool() +{ + return IHTTPConnectionPoolForEndpoint::Metrics{ + .created = ProfileEvents::HTTPConnectionsCreated, + .reused = ProfileEvents::HTTPConnectionsReused, + .reset = ProfileEvents::HTTPConnectionsReset, + .preserved = ProfileEvents::HTTPConnectionsPreserved, + .expired = ProfileEvents::HTTPConnectionsExpired, + .errors = ProfileEvents::HTTPConnectionsErrors, + .elapsed_microseconds = ProfileEvents::HTTPConnectionsElapsedMicroseconds, + .stored_count = CurrentMetrics::HTTPConnectionsStored, + .active_count = CurrentMetrics::HTTPConnectionsTotal, + }; +} + + +IHTTPConnectionPoolForEndpoint::Metrics getConnectionPoolMetrics(HTTPConnectionGroupType type) +{ + switch (type) + { + 
case HTTPConnectionGroupType::STORAGE: + return getMetricsForStorageConnectionPool(); + case HTTPConnectionGroupType::DISK: + return getMetricsForDiskConnectionPool(); + case HTTPConnectionGroupType::HTTP: + return getMetricsForHTTPConnectionPool(); + } +} + + +class ConnectionGroup +{ +public: + using Ptr = std::shared_ptr; + + explicit ConnectionGroup(HTTPConnectionGroupType type_) : type(type_), metrics(getConnectionPoolMetrics(type_)) { } + + void setLimits(HTTPConnectionPools::Limits limits_) + { + std::lock_guard lock(mutex); + limits = std::move(limits_); + mute_warning_until = 0; + } + + bool isSoftLimitReached() const + { + std::lock_guard lock(mutex); + return total_connections_in_group >= limits.soft_limit; + } + + bool isStoreLimitReached() const + { + std::lock_guard lock(mutex); + return total_connections_in_group >= limits.store_limit; + } + + void atConnectionCreate() + { + std::lock_guard lock(mutex); + + ++total_connections_in_group; + + if (total_connections_in_group >= limits.warning_limit && total_connections_in_group >= mute_warning_until) + { + LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}", type, total_connections_in_group, limits.warning_limit); + mute_warning_until = roundUp(total_connections_in_group, limits.warning_step); + } + } + + void atConnectionDestroy() + { + std::lock_guard lock(mutex); + + --total_connections_in_group; + + const size_t reduced_warning_limit = limits.warning_limit > 10 ? limits.warning_limit - 10 : 1; + if (mute_warning_until > 0 && total_connections_in_group < reduced_warning_limit) + { + LOG_WARNING(log, "Sessions count is OK in the group {}, count {}", type, total_connections_in_group); + mute_warning_until = 0; + } + } + + HTTPConnectionGroupType getType() const { return type; } + + const IHTTPConnectionPoolForEndpoint::Metrics & getMetrics() const { return metrics; } + +private: + const HTTPConnectionGroupType type; + const IHTTPConnectionPoolForEndpoint::Metrics metrics; + + LoggerPtr log = getLogger("ConnectionGroup"); + + mutable std::mutex mutex; + HTTPConnectionPools::Limits limits TSA_GUARDED_BY(mutex) = HTTPConnectionPools::Limits(); + size_t total_connections_in_group TSA_GUARDED_BY(mutex) = 0; + size_t mute_warning_until TSA_GUARDED_BY(mutex) = 0; +}; + + +class IExtendedPool : public IHTTPConnectionPoolForEndpoint +{ +public: + using Ptr = std::shared_ptr; + + virtual HTTPConnectionGroupType getGroupType() const = 0; + virtual size_t wipeExpired() = 0; +}; + + +// EndpointConnectionPool manage connections to the endpoint +// Features: +// - it uses HostResolver for address selecting. See Common/HostResolver.h for more info. 
+// - it minimizes number of `Session::connect()`/`Session::reconnect()` calls +// - stores only connected and ready to use sessions +// - connection could be reused even when limits are reached +// - soft limit, warn limit, store limit +// - `Session::reconnect()` uses the pool as well +// - comprehensive sensors +// - session is reused according its inner state, automatically + + +template +class EndpointConnectionPool : public std::enable_shared_from_this>, public IExtendedPool +{ +private: + friend class HTTPConnectionPools; + + using WeakPtr = std::weak_ptr>; + + class PooledConnection : public Session + { + public: + using Ptr = std::shared_ptr; + + void reconnect() override + { + ProfileEvents::increment(metrics.reset); + Session::close(); + + if (auto lock = pool.lock()) + { + auto timeouts = getTimeouts(*this); + auto new_connection = lock->getConnection(timeouts); + Session::assign(*new_connection); + } + else + { + auto timer = CurrentThread::getProfileEvents().timer(metrics.elapsed_microseconds); + Session::reconnect(); + ProfileEvents::increment(metrics.created); + } + } + + String getTarget() const + { + if (!Session::getProxyConfig().host.empty()) + return fmt::format("{} over proxy {}", Session::getHost(), Session::getProxyConfig().host); + return Session::getHost(); + } + + void flushRequest() override + { + if (bool(request_stream)) + { + request_stream->flush(); + + if (auto * fixed_steam = dynamic_cast(request_stream)) + { + request_stream_completed = fixed_steam->isComplete(); + } + else if (auto * chunked_steam = dynamic_cast(request_stream)) + { + chunked_steam->rdbuf()->close(); + request_stream_completed = chunked_steam->isComplete(); + } + else if (auto * http_stream = dynamic_cast(request_stream)) + { + request_stream_completed = http_stream->isComplete(); + } + else + { + request_stream_completed = false; + } + } + request_stream = nullptr; + + Session::flushRequest(); + } + + std::ostream & sendRequest(Poco::Net::HTTPRequest & request) override + { + std::ostream & result = Session::sendRequest(request); + result.exceptions(std::ios::badbit); + + request_stream = &result; + request_stream_completed = false; + + response_stream = nullptr; + response_stream_completed = true; + + return result; + } + + std::istream & receiveResponse(Poco::Net::HTTPResponse & response) override + { + std::istream & result = Session::receiveResponse(response); + result.exceptions(std::ios::badbit); + + response_stream = &result; + response_stream_completed = false; + + return result; + } + + void reset() override + { + request_stream = nullptr; + request_stream_completed = false; + + response_stream = nullptr; + response_stream_completed = false; + + Session::reset(); + } + + ~PooledConnection() override + { + if (bool(response_stream)) + { + if (auto * fixed_steam = dynamic_cast(response_stream)) + { + response_stream_completed = fixed_steam->isComplete(); + } + else if (auto * chunked_steam = dynamic_cast(response_stream)) + { + response_stream_completed = chunked_steam->isComplete(); + } + else if (auto * http_stream = dynamic_cast(response_stream)) + { + response_stream_completed = http_stream->isComplete(); + } + else + { + response_stream_completed = false; + } + } + response_stream = nullptr; + + if (auto lock = pool.lock()) + lock->atConnectionDestroy(*this); + else + ProfileEvents::increment(metrics.reset); + + CurrentMetrics::sub(metrics.active_count); + } + + private: + friend class EndpointConnectionPool; + + template + explicit 
PooledConnection(EndpointConnectionPool::WeakPtr pool_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... args) + : Session(args...), pool(std::move(pool_)), metrics(std::move(metrics_)) + { + CurrentMetrics::add(metrics.active_count); + } + + template + static Ptr create(Args &&... args) + { + /// Pool is global, we shouldn't attribute this memory to query/user. + MemoryTrackerSwitcher switcher{&total_memory_tracker}; + + struct make_shared_enabler : public PooledConnection + { + explicit make_shared_enabler(Args &&... args) : PooledConnection(std::forward(args)...) { } + }; + return std::make_shared(std::forward(args)...); + } + + void doConnect() + { + Session::reconnect(); + } + + bool isCompleted() const + { + return request_stream_completed && response_stream_completed; + } + + WeakPtr pool; + IHTTPConnectionPoolForEndpoint::Metrics metrics; + + Poco::Logger * log = &Poco::Logger::get("PooledConnection"); + + std::ostream * request_stream = nullptr; + std::istream * response_stream = nullptr; + + bool request_stream_completed = true; + bool response_stream_completed = true; + }; + + using Connection = PooledConnection; + using ConnectionPtr = PooledConnection::Ptr; + + struct GreaterByLastRequest + { + static bool operator()(const ConnectionPtr & l, const ConnectionPtr & r) + { + return l->getLastRequest() + l->getKeepAliveTimeout() > r->getLastRequest() + r->getKeepAliveTimeout(); + } + }; + + using ConnectionsMinHeap = std::priority_queue, GreaterByLastRequest>; + +public: + template + static Ptr create(Args &&... args) + { + struct make_shared_enabler : public EndpointConnectionPool + { + explicit make_shared_enabler(Args &&... args) : EndpointConnectionPool(std::forward(args)...) { } + }; + return std::make_shared(std::forward(args)...); + } + + ~EndpointConnectionPool() override + { + CurrentMetrics::sub(group->getMetrics().stored_count, stored_connections.size()); + } + + String getTarget() const + { + if (!proxy_configuration.isEmpty()) + return fmt::format("{} over proxy {}", host, proxy_configuration.host); + return host; + } + + IHTTPConnectionPoolForEndpoint::ConnectionPtr getConnection(const ConnectionTimeouts & timeouts) override + { + Poco::Timestamp now; + std::vector expired_connections; + + SCOPE_EXIT({ + MemoryTrackerSwitcher switcher{&total_memory_tracker}; + expired_connections.clear(); + }); + + { + std::lock_guard lock(mutex); + + wipeExpiredImpl(expired_connections, now); + + if (!stored_connections.empty()) + { + auto it = stored_connections.top(); + stored_connections.pop(); + + setTimeouts(*it, timeouts); + + ProfileEvents::increment(getMetrics().reused, 1); + CurrentMetrics::sub(getMetrics().stored_count, 1); + + return it; + } + } + + return prepareNewConnection(timeouts); + } + + const IHTTPConnectionPoolForEndpoint::Metrics & getMetrics() const override + { + return group->getMetrics(); + } + + HTTPConnectionGroupType getGroupType() const override + { + return group->getType(); + } + + size_t wipeExpired() override + { + Poco::Timestamp now; + std::vector expired_connections; + + SCOPE_EXIT({ + MemoryTrackerSwitcher switcher{&total_memory_tracker}; + expired_connections.clear(); + }); + + std::lock_guard lock(mutex); + return wipeExpiredImpl(expired_connections, now); + } + + size_t wipeExpiredImpl(std::vector & expired_connections, Poco::Timestamp now) TSA_REQUIRES(mutex) + { + while (!stored_connections.empty()) + { + auto connection = stored_connections.top(); + + if (!isExpired(now, connection)) + return stored_connections.size(); + + 
stored_connections.pop(); + expired_connections.push_back(connection); + } + + CurrentMetrics::sub(getMetrics().stored_count, expired_connections.size()); + ProfileEvents::increment(getMetrics().expired, expired_connections.size()); + + return stored_connections.size(); + } + +private: + EndpointConnectionPool(ConnectionGroup::Ptr group_, String host_, UInt16 port_, bool https_, ProxyConfiguration proxy_configuration_) + : host(std::move(host_)) + , port(port_) + , https(https_) + , proxy_configuration(std::move(proxy_configuration_)) + , group(group_) + { + } + + WeakPtr getWeakFromThis() { return EndpointConnectionPool::weak_from_this(); } + + bool isExpired(Poco::Timestamp & now, ConnectionPtr connection) + { + if (group->isSoftLimitReached()) + return now > (connection->getLastRequest() + divide(connection->getKeepAliveTimeout(), 10)); + return now > connection->getLastRequest() + connection->getKeepAliveTimeout(); + } + + ConnectionPtr allocateNewConnection() + { + ConnectionPtr connection = PooledConnection::create(this->getWeakFromThis(), getMetrics(), host, port); + connection->setKeepAlive(true); + + if (!proxy_configuration.isEmpty()) + { + connection->setProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration)); + } + + group->atConnectionCreate(); + + return connection; + } + + ConnectionPtr prepareNewConnection(const ConnectionTimeouts & timeouts) + { + auto address = HostResolversPool::instance().getResolver(host)->resolve(); + + auto session = allocateNewConnection(); + + setTimeouts(*session, timeouts); + session->setResolvedHost(*address); + + try + { + auto timer = CurrentThread::getProfileEvents().timer(getMetrics().elapsed_microseconds); + session->doConnect(); + } + catch (...) + { + address.setFail(); + ProfileEvents::increment(getMetrics().errors); + session->reset(); + throw; + } + + ProfileEvents::increment(getMetrics().created); + return session; + } + + void atConnectionDestroy(PooledConnection & connection) + { + group->atConnectionDestroy(); + + if (!connection.connected() || connection.mustReconnect() || !connection.isCompleted() || connection.buffered() + || group->isStoreLimitReached()) + { + ProfileEvents::increment(getMetrics().reset, 1); + return; + } + + auto connection_to_store = allocateNewConnection(); + connection_to_store->assign(connection); + + CurrentMetrics::add(getMetrics().stored_count, 1); + ProfileEvents::increment(getMetrics().preserved, 1); + + { + MemoryTrackerSwitcher switcher{&total_memory_tracker}; + std::lock_guard lock(mutex); + stored_connections.push(connection_to_store); + } + } + + + const std::string host; + const UInt16 port; + const bool https; + const ProxyConfiguration proxy_configuration; + const ConnectionGroup::Ptr group; + + std::mutex mutex; + ConnectionsMinHeap stored_connections TSA_GUARDED_BY(mutex); +}; + +struct EndpointPoolKey +{ + HTTPConnectionGroupType connection_group; + String target_host; + UInt16 target_port; + bool is_target_https; + ProxyConfiguration proxy_config; + + bool operator==(const EndpointPoolKey & rhs) const + { + return std::tie( + connection_group, + target_host, + target_port, + is_target_https, + proxy_config.host, + proxy_config.port, + proxy_config.protocol, + proxy_config.tunneling, + proxy_config.original_request_protocol) + == std::tie( + rhs.connection_group, + rhs.target_host, + rhs.target_port, + rhs.is_target_https, + rhs.proxy_config.host, + rhs.proxy_config.port, + rhs.proxy_config.protocol, + rhs.proxy_config.tunneling, + rhs.proxy_config.original_request_protocol); 
+ } +}; + +struct Hasher +{ + size_t operator()(const EndpointPoolKey & k) const + { + SipHash s; + s.update(k.connection_group); + s.update(k.target_host); + s.update(k.target_port); + s.update(k.is_target_https); + s.update(k.proxy_config.host); + s.update(k.proxy_config.port); + s.update(k.proxy_config.protocol); + s.update(k.proxy_config.tunneling); + s.update(k.proxy_config.original_request_protocol); + return s.get64(); + } +}; + +IExtendedPool::Ptr +createConnectionPool(ConnectionGroup::Ptr group, std::string host, UInt16 port, bool secure, ProxyConfiguration proxy_configuration) +{ + if (secure) + { +#if USE_SSL + return EndpointConnectionPool::create( + group, std::move(host), port, secure, std::move(proxy_configuration)); +#else + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, "Inter-server secret support is disabled, because ClickHouse was built without SSL library"); +#endif + } + else + { + return EndpointConnectionPool::create( + group, std::move(host), port, secure, std::move(proxy_configuration)); + } +} + +class HTTPConnectionPools::Impl +{ +private: + const size_t DEFAULT_WIPE_TIMEOUT_SECONDS = 5 * 60; + const Poco::Timespan wipe_timeout = Poco::Timespan(DEFAULT_WIPE_TIMEOUT_SECONDS, 0); + + ConnectionGroup::Ptr disk_group = std::make_shared(HTTPConnectionGroupType::DISK); + ConnectionGroup::Ptr storage_group = std::make_shared(HTTPConnectionGroupType::STORAGE); + ConnectionGroup::Ptr http_group = std::make_shared(HTTPConnectionGroupType::HTTP); + + + /// If multiple mutexes are held simultaneously, + /// they should be locked in this order: + /// HTTPConnectionPools::mutex, then EndpointConnectionPool::mutex, then ConnectionGroup::mutex. + std::mutex mutex; + + std::unordered_map endpoints_pool TSA_GUARDED_BY(mutex); + Poco::Timestamp last_wipe_time TSA_GUARDED_BY(mutex); + +public: + IHTTPConnectionPoolForEndpoint::Ptr getPool(HTTPConnectionGroupType type, const Poco::URI & uri, const ProxyConfiguration & proxy_configuration) + { + Poco::Timestamp now; + + std::lock_guard lock(mutex); + + if (now - last_wipe_time > wipe_timeout) + { + wipeExpired(); + last_wipe_time = now; + } + + return getPoolImpl(type, uri, proxy_configuration); + } + + void setLimits(HTTPConnectionPools::Limits disk, HTTPConnectionPools::Limits storage, HTTPConnectionPools::Limits http) + { + disk_group->setLimits(std::move(disk)); + storage_group->setLimits(std::move(storage)); + http_group->setLimits(std::move(http)); + } + + void dropCache() + { + std::lock_guard lock(mutex); + endpoints_pool.clear(); + } + +protected: + ConnectionGroup::Ptr & getGroup(HTTPConnectionGroupType type) + { + switch (type) + { + case HTTPConnectionGroupType::DISK: + return disk_group; + case HTTPConnectionGroupType::STORAGE: + return storage_group; + case HTTPConnectionGroupType::HTTP: + return http_group; + } + } + + IExtendedPool::Ptr getPoolImpl(HTTPConnectionGroupType type, const Poco::URI & uri, const ProxyConfiguration & proxy_configuration) + TSA_REQUIRES(mutex) + { + auto [host, port, secure] = getHostPortSecure(uri, proxy_configuration); + auto key = EndpointPoolKey{type, host, port, secure, proxy_configuration}; + + auto it = endpoints_pool.find(key); + if (it != endpoints_pool.end()) + return it->second; + + it = endpoints_pool.emplace(key, createConnectionPool(getGroup(type), std::move(host), port, secure, proxy_configuration)).first; + + return it->second; + } + + void wipeExpired() TSA_REQUIRES(mutex) + { + std::vector keys_to_drop; + + for (auto & [key, pool] : endpoints_pool) + { + auto 
left_connections = pool->wipeExpired(); + if (left_connections == 0 && pool->getGroupType() != HTTPConnectionGroupType::DISK) + keys_to_drop.push_back(key); + } + + for (const auto & key : keys_to_drop) + endpoints_pool.erase(key); + } + + static bool useSecureConnection(const Poco::URI & uri, const ProxyConfiguration & proxy_configuration) + { + if (uri.getScheme() == "http") + return false; + + if (uri.getScheme() != "https") + throw Exception(ErrorCodes::UNSUPPORTED_URI_SCHEME, "Unsupported scheme in URI '{}'", uri.toString()); + + if (!proxy_configuration.isEmpty()) + { + if (ProxyConfiguration::Protocol::HTTP == proxy_configuration.protocol && !proxy_configuration.tunneling) + { + // If it is an HTTPS request, proxy server is HTTP and user opted for tunneling off, we must not create an HTTPS request. + // The desired flow is: HTTP request to the proxy server, then proxy server will initiate an HTTPS request to the target server. + // There is a weak link in the security, but that's what the user opted for. + return false; + } + } + + return true; + } + + static std::tuple getHostPortSecure(const Poco::URI & uri, const ProxyConfiguration & proxy_configuration) + { + return std::make_tuple(uri.getHost(), uri.getPort(), useSecureConnection(uri, proxy_configuration)); + } +}; + +HTTPConnectionPools::HTTPConnectionPools() + : impl(std::make_unique()) +{ +} + +HTTPConnectionPools & HTTPConnectionPools::instance() +{ + static HTTPConnectionPools instance; + return instance; +} + +void HTTPConnectionPools::setLimits(HTTPConnectionPools::Limits disk, HTTPConnectionPools::Limits storage, HTTPConnectionPools::Limits http) +{ + impl->setLimits(std::move(disk), std::move(storage), std::move(http)); +} + +void HTTPConnectionPools::dropCache() +{ + impl->dropCache(); +} + +IHTTPConnectionPoolForEndpoint::Ptr +HTTPConnectionPools::getPool(HTTPConnectionGroupType type, const Poco::URI & uri, const ProxyConfiguration & proxy_configuration) +{ + return impl->getPool(type, uri, proxy_configuration); +} +} diff --git a/src/Common/HTTPConnectionPool.h b/src/Common/HTTPConnectionPool.h new file mode 100644 index 00000000000..7fd0847f5a7 --- /dev/null +++ b/src/Common/HTTPConnectionPool.h @@ -0,0 +1,91 @@ +#pragma once + +#include + +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include + +namespace DB +{ + +class IHTTPConnectionPoolForEndpoint +{ +public: + struct Metrics + { + const ProfileEvents::Event created = ProfileEvents::end(); + const ProfileEvents::Event reused = ProfileEvents::end(); + const ProfileEvents::Event reset = ProfileEvents::end(); + const ProfileEvents::Event preserved = ProfileEvents::end(); + const ProfileEvents::Event expired = ProfileEvents::end(); + const ProfileEvents::Event errors = ProfileEvents::end(); + const ProfileEvents::Event elapsed_microseconds = ProfileEvents::end(); + + const CurrentMetrics::Metric stored_count = CurrentMetrics::end(); + const CurrentMetrics::Metric active_count = CurrentMetrics::end(); + }; + + using Ptr = std::shared_ptr; + using Connection = Poco::Net::HTTPClientSession; + using ConnectionPtr = std::shared_ptr; + + /// can throw Poco::Net::Exception, DB::NetException, DB::Exception + virtual ConnectionPtr getConnection(const ConnectionTimeouts & timeouts) = 0; + virtual const Metrics & getMetrics() const = 0; + virtual ~IHTTPConnectionPoolForEndpoint() = default; + +protected: + IHTTPConnectionPoolForEndpoint() = default; + + IHTTPConnectionPoolForEndpoint(const IHTTPConnectionPoolForEndpoint &) = delete; + 
IHTTPConnectionPoolForEndpoint & operator=(const IHTTPConnectionPoolForEndpoint &) = delete; +}; + +enum class HTTPConnectionGroupType +{ + DISK, + STORAGE, + HTTP, +}; + +class HTTPConnectionPools +{ +public: + struct Limits + { + size_t soft_limit = 100; + size_t warning_limit = 1000; + size_t store_limit = 10000; + + static constexpr size_t warning_step = 100; + }; + +private: + HTTPConnectionPools(); + HTTPConnectionPools(const HTTPConnectionPools &) = delete; + HTTPConnectionPools & operator=(const HTTPConnectionPools &) = delete; + +public: + static HTTPConnectionPools & instance(); + + void setLimits(Limits disk, Limits storage, Limits http); + void dropCache(); + + IHTTPConnectionPoolForEndpoint::Ptr getPool(HTTPConnectionGroupType type, const Poco::URI & uri, const ProxyConfiguration & proxy_configuration); + +private: + class Impl; + std::unique_ptr impl; +}; + +} diff --git a/src/Common/HostResolvePool.cpp b/src/Common/HostResolvePool.cpp new file mode 100644 index 00000000000..f6cc9c919ba --- /dev/null +++ b/src/Common/HostResolvePool.cpp @@ -0,0 +1,293 @@ +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace ProfileEvents +{ + extern const Event AddressesDiscovered; + extern const Event AddressesExpired; + extern const Event AddressesFailScored; +} + +namespace CurrentMetrics +{ + extern const Metric AddressesActive; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DNS_ERROR; +} + +HostResolverMetrics HostResolver::getMetrics() +{ + return HostResolverMetrics{ + .discovered = ProfileEvents::AddressesDiscovered, + .expired = ProfileEvents::AddressesExpired, + .failed = ProfileEvents::AddressesFailScored, + .active_count = CurrentMetrics::AddressesActive, + }; +} + +HostResolver::WeakPtr HostResolver::getWeakFromThis() +{ + return weak_from_this(); +} + +HostResolver::HostResolver(String host_, Poco::Timespan history_) + : host(std::move(host_)) + , history(history_) + , resolve_function([](const String & host_to_resolve) { return DNSResolver::instance().resolveHostAll(host_to_resolve); }) +{ + update(); +} + +HostResolver::HostResolver( + ResolveFunction && resolve_function_, String host_, Poco::Timespan history_) + : host(std::move(host_)), history(history_), resolve_function(std::move(resolve_function_)) +{ + update(); +} + +HostResolver::~HostResolver() +{ + std::lock_guard lock(mutex); + CurrentMetrics::sub(metrics.active_count, records.size()); + records.clear(); +} + +void HostResolver::Entry::setFail() +{ + fail = true; + + if (auto lock = pool.lock()) + lock->setFail(address); +} + +HostResolver::Entry::~Entry() +{ + if (!fail) + { + if (auto lock = pool.lock()) + lock->setSuccess(address); + } +} + +void HostResolver::update() +{ + MemoryTrackerSwitcher switcher{&total_memory_tracker}; + + auto next_gen = resolve_function(host); + if (next_gen.empty()) + throw NetException(ErrorCodes::DNS_ERROR, "no endpoints resolved for host {}", host); + + std::sort(next_gen.begin(), next_gen.end()); + + Poco::Timestamp now; + + std::lock_guard lock(mutex); + updateImpl(now, next_gen); +} + +void HostResolver::reset() +{ + std::lock_guard lock(mutex); + + CurrentMetrics::sub(metrics.active_count, records.size()); + records.clear(); +} + +void HostResolver::updateWeights() +{ + updateWeightsImpl(); + + if (getTotalWeight() == 0 && !records.empty()) + { + for (auto & rec : records) + rec.failed = false; + + updateWeightsImpl(); + } + + chassert((getTotalWeight() > 0 && !records.empty()) || records.empty()); + 
random_weight_picker = std::uniform_int_distribution(0, getTotalWeight() - 1); +} + +HostResolver::Entry HostResolver::resolve() +{ + if (isUpdateNeeded()) + update(); + + std::lock_guard lock(mutex); + return Entry(*this, selectBest()); +} + +void HostResolver::setSuccess(const Poco::Net::IPAddress & address) +{ + std::lock_guard lock(mutex); + + auto it = find(address); + if (it == records.end()) + return; + + auto old_weight = it->getWeight(); + ++it->usage; + auto new_weight = it->getWeight(); + + if (old_weight != new_weight) + updateWeights(); +} + +void HostResolver::setFail(const Poco::Net::IPAddress & address) +{ + Poco::Timestamp now; + + { + std::lock_guard lock(mutex); + + auto it = find(address); + if (it == records.end()) + return; + + it->failed = true; + it->fail_time = now; + } + + ProfileEvents::increment(metrics.failed); + update(); +} + +Poco::Net::IPAddress HostResolver::selectBest() +{ + chassert(!records.empty()); + size_t weight = random_weight_picker(thread_local_rng); + auto it = std::partition_point(records.begin(), records.end(), [&](const Record & rec) { return rec.weight_prefix_sum <= weight; }); + chassert(it != records.end()); + return it->address; +} + +HostResolver::Records::iterator HostResolver::find(const Poco::Net::IPAddress & addr) TSA_REQUIRES(mutex) +{ + return std::lower_bound( + records.begin(), records.end(), addr, [](const Record & rec, const Poco::Net::IPAddress & value) { return rec.address < value; }); +} + +bool HostResolver::isUpdateNeeded() +{ + Poco::Timestamp now; + + std::lock_guard lock(mutex); + return last_resolve_time + history < now || records.empty(); +} + +void HostResolver::updateImpl(Poco::Timestamp now, std::vector & next_gen) + TSA_REQUIRES(mutex) +{ + const auto last_effective_resolve = now - history; + + Records merged; + merged.reserve(records.size() + next_gen.size()); + + auto it_before = records.begin(); + auto it_next = next_gen.begin(); + + while (it_before != records.end() || it_next != next_gen.end()) + { + if (it_next == next_gen.end() || (it_before != records.end() && it_before->address < *it_next)) + { + if (it_before->resolve_time >= last_effective_resolve) + merged.push_back(*it_before); + else + { + CurrentMetrics::sub(metrics.active_count, 1); + ProfileEvents::increment(metrics.expired, 1); + } + ++it_before; + } + else if (it_before == records.end() || (it_next != next_gen.end() && *it_next < it_before->address)) + { + CurrentMetrics::add(metrics.active_count, 1); + ProfileEvents::increment(metrics.discovered, 1); + merged.push_back(Record(*it_next, now)); + ++it_next; + } + else + { + merged.push_back(*it_before); + merged.back().resolve_time = now; + + ++it_before; + ++it_next; + } + } + + for (auto & rec : merged) + if (rec.failed && rec.fail_time < last_effective_resolve) + rec.failed = false; + + chassert(std::is_sorted(merged.begin(), merged.end())); + + last_resolve_time = now; + records.swap(merged); + + if (records.empty()) + throw NetException(ErrorCodes::DNS_ERROR, "no endpoints resolved for host {}", host); + + updateWeights(); +} + +size_t HostResolver::getTotalWeight() const +{ + if (records.empty()) + return 0; + return records.back().weight_prefix_sum; +} + + +void HostResolver::updateWeightsImpl() +{ + size_t total_weight_next = 0; + + for (auto & rec: records) + { + total_weight_next += rec.getWeight(); + rec.weight_prefix_sum = total_weight_next; + } +} + +HostResolversPool & HostResolversPool::instance() +{ + static HostResolversPool instance; + return instance; +} + +void 
HostResolversPool::dropCache()
+{
+    std::lock_guard lock(mutex);
+    host_pools.clear();
+}
+
+HostResolver::Ptr HostResolversPool::getResolver(const String & host)
+{
+    std::lock_guard lock(mutex);
+
+    auto it = host_pools.find(host);
+    if (it != host_pools.end())
+        return it->second;
+
+    it = host_pools.emplace(host, HostResolver::create(host)).first;
+
+    return it->second;
+}
+
+}
diff --git a/src/Common/HostResolvePool.h b/src/Common/HostResolvePool.h
new file mode 100644
index 00000000000..2a31cec3b2d
--- /dev/null
+++ b/src/Common/HostResolvePool.h
@@ -0,0 +1,218 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+#include
+
+// This class resolves a host name into multiple addresses.
+// Features:
+// - balances address usage.
+//   `selectBest()` chooses an address at random with weights.
+//   The more an ip is used, the lower its weight becomes. A newly discovered address gets a higher weight,
+//   but still not all requests are assigned to the new address.
+// - joins resolve results.
+//   When the host resolves to a different set of addresses, this class merges all those addresses and uses them.
+//   An address expires after `history_` time.
+// - failed address pessimization.
+//   An address marked with `setFail()` is considered faulty. Such an address won't be selected until either
+//   a) it still occurs in the resolve set after `history_` time, or b) all other addresses are pessimized as well.
+// - resolve schedule.
+//   Addresses are resolved through `DB::DNSResolver::instance()`.
+//   Usually this happens no more often than once per `history_` time,
+//   but a new resolve is also performed on each `setFail()` call.
+
+namespace DB
+{
+
+struct HostResolverMetrics
+{
+    const ProfileEvents::Event discovered = ProfileEvents::end();
+    const ProfileEvents::Event expired = ProfileEvents::end();
+    const ProfileEvents::Event failed = ProfileEvents::end();
+
+    const CurrentMetrics::Metric active_count = CurrentMetrics::end();
+};
+
+constexpr size_t DEFAULT_RESOLVE_TIME_HISTORY_SECONDS = 2*60;
+
+
+class HostResolver : public std::enable_shared_from_this
+{
+private:
+    using WeakPtr = std::weak_ptr;
+
+public:
+    using Ptr = std::shared_ptr;
+
+    template
+    static Ptr create(Args&&... args)
+    {
+        struct make_shared_enabler : public HostResolver
+        {
+            explicit make_shared_enabler(Args&&... args) : HostResolver(std::forward(args)...) {}
+        };
+        return std::make_shared(std::forward(args)...);
+    }
+
+    virtual ~HostResolver();
+
+    class Entry
+    {
+    public:
+        explicit Entry(Entry && entry) = default;
+        explicit Entry(Entry & entry) = delete;
+
+        // no access as r-value
+        const String * operator->() && = delete;
+        const String * operator->() const && = delete;
+        const String & operator*() && = delete;
+        const String & operator*() const && = delete;
+
+        const String * operator->() & { return &resolved_host; }
+        const String * operator->() const & { return &resolved_host; }
+        const String & operator*() & { return resolved_host; }
+        const String & operator*() const & { return resolved_host; }
+
+        void setFail();
+        ~Entry();
+
+    private:
+        friend class HostResolver;
+
+        Entry(HostResolver & pool_, Poco::Net::IPAddress address_)
+            : pool(pool_.getWeakFromThis())
+            , address(std::move(address_))
+            , resolved_host(address.toString())
+        { }
+
+        HostResolver::WeakPtr pool;
+        const Poco::Net::IPAddress address;
+        const String resolved_host;
+
+        bool fail = false;
+    };
+
+    /// can throw NetException(ErrorCodes::DNS_ERROR, ...), Exception(ErrorCodes::BAD_ARGUMENTS, ...)
+ Entry resolve(); + void update(); + void reset(); + + static HostResolverMetrics getMetrics(); + +protected: + explicit HostResolver( + String host_, + Poco::Timespan history_ = Poco::Timespan(DEFAULT_RESOLVE_TIME_HISTORY_SECONDS, 0)); + + using ResolveFunction = std::function (const String & host)>; + HostResolver(ResolveFunction && resolve_function_, + String host_, + Poco::Timespan history_); + + friend class Entry; + WeakPtr getWeakFromThis(); + + void setSuccess(const Poco::Net::IPAddress & address); + void setFail(const Poco::Net::IPAddress & address); + + struct Record + { + Record(Poco::Net::IPAddress address_, Poco::Timestamp resolve_time_) + : address(std::move(address_)) + , resolve_time(resolve_time_) + {} + + explicit Record(Record && rec) = default; + Record& operator=(Record && s) = default; + + explicit Record(const Record & rec) = default; + Record& operator=(const Record & s) = default; + + Poco::Net::IPAddress address; + Poco::Timestamp resolve_time; + size_t usage = 0; + bool failed = false; + Poco::Timestamp fail_time = 0; + + size_t weight_prefix_sum; + + bool operator <(const Record & r) const + { + return address < r.address; + } + + size_t getWeight() const + { + if (failed) + return 0; + + /// There is no goal to make usage's distribution ideally even + /// The goal is to chose more often new address, but still use old addresses as well + /// when all addresses have usage counter greater than 10000, + /// no more corrections are needed, just random choice is ok + if (usage > 10000) + return 1; + if (usage > 1000) + return 5; + if (usage > 100) + return 8; + return 10; + } + }; + + using Records = std::vector; + + Poco::Net::IPAddress selectBest() TSA_REQUIRES(mutex); + Records::iterator find(const Poco::Net::IPAddress & address) TSA_REQUIRES(mutex); + bool isUpdateNeeded(); + + void updateImpl(Poco::Timestamp now, std::vector & next_gen) TSA_REQUIRES(mutex); + void updateWeights() TSA_REQUIRES(mutex); + void updateWeightsImpl() TSA_REQUIRES(mutex); + size_t getTotalWeight() const TSA_REQUIRES(mutex); + + const String host; + const Poco::Timespan history; + const HostResolverMetrics metrics = getMetrics(); + + // for tests purpose + const ResolveFunction resolve_function; + + std::mutex mutex; + + Poco::Timestamp last_resolve_time TSA_GUARDED_BY(mutex); + Records records TSA_GUARDED_BY(mutex); + + std::uniform_int_distribution random_weight_picker TSA_GUARDED_BY(mutex); + + Poco::Logger * log = &Poco::Logger::get("ConnectionPool"); +}; + +class HostResolversPool +{ +private: + HostResolversPool() = default; + HostResolversPool(const HostResolversPool &) = delete; + HostResolversPool & operator=(const HostResolversPool &) = delete; + +public: + static HostResolversPool & instance(); + + void dropCache(); + + HostResolver::Ptr getResolver(const String & host); +private: + std::mutex mutex; + std::unordered_map host_pools TSA_GUARDED_BY(mutex); +}; + +} + diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 052c059a72d..0c9582ab4fb 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -696,6 +696,35 @@ The server successfully detected this situation and will download merged part fr \ M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas") \ M(ParallelReplicasUnavailableCount, "Number of replicas which was chosen, but found to be unavailable during query execution with task-based parallel replicas") \ + \ + M(StorageConnectionsCreated, "Number of created 
connections for storages") \
+    M(StorageConnectionsReused, "Number of reused connections for storages") \
+    M(StorageConnectionsReset, "Number of reset connections for storages") \
+    M(StorageConnectionsPreserved, "Number of preserved connections for storages") \
+    M(StorageConnectionsExpired, "Number of expired connections for storages") \
+    M(StorageConnectionsErrors, "Number of cases when creation of a connection for storages failed") \
+    M(StorageConnectionsElapsedMicroseconds, "Total time spent creating connections for storages") \
+    \
+    M(DiskConnectionsCreated, "Number of created connections for disk") \
+    M(DiskConnectionsReused, "Number of reused connections for disk") \
+    M(DiskConnectionsReset, "Number of reset connections for disk") \
+    M(DiskConnectionsPreserved, "Number of preserved connections for disk") \
+    M(DiskConnectionsExpired, "Number of expired connections for disk") \
+    M(DiskConnectionsErrors, "Number of cases when creation of a connection for disk failed") \
+    M(DiskConnectionsElapsedMicroseconds, "Total time spent creating connections for disk") \
+    \
+    M(HTTPConnectionsCreated, "Number of created http connections") \
+    M(HTTPConnectionsReused, "Number of reused http connections") \
+    M(HTTPConnectionsReset, "Number of reset http connections") \
+    M(HTTPConnectionsPreserved, "Number of preserved http connections") \
+    M(HTTPConnectionsExpired, "Number of expired http connections") \
+    M(HTTPConnectionsErrors, "Number of cases when creation of an http connection failed") \
+    M(HTTPConnectionsElapsedMicroseconds, "Total time spent creating http connections") \
+    \
+    M(AddressesDiscovered, "Total count of new addresses in dns resolve results for connection pools") \
+    M(AddressesExpired, "Total count of expired addresses which are no longer present in dns resolve results for connection pools") \
+    M(AddressesFailScored, "Total count of addresses which were marked as failed for connection pools") \
+
 
 #ifdef APPLY_FOR_EXTERNAL_EVENTS
 #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)
diff --git a/src/Common/ProxyConfiguration.h b/src/Common/ProxyConfiguration.h
index 53e569bf6e4..11a09cb5924 100644
--- a/src/Common/ProxyConfiguration.h
+++ b/src/Common/ProxyConfiguration.h
@@ -44,11 +44,13 @@ struct ProxyConfiguration
         }
     }
 
-    std::string host;
-    Protocol protocol;
-    uint16_t port;
-    bool tunneling;
-    Protocol original_request_protocol;
+    std::string host = std::string{};
+    Protocol protocol = Protocol::HTTP;
+    uint16_t port = 0;
+    bool tunneling = false;
+    Protocol original_request_protocol = Protocol::HTTP;
+
+    bool isEmpty() const { return host.size() == 0; }
 };
 
 }
diff --git a/src/Common/ProxyListConfigurationResolver.cpp b/src/Common/ProxyListConfigurationResolver.cpp
index 01a6f52185f..c9b8923929a 100644
--- a/src/Common/ProxyListConfigurationResolver.cpp
+++ b/src/Common/ProxyListConfigurationResolver.cpp
@@ -26,8 +26,6 @@ ProxyConfiguration ProxyListConfigurationResolver::resolve()
 
     auto & proxy = proxies[index];
 
-    LOG_DEBUG(getLogger("ProxyListConfigurationResolver"), "Use proxy: {}", proxies[index].toString());
-
     return ProxyConfiguration {
         proxy.getHost(),
         ProxyConfiguration::protocolFromString(proxy.getScheme()),
diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp
index 117c8a34dbb..ef972a8e318 100644
--- a/src/Common/RemoteProxyConfigurationResolver.cpp
+++ b/src/Common/RemoteProxyConfigurationResolver.cpp
@@ -69,7 +69,7 @@ ProxyConfiguration
RemoteProxyConfigurationResolver::resolve() { auto resolved_endpoint = endpoint; resolved_endpoint.setHost(resolved_hosts[i].toString()); - session = makeHTTPSession(resolved_endpoint, timeouts); + session = makeHTTPSession(HTTPConnectionGroupType::HTTP, resolved_endpoint, timeouts); try { diff --git a/src/Common/tests/gtest_connection_pool.cpp b/src/Common/tests/gtest_connection_pool.cpp new file mode 100644 index 00000000000..01b78958442 --- /dev/null +++ b/src/Common/tests/gtest_connection_pool.cpp @@ -0,0 +1,558 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace +{ + +size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count = std::numeric_limits::max()) +{ + const size_t buffer_size = 4096; + char buffer[buffer_size]; + + size_t total_read = 0; + + while (count > buffer_size) + { + in.read(buffer, buffer_size); + size_t read = in.gcount(); + out.write(buffer, read); + count -= read; + total_read += read; + + if (read == 0) + return total_read; + } + + in.read(buffer, count); + size_t read = in.gcount(); + out.write(buffer, read); + total_read += read; + + return total_read; +} + +class MockRequestHandler : public Poco::Net::HTTPRequestHandler +{ +public: + explicit MockRequestHandler(std::shared_ptr> slowdown_) + : slowdown(std::move(slowdown_)) + { + } + + void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override + { + response.setStatus(Poco::Net::HTTPResponse::HTTP_OK); + auto size = request.getContentLength(); + if (size > 0) + response.setContentLength(size); // ContentLength is required for keep alive + else + response.setChunkedTransferEncoding(true); // or chunk encoding + + sleepForSeconds(*slowdown); + + stream_copy_n(request.stream(), response.send(), size); + } + + std::shared_ptr> slowdown; +}; + +class HTTPRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory +{ +public: + explicit HTTPRequestHandlerFactory(std::shared_ptr> slowdown_) + : slowdown(std::move(slowdown_)) + { + } + + Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest &) override + { + return new MockRequestHandler(slowdown); + } + + std::shared_ptr> slowdown; +}; + +} + +using HTTPSession = Poco::Net::HTTPClientSession; +using HTTPSessionPtr = std::shared_ptr; + +class ConnectionPoolTest : public testing::Test { +protected: + ConnectionPoolTest() + { + startServer(); + } + + void SetUp() override { + timeouts = DB::ConnectionTimeouts(); + DB::HTTPConnectionPools::Limits def_limits{}; + DB::HTTPConnectionPools::instance().setLimits(def_limits, def_limits, def_limits); + + setSlowDown(0); + + DB::HTTPConnectionPools::instance().dropCache(); + DB::CurrentThread::getProfileEvents().reset(); + // Code here will be called immediately after the constructor (right + // before each test). + } + + void TearDown() override { + // Code here will be called immediately after each test (right + // before the destructor). 
+ } + + DB::IHTTPConnectionPoolForEndpoint::Ptr getPool() + { + auto uri = Poco::URI(getServerUrl()); + return DB::HTTPConnectionPools::instance().getPool(DB::HTTPConnectionGroupType::HTTP, uri, DB::ProxyConfiguration{}); + } + + std::string getServerUrl() const + { + return "http://" + server_data.socket->address().toString(); + } + + void startServer() + { + server_data.reset(); + server_data.params = new Poco::Net::HTTPServerParams(); + server_data.socket = std::make_unique(server_data.port); + server_data.handler_factory = new HTTPRequestHandlerFactory(slowdown_receive); + server_data.server = std::make_unique( + server_data.handler_factory, *server_data.socket, server_data.params); + + server_data.server->start(); + } + + Poco::Net::HTTPServer & getServer() const + { + return *server_data.server; + } + + void setSlowDown(size_t seconds) + { + *slowdown_receive = seconds; + } + + DB::ConnectionTimeouts timeouts; + std::shared_ptr> slowdown_receive = std::make_shared>(0); + + struct ServerData + { + // just some port to avoid collisions with others tests + UInt16 port = 9871; + Poco::Net::HTTPServerParams::Ptr params; + std::unique_ptr socket; + HTTPRequestHandlerFactory::Ptr handler_factory; + std::unique_ptr server; + + ServerData() = default; + ServerData(ServerData &&) = default; + ServerData & operator =(ServerData &&) = delete; + + void reset() + { + if (server) + server->stop(); + + server = nullptr; + handler_factory = nullptr; + socket = nullptr; + params = nullptr; + } + + ~ServerData() { + reset(); + } + }; + + ServerData server_data; +}; + + +void wait_until(std::function pred) +{ + while (!pred()) + sleepForMilliseconds(250); +} + +void echoRequest(String data, HTTPSession & session) +{ + { + Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_PUT, "/", "HTTP/1.1"); // HTTP/1.1 is required for keep alive + request.setContentLength(data.size()); + std::ostream & ostream = session.sendRequest(request); + ostream << data; + } + + { + std::stringstream result; + Poco::Net::HTTPResponse response; + std::istream & istream = session.receiveResponse(response); + ASSERT_EQ(response.getStatus(), Poco::Net::HTTPResponse::HTTP_OK); + + stream_copy_n(istream, result); + ASSERT_EQ(data, result.str()); + } +} + +TEST_F(ConnectionPoolTest, CanConnect) +{ + auto pool = getPool(); + auto connection = pool->getConnection(timeouts); + + ASSERT_TRUE(connection->connected()); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + + ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); + ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + + wait_until([&] () { return getServer().currentConnections() == 1; }); + ASSERT_EQ(1, getServer().currentConnections()); + ASSERT_EQ(1, getServer().totalConnections()); + + connection->reset(); + + wait_until([&] () { return getServer().currentConnections() == 0; }); + ASSERT_EQ(0, getServer().currentConnections()); + ASSERT_EQ(1, getServer().totalConnections()); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); +} + +TEST_F(ConnectionPoolTest, CanRequest) +{ + auto pool = getPool(); + auto connection = pool->getConnection(timeouts); + + echoRequest("Hello", *connection); + + ASSERT_EQ(1, getServer().totalConnections()); + ASSERT_EQ(1, getServer().currentConnections()); + + connection->reset(); + + wait_until([&] () { return getServer().currentConnections() == 0; }); + ASSERT_EQ(0, getServer().currentConnections()); + ASSERT_EQ(1, 
getServer().totalConnections()); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); +} + +TEST_F(ConnectionPoolTest, CanPreserve) +{ + auto pool = getPool(); + + { + auto connection = pool->getConnection(timeouts); + } + + ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); + ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count)); + + wait_until([&] () { return getServer().currentConnections() == 1; }); + ASSERT_EQ(1, getServer().currentConnections()); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); +} + +TEST_F(ConnectionPoolTest, CanReuse) +{ + auto pool = getPool(); + + { + auto connection = pool->getConnection(timeouts); + // DB::setReuseTag(*connection); + } + + ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); + ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count)); + + { + auto connection = pool->getConnection(timeouts); + + ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); + ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + + wait_until([&] () { return getServer().currentConnections() == 1; }); + ASSERT_EQ(1, getServer().currentConnections()); + + echoRequest("Hello", *connection); + + ASSERT_EQ(1, getServer().totalConnections()); + ASSERT_EQ(1, getServer().currentConnections()); + + connection->reset(); + } + + ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count)); + ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + + wait_until([&] () { return getServer().currentConnections() == 0; }); + ASSERT_EQ(0, getServer().currentConnections()); + ASSERT_EQ(1, getServer().totalConnections()); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); +} + +TEST_F(ConnectionPoolTest, CanReuse10) +{ + auto pool = getPool(); + + + for (int i = 0; i < 10; ++i) + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + } + + { + auto connection = pool->getConnection(timeouts); + connection->reset(); // reset just not to wait its expiration here + } + + wait_until([&] () { return getServer().currentConnections() == 0; }); + ASSERT_EQ(0, getServer().currentConnections()); + ASSERT_EQ(1, getServer().totalConnections()); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); +} + +TEST_F(ConnectionPoolTest, CanReuse5) +{ + timeouts.withHTTPKeepAliveTimeout(1); + + auto pool = getPool(); + + std::vector connections; + connections.reserve(5); + for (int i = 0; i < 5; ++i) + { + connections.push_back(pool->getConnection(timeouts)); + } + connections.clear(); + + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count)); + ASSERT_EQ(5, 
CurrentMetrics::get(pool->getMetrics().stored_count)); + + wait_until([&] () { return getServer().currentConnections() == 5; }); + ASSERT_EQ(5, getServer().currentConnections()); + ASSERT_EQ(5, getServer().totalConnections()); + + for (int i = 0; i < 5; ++i) + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + } + + ASSERT_EQ(5, getServer().totalConnections()); + + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count)); + ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().stored_count)); +} + +TEST_F(ConnectionPoolTest, CanReconnectAndCreate) +{ + auto pool = getPool(); + + std::vector in_use; + + const size_t count = 2; + for (int i = 0; i < count; ++i) + { + auto connection = pool->getConnection(timeouts); + // DB::setReuseTag(*connection); + in_use.push_back(connection); + } + + ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + + ASSERT_EQ(count, CurrentMetrics::get(pool->getMetrics().active_count)); + ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + + auto connection = std::move(in_use.back()); + in_use.pop_back(); + + echoRequest("Hello", *connection); + + connection->abort(); // further usage requires reconnect, new connection + + echoRequest("Hello", *connection); + + connection->reset(); + + wait_until([&] () { return getServer().currentConnections() == 1; }); + ASSERT_EQ(1, getServer().currentConnections()); + ASSERT_EQ(count+1, getServer().totalConnections()); + + ASSERT_EQ(count+1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); +} + +TEST_F(ConnectionPoolTest, CanReconnectAndReuse) +{ + auto pool = getPool(); + + std::vector in_use; + + const size_t count = 2; + for (int i = 0; i < count; ++i) + { + auto connection = pool->getConnection(timeouts); + // DB::setReuseTag(*connection); + in_use.push_back(std::move(connection)); + } + + auto connection = std::move(in_use.back()); + in_use.pop_back(); + in_use.clear(); // other connection will be reused + + echoRequest("Hello", *connection); + + connection->abort(); // further usage requires reconnect, reuse connection from pool + + echoRequest("Hello", *connection); + + connection->reset(); + + wait_until([&] () { return getServer().currentConnections() == 0; }); + ASSERT_EQ(0, getServer().currentConnections()); + ASSERT_EQ(2, getServer().totalConnections()); + + ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); +} + +TEST_F(ConnectionPoolTest, ReceiveTimeout) +{ + setSlowDown(2); + timeouts.withReceiveTimeout(1); + + auto pool = getPool(); + + { + auto connection = pool->getConnection(timeouts); + ASSERT_ANY_THROW( + echoRequest("Hello", *connection); + ); + } + + ASSERT_EQ(1, 
DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + + { + timeouts.withReceiveTimeout(3); + auto connection = pool->getConnection(timeouts); + ASSERT_NO_THROW( + echoRequest("Hello", *connection); + ); + } + + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + + { + /// timeouts have effect for reused session + timeouts.withReceiveTimeout(1); + auto connection = pool->getConnection(timeouts); + ASSERT_ANY_THROW( + echoRequest("Hello", *connection); + ); + } + + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); +} + +TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) +{ + std::string_view message = "Hello ReadWriteBufferFromHTTP"; + auto uri = Poco::URI(getServerUrl()); + auto metrics = DB::HTTPConnectionPools::instance().getPool(DB::HTTPConnectionGroupType::HTTP, uri, DB::ProxyConfiguration{})->getMetrics(); + Poco::Net::HTTPBasicCredentials empty_creds; + auto buf_from_http = DB::BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(DB::HTTPConnectionGroupType::HTTP) + .withOutCallback( + [&] (std::ostream & in) + { + in << message; + }) + .withDelayInit(false) + .create(empty_creds); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); + + char buf[256]; + std::fill(buf, buf + sizeof(buf), 0); + + buf_from_http->readStrict(buf, message.size()); + ASSERT_EQ(std::string_view(buf), message); + ASSERT_TRUE(buf_from_http->eof()); + + buf_from_http.reset(); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); +} + +TEST_F(ConnectionPoolTest, HardLimit) +{ + DB::HTTPConnectionPools::Limits zero_limits {0, 0, 0}; + DB::HTTPConnectionPools::instance().setLimits(zero_limits, zero_limits, zero_limits); + + auto pool = getPool(); + + { + auto connection = pool->getConnection(timeouts); + } + + ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count)); + ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); +} diff --git a/src/Common/tests/gtest_resolve_pool.cpp b/src/Common/tests/gtest_resolve_pool.cpp new file mode 100644 index 00000000000..25e867fdebc --- /dev/null +++ 
b/src/Common/tests/gtest_resolve_pool.cpp @@ -0,0 +1,278 @@ +#include +#include +#include + +#include +#include + +class ResolvePoolMock : public DB::HostResolver +{ +public: + using ResolveFunction = DB::HostResolver::ResolveFunction; + + ResolvePoolMock(String host_, Poco::Timespan history_, ResolveFunction && func) + : DB::HostResolver(std::move(func), std::move(host_), history_) + { + } +}; + +class ResolvePoolTest : public testing::Test +{ +protected: + ResolvePoolTest() + { + DB::HostResolversPool::instance().dropCache(); + } + + void SetUp() override { + DB::CurrentThread::getProfileEvents().reset(); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + + addresses = std::set{"127.0.0.1", "127.0.0.2", "127.0.0.3"}; + // Code here will be called immediately after the constructor (right + // before each test). + } + + void TearDown() override { + // Code here will be called immediately after each test (right + // before the destructor). + } + + DB::HostResolver::Ptr make_resolver(size_t history_ms = 200) + { + auto resolve_func = [&] (const String &) + { + std::vector result; + result.reserve(addresses.size()); + for (const auto & item : addresses) + { + result.push_back(Poco::Net::IPAddress(item)); + } + return result; + }; + + + return std::make_shared("some_host", Poco::Timespan(history_ms * 1000), std::move(resolve_func)); + } + + DB::HostResolverMetrics metrics = DB::HostResolver::getMetrics(); + std::set addresses; +}; + +TEST_F(ResolvePoolTest, CanResolve) +{ + auto resolver = make_resolver(); + auto address = resolver->resolve(); + + ASSERT_TRUE(addresses.contains(*address)); + + ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]); + ASSERT_EQ(addresses.size(), CurrentMetrics::get(metrics.active_count)); +} + +TEST_F(ResolvePoolTest, CanResolveAll) +{ + auto resolver = make_resolver(); + + std::set results; + while (results.size() != addresses.size()) + { + auto next_addr = resolver->resolve(); + results.insert(*next_addr); + } + + ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]); +} + +size_t getSum(std::map container) +{ + size_t sum = 0; + for (auto & [_, val] : container) + { + sum += val; + } + return sum; +} + +size_t getMin(std::map container) +{ + if (container.empty()) + return 0; + + size_t min_val = container.begin()->second; + for (auto & [_, val] : container) + { + min_val = std::min(min_val, val); + } + return min_val; +} + +double getMean(std::map container) +{ + return 1.0 * getSum(container) / container.size(); +} + +double getMaxDiff(std::map container, double ref_val) +{ + double diff = 0.0; + for (auto & [_, val] : container) + { + diff = std::max(std::fabs(val - ref_val), diff); + } + + return diff; +} + +TEST_F(ResolvePoolTest, CanResolveEvenly) +{ + auto resolver = make_resolver(); + + std::map results; + + for (size_t i = 0; i < 50000; ++i) + { + auto next_addr = resolver->resolve(); + if (results.contains(*next_addr)) + { + results[*next_addr] += 1; + } + else + { + results[*next_addr] = 1; + } + } + + auto mean = getMean(results); + auto diff = getMaxDiff(results, mean); + + ASSERT_GT(0.3 * mean, diff); +} + +TEST_F(ResolvePoolTest, CanMerge) +{ + auto resolver = make_resolver(100000); + auto address = resolver->resolve(); + + ASSERT_TRUE(addresses.contains(*address)); + + ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]); + + auto old_addresses = addresses; + addresses = std::set{"127.0.0.4", "127.0.0.5"}; + + + resolver->update(); + 
ASSERT_EQ(addresses.size() + old_addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]); + ASSERT_EQ(addresses.size() + old_addresses.size(), CurrentMetrics::get(metrics.active_count)); + + std::set results; + while (results.size() != addresses.size() + old_addresses.size()) + { + auto next_addr = resolver->resolve(); + results.insert(*next_addr); + } +} + +TEST_F(ResolvePoolTest, CanGainEven) +{ + auto resolver = make_resolver(); + auto address = resolver->resolve(); + + std::map results; + for (size_t i = 0; i < 40000; ++i) + { + auto next_addr = resolver->resolve(); + if (results.contains(*next_addr)) + { + results[*next_addr] += 1; + } + else + { + results[*next_addr] = 1; + } + } + + ASSERT_GT(getMin(results), 10000); + + addresses.insert("127.0.0.4"); + addresses.insert("127.0.0.5"); + + resolver->update(); + + /// return mostly new addresses + for (size_t i = 0; i < 3000; ++i) + { + auto next_addr = resolver->resolve(); + if (results.contains(*next_addr)) + { + results[*next_addr] += 1; + } + else + { + results[*next_addr] = 1; + } + } + + ASSERT_EQ(results.size(), 5); + + ASSERT_GT(getMin(results), 1000); +} + +TEST_F(ResolvePoolTest, CanFail) +{ + auto resolver = make_resolver(10000); + + auto failed_addr = resolver->resolve(); + failed_addr.setFail(); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.failed]); + ASSERT_EQ(addresses.size(), CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]); + + for (size_t i = 0; i < 1000; ++i) + { + auto next_addr = resolver->resolve(); + + ASSERT_TRUE(addresses.contains(*next_addr)); + ASSERT_NE(*next_addr, *failed_addr); + } +} + +TEST_F(ResolvePoolTest, CanFailAndHeal) +{ + auto resolver = make_resolver(); + + auto failed_addr = resolver->resolve(); + failed_addr.setFail(); + + while (true) + { + auto next_addr = resolver->resolve(); + if (*failed_addr == *next_addr) + break; + } +} + + +TEST_F(ResolvePoolTest, CanExpire) +{ + auto resolver = make_resolver(); + + auto expired_addr = resolver->resolve(); + ASSERT_TRUE(addresses.contains(*expired_addr)); + + addresses.erase(*expired_addr); + sleepForSeconds(1); + + for (size_t i = 0; i < 1000; ++i) + { + auto next_addr = resolver->resolve(); + + ASSERT_TRUE(addresses.contains(*next_addr)); + ASSERT_NE(*next_addr, *expired_addr); + } + + ASSERT_EQ(addresses.size() + 1, DB::CurrentThread::getProfileEvents()[metrics.discovered]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.expired]); +} diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index fc478ae4f41..c201bab6063 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -128,8 +128,17 @@ namespace DB M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \ M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \ M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \ + M(UInt64, disk_connections_soft_limit, 1000, "Connections above this limit have a significantly shorter time to live. The limit applies to the disk connections.", 0) \ + M(UInt64, disk_connections_warn_limit, 10000, "Warning messages are written to the logs if the number of in-use connections is higher than this limit.
The limit applies to the disk connections.", 0) \ + M(UInt64, disk_connections_store_limit, 12000, "Connections above this limit are reset after use. Set to 0 to turn connection cache off. The limit applies to the disk connections.", 0) \ + M(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have a significantly shorter time to live. The limit applies to the storage connections.", 0) \ + M(UInt64, storage_connections_warn_limit, 1000, "Warning messages are written to the logs if the number of in-use connections is higher than this limit. The limit applies to the storage connections.", 0) \ + M(UInt64, storage_connections_store_limit, 5000, "Connections above this limit are reset after use. Set to 0 to turn connection cache off. The limit applies to the storage connections.", 0) \ + M(UInt64, http_connections_soft_limit, 100, "Connections above this limit have a significantly shorter time to live. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ + M(UInt64, http_connections_warn_limit, 1000, "Warning messages are written to the logs if the number of in-use connections is higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ + M(UInt64, http_connections_store_limit, 5000, "Connections above this limit are reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ - /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp +/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index fceee63d4bb..d70a6cf51c5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -110,7 +110,7 @@ class IColumn; M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ - M(UInt64, s3_http_connection_pool_size, 1000, "How many reusable open connections to keep per S3 endpoint. This only applies to the S3 table engine and table function, not to S3 disks (for disks, use disk config instead). Global setting, can only be set in config, overriding it per session or per query has no effect.", 0) \ + M(UInt64, s3_connect_timeout_ms, 1000, "Connection timeout for host from s3 disks.", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \ M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log.
The value can be overwritten per table with table settings", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 2f1da7935e6..e680c02671a 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,7 +85,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { - {"24.3", {{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, + {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, + {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index bf42b7931ed..dae8ec06d30 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -88,20 +88,18 @@ void HTTPDictionarySource::getUpdateFieldAndDate(Poco::URI & uri) QueryPipeline HTTPDictionarySource::loadAll() { LOG_TRACE(log, "loadAll {}", toString()); - Poco::URI uri(configuration.url); - auto in_ptr = std::make_unique( - uri, - Poco::Net::HTTPRequest::HTTP_GET, - ReadWriteBufferFromHTTP::OutStreamCallback(), - timeouts, - credentials, - 0, - DBMS_DEFAULT_BUFFER_SIZE, - context->getReadSettings(), - configuration.header_entries, - nullptr, false); - return createWrappedBuffer(std::move(in_ptr)); + Poco::URI uri(configuration.url); + + auto buf = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withSettings(context->getReadSettings()) + .withTimeouts(timeouts) + .withHeaders(configuration.header_entries) + .withDelayInit(false) + .create(credentials); + + return createWrappedBuffer(std::move(buf)); } QueryPipeline HTTPDictionarySource::loadUpdatedAll() @@ -109,19 +107,16 @@ QueryPipeline HTTPDictionarySource::loadUpdatedAll() Poco::URI uri(configuration.url); getUpdateFieldAndDate(uri); LOG_TRACE(log, "loadUpdatedAll {}", uri.toString()); - auto in_ptr = std::make_unique( - uri, - Poco::Net::HTTPRequest::HTTP_GET, - ReadWriteBufferFromHTTP::OutStreamCallback(), - timeouts, - credentials, - 0, - DBMS_DEFAULT_BUFFER_SIZE, - context->getReadSettings(), - configuration.header_entries, - nullptr, false); - return createWrappedBuffer(std::move(in_ptr)); + auto buf = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withSettings(context->getReadSettings()) + .withTimeouts(timeouts) + .withHeaders(configuration.header_entries) + .withDelayInit(false) + .create(credentials); + + return createWrappedBuffer(std::move(buf)); } QueryPipeline HTTPDictionarySource::loadIds(const std::vector & ids) @@ -139,19 +134,18 @@ QueryPipeline HTTPDictionarySource::loadIds(const std::vector & ids) }; Poco::URI uri(configuration.url); - auto in_ptr = std::make_unique( - uri, - Poco::Net::HTTPRequest::HTTP_POST, - out_stream_callback, - timeouts, - credentials, - 0, - DBMS_DEFAULT_BUFFER_SIZE, - context->getReadSettings(), - configuration.header_entries, - nullptr, false); - return createWrappedBuffer(std::move(in_ptr)); + auto buf = BuilderRWBufferFromHTTP(uri) + 
.withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withSettings(context->getReadSettings()) + .withTimeouts(timeouts) + .withHeaders(configuration.header_entries) + .withOutCallback(std::move(out_stream_callback)) + .withDelayInit(false) + .create(credentials); + + return createWrappedBuffer(std::move(buf)); } QueryPipeline HTTPDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) @@ -169,19 +163,18 @@ QueryPipeline HTTPDictionarySource::loadKeys(const Columns & key_columns, const }; Poco::URI uri(configuration.url); - auto in_ptr = std::make_unique( - uri, - Poco::Net::HTTPRequest::HTTP_POST, - out_stream_callback, - timeouts, - credentials, - 0, - DBMS_DEFAULT_BUFFER_SIZE, - context->getReadSettings(), - configuration.header_entries, - nullptr, false); - return createWrappedBuffer(std::move(in_ptr)); + auto buf = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withSettings(context->getReadSettings()) + .withTimeouts(timeouts) + .withHeaders(configuration.header_entries) + .withOutCallback(std::move(out_stream_callback)) + .withDelayInit(false) + .create(credentials); + + return createWrappedBuffer(std::move(buf)); } bool HTTPDictionarySource::isModified() const diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 70fe889a8ea..1ebfc4a29b0 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -203,7 +203,7 @@ std::string XDBCDictionarySource::doInvalidateQuery(const std::string & request) } -QueryPipeline XDBCDictionarySource::loadFromQuery(const Poco::URI & url, const Block & required_sample_block, const std::string & query) const +QueryPipeline XDBCDictionarySource::loadFromQuery(const Poco::URI & uri, const Block & required_sample_block, const std::string & query) const { bridge_helper->startBridgeSync(); @@ -214,10 +214,15 @@ QueryPipeline XDBCDictionarySource::loadFromQuery(const Poco::URI & url, const B os << "query=" << escapeForFileName(query); }; - auto read_buf = std::make_unique( - url, Poco::Net::HTTPRequest::HTTP_POST, write_body_callback, timeouts, credentials); - auto format = getContext()->getInputFormat(IXDBCBridgeHelper::DEFAULT_FORMAT, *read_buf, required_sample_block, max_block_size); - format->addBuffer(std::move(read_buf)); + auto buf = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(timeouts) + .withOutCallback(std::move(write_body_callback)) + .create(credentials); + + auto format = getContext()->getInputFormat(IXDBCBridgeHelper::DEFAULT_FORMAT, *buf, required_sample_block, max_block_size); + format->addBuffer(std::move(buf)); return QueryPipeline(std::move(format)); } diff --git a/src/Dictionaries/XDBCDictionarySource.h b/src/Dictionaries/XDBCDictionarySource.h index 6011563c522..64d22807254 100644 --- a/src/Dictionaries/XDBCDictionarySource.h +++ b/src/Dictionaries/XDBCDictionarySource.h @@ -74,7 +74,7 @@ private: // execute invalidate_query. 
expects single cell in result std::string doInvalidateQuery(const std::string & request) const; - QueryPipeline loadFromQuery(const Poco::URI & url, const Block & required_sample_block, const std::string & query) const; + QueryPipeline loadFromQuery(const Poco::URI & uri, const Block & required_sample_block, const std::string & query) const; LoggerPtr log; diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp b/src/Disks/IO/ReadBufferFromWebServer.cpp index cc872392738..7509aa81d75 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.cpp +++ b/src/Disks/IO/ReadBufferFromWebServer.cpp @@ -1,8 +1,6 @@ #include "ReadBufferFromWebServer.h" #include -#include -#include #include #include #include @@ -45,12 +43,6 @@ std::unique_ptr ReadBufferFromWebServer::initialize() { if (read_until_position < offset) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); - - LOG_DEBUG(log, "Reading with range: {}-{}", offset, read_until_position); - } - else - { - LOG_DEBUG(log, "Reading from offset: {}", offset); } const auto & settings = context->getSettingsRef(); @@ -60,19 +52,14 @@ std::unique_ptr ReadBufferFromWebServer::initialize() connection_timeouts.withConnectionTimeout(std::max(settings.http_connection_timeout, Poco::Timespan(20, 0))); connection_timeouts.withReceiveTimeout(std::max(settings.http_receive_timeout, Poco::Timespan(20, 0))); - auto res = std::make_unique( - uri, - Poco::Net::HTTPRequest::HTTP_GET, - ReadWriteBufferFromHTTP::OutStreamCallback(), - connection_timeouts, - credentials, - 0, - buf_size, - read_settings, - HTTPHeaderEntries{}, - &context->getRemoteHostFilter(), - /* delay_initialization */true, - use_external_buffer); + auto res = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::DISK) + .withSettings(read_settings) + .withTimeouts(connection_timeouts) + .withBufSize(buf_size) + .withHostFilter(&context->getRemoteHostFilter()) + .withExternalBuf(use_external_buffer) + .create(credentials); if (read_until_position) res->setReadUntilPosition(read_until_position); @@ -101,44 +88,44 @@ bool ReadBufferFromWebServer::nextImpl() throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); } - if (impl) - { - if (!use_external_buffer) - { - /** - * impl was initialized before, pass position() to it to make - * sure there is no pending data which was not read, because - * this branch means we read sequentially. - */ - impl->position() = position(); - assert(!impl->hasPendingData()); - } - } - else + if (!impl) { impl = initialize(); + + if (!use_external_buffer) + { + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); + } } if (use_external_buffer) { - /** - * use_external_buffer -- means we read into the buffer which - * was passed to us from somewhere else. We do not check whether - * previously returned buffer was read or not, because this branch - * means we are prefetching data, each nextImpl() call we can fill - * a different buffer. 
- */ impl->set(internal_buffer.begin(), internal_buffer.size()); - assert(working_buffer.begin() != nullptr); - assert(!internal_buffer.empty()); + } + else + { + impl->position() = position(); } + chassert(available() == 0); + + chassert(pos >= working_buffer.begin()); + chassert(pos <= working_buffer.end()); + + chassert(working_buffer.begin() != nullptr); + chassert(impl->buffer().begin() != nullptr); + chassert(working_buffer.begin() == impl->buffer().begin()); + + chassert(impl->available() == 0); + auto result = impl->next(); + + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); + + chassert(working_buffer.begin() == impl->buffer().begin()); + if (result) - { - BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); offset += working_buffer.size(); - } return result; } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 4fd4b17aabe..7cc29bf1da2 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -67,10 +67,6 @@ std::unique_ptr getClient( client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); client_configuration.endpointOverride = uri.endpoint; - client_configuration.http_keep_alive_timeout_ms = config.getUInt( - config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000); - client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000); - client_configuration.wait_on_pool_size_limit = false; client_configuration.s3_use_adaptive_timeouts = config.getBool( config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 48de0bf4168..0bad668a404 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -44,34 +44,33 @@ WebObjectStorage::loadFiles(const String & path, const std::unique_lockgetSettingsRef(), - getContext()->getServerSettings().keep_alive_timeout), - credentials, - /* max_redirects= */ 0, - /* buffer_size_= */ DBMS_DEFAULT_BUFFER_SIZE, - getContext()->getReadSettings()); + auto timeouts = ConnectionTimeouts::getHTTPTimeouts( + getContext()->getSettingsRef(), + getContext()->getServerSettings().keep_alive_timeout); + + auto metadata_buf = BuilderRWBufferFromHTTP(Poco::URI(fs::path(full_url) / ".index")) + .withConnectionGroup(HTTPConnectionGroupType::DISK) + .withSettings(getContext()->getReadSettings()) + .withTimeouts(timeouts) + .withHostFilter(&getContext()->getRemoteHostFilter()) + .create(credentials); String file_name; - while (!metadata_buf.eof()) + while (!metadata_buf->eof()) { - readText(file_name, metadata_buf); - assertChar('\t', metadata_buf); + readText(file_name, *metadata_buf); + assertChar('\t', *metadata_buf); bool is_directory; - readBoolText(is_directory, metadata_buf); + readBoolText(is_directory, *metadata_buf); size_t size = 0; if (!is_directory) { - assertChar('\t', metadata_buf); - readIntText(size, metadata_buf); + assertChar('\t', *metadata_buf); + readIntText(size, *metadata_buf); } - assertChar('\n', metadata_buf); + assertChar('\n', *metadata_buf); FileDataPtr file_data = is_directory ? 
FileData::createDirectoryInfo(false) diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index a052f20d6fa..6679646fef4 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -122,7 +122,7 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { std::string_view sqid = col_non_const->getDataAt(i).toView(); - std::vector integers = sqids.decode(sqid); + std::vector integers = sqids.decode(String(sqid)); res_nested_data.insert(integers.begin(), integers.end()); res_offsets_data.push_back(integers.size()); } diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index f2db3169400..c4b636103fe 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -141,4 +141,19 @@ ConnectionTimeouts ConnectionTimeouts::getAdaptiveTimeouts(const String & method .withReceiveTimeout(saturate(recv, receive_timeout)); } +void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts) +{ + session.setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); + session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); +} + +ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session) +{ + return ConnectionTimeouts() + .withConnectionTimeout(session.getConnectionTimeout()) + .withSendTimeout(session.getSendTimeout()) + .withReceiveTimeout(session.getReceiveTimeout()) + .withHTTPKeepAliveTimeout(session.getKeepAliveTimeout()); +} + } diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index 7fe97b5ec36..2b2ab0e7ab8 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -4,6 +4,7 @@ #include #include +#include #include namespace DB @@ -111,4 +112,7 @@ inline ConnectionTimeouts & ConnectionTimeouts::withConnectionTimeout(Poco::Time return *this; } +void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts); +ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session); + } diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index c4468a1b896..09f7724d613 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -2,13 +2,7 @@ #include #include -#include -#include #include -#include -#include -#include -#include #include "config.h" @@ -25,338 +19,18 @@ #include -#include -#include +#include #include #include -namespace ProfileEvents -{ - extern const Event CreatedHTTPConnections; -} - namespace DB { + namespace ErrorCodes { extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER; extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; - extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME; - extern const int UNSUPPORTED_URI_SCHEME; - extern const int LOGICAL_ERROR; -} - - -namespace -{ - Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const ProxyConfiguration & proxy_configuration) - { - Poco::Net::HTTPClientSession::ProxyConfig poco_proxy_config; - - poco_proxy_config.host = proxy_configuration.host; - poco_proxy_config.port = proxy_configuration.port; - poco_proxy_config.protocol = ProxyConfiguration::protocolToString(proxy_configuration.protocol); - poco_proxy_config.tunnel = proxy_configuration.tunneling; - poco_proxy_config.originalRequestProtocol = ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol); - - return poco_proxy_config; - } - - template - requires std::derived_from - class HTTPSessionAdapter : public Session - { - static_assert(std::has_virtual_destructor_v, "The base class must have a virtual 
destructor"); - - public: - HTTPSessionAdapter(const std::string & host, UInt16 port) : Session(host, port), log{getLogger("HTTPSessionAdapter")} { } - ~HTTPSessionAdapter() override = default; - - protected: - void reconnect() override - { - // First of all will try to establish connection with last used addr. - if (!Session::getResolvedHost().empty()) - { - try - { - Session::reconnect(); - return; - } - catch (...) - { - Session::close(); - LOG_TRACE( - log, - "Last ip ({}) is unreachable for {}:{}. Will try another resolved address.", - Session::getResolvedHost(), - Session::getHost(), - Session::getPort()); - } - } - - const auto endpoinds = DNSResolver::instance().resolveHostAll(Session::getHost()); - - for (auto it = endpoinds.begin();;) - { - try - { - Session::setResolvedHost(it->toString()); - Session::reconnect(); - - LOG_TRACE( - log, - "Created HTTP(S) session with {}:{} ({}:{})", - Session::getHost(), - Session::getPort(), - it->toString(), - Session::getPort()); - - break; - } - catch (...) - { - Session::close(); - if (++it == endpoinds.end()) - { - Session::setResolvedHost(""); - throw; - } - LOG_TRACE( - log, - "Failed to create connection with {}:{}, Will try another resolved address. {}", - Session::getResolvedHost(), - Session::getPort(), - getCurrentExceptionMessage(false)); - } - } - } - LoggerPtr log; - }; - - bool isHTTPS(const Poco::URI & uri) - { - if (uri.getScheme() == "https") - return true; - else if (uri.getScheme() == "http") - return false; - else - throw Exception(ErrorCodes::UNSUPPORTED_URI_SCHEME, "Unsupported scheme in URI '{}'", uri.toString()); - } - - HTTPSessionPtr makeHTTPSessionImpl( - const std::string & host, - UInt16 port, - bool https, - bool keep_alive, - DB::ProxyConfiguration proxy_configuration = {}) - { - HTTPSessionPtr session; - - if (!proxy_configuration.host.empty()) - { - bool is_proxy_http_and_is_tunneling_off = DB::ProxyConfiguration::Protocol::HTTP == proxy_configuration.protocol - && !proxy_configuration.tunneling; - - // If it is an HTTPS request, proxy server is HTTP and user opted for tunneling off, we must not create an HTTPS request. - // The desired flow is: HTTP request to the proxy server, then proxy server will initiate an HTTPS request to the target server. - // There is a weak link in the security, but that's what the user opted for. - if (https && is_proxy_http_and_is_tunneling_off) - { - https = false; - } - } - - if (https) - { -#if USE_SSL - session = std::make_shared>(host, port); -#else - throw Exception(ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME, "ClickHouse was built without HTTPS support"); -#endif - } - else - { - session = std::make_shared>(host, port); - } - - ProfileEvents::increment(ProfileEvents::CreatedHTTPConnections); - - /// doesn't work properly without patch - session->setKeepAlive(keep_alive); - - if (!proxy_configuration.host.empty()) - { - session->setProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration)); - } - - return session; - } - - class SingleEndpointHTTPSessionPool : public PoolBase - { - private: - const std::string host; - const UInt16 port; - const bool https; - ProxyConfiguration proxy_config; - - using Base = PoolBase; - - ObjectPtr allocObject() override - { - /// Pool is global, we shouldn't attribute this memory to query/user. 
- MemoryTrackerSwitcher switcher{&total_memory_tracker}; - - auto session = makeHTTPSessionImpl(host, port, https, true, proxy_config); - return session; - } - - public: - SingleEndpointHTTPSessionPool( - const std::string & host_, - UInt16 port_, - bool https_, - ProxyConfiguration proxy_config_, - size_t max_pool_size_, - bool wait_on_pool_size_limit) - : Base( - static_cast(max_pool_size_), - getLogger("HTTPSessionPool"), - wait_on_pool_size_limit ? BehaviourOnLimit::Wait : BehaviourOnLimit::AllocateNewBypassingPool) - , host(host_) - , port(port_) - , https(https_) - , proxy_config(proxy_config_) - { - } - }; - - class HTTPSessionPool : private boost::noncopyable - { - public: - struct Key - { - String target_host; - UInt16 target_port; - bool is_target_https; - ProxyConfiguration proxy_config; - bool wait_on_pool_size_limit; - - bool operator ==(const Key & rhs) const - { - return std::tie( - target_host, - target_port, - is_target_https, - proxy_config.host, - proxy_config.port, - proxy_config.protocol, - proxy_config.tunneling, - proxy_config.original_request_protocol, - wait_on_pool_size_limit) - == std::tie( - rhs.target_host, - rhs.target_port, - rhs.is_target_https, - rhs.proxy_config.host, - rhs.proxy_config.port, - rhs.proxy_config.protocol, - rhs.proxy_config.tunneling, - rhs.proxy_config.original_request_protocol, - rhs.wait_on_pool_size_limit); - } - }; - - private: - using PoolPtr = std::shared_ptr; - using Entry = SingleEndpointHTTPSessionPool::Entry; - - struct Hasher - { - size_t operator()(const Key & k) const - { - SipHash s; - s.update(k.target_host); - s.update(k.target_port); - s.update(k.is_target_https); - s.update(k.proxy_config.host); - s.update(k.proxy_config.port); - s.update(k.proxy_config.protocol); - s.update(k.proxy_config.tunneling); - s.update(k.proxy_config.original_request_protocol); - s.update(k.wait_on_pool_size_limit); - return s.get64(); - } - }; - - std::mutex mutex; - std::unordered_map endpoints_pool; - - protected: - HTTPSessionPool() = default; - - public: - static auto & instance() - { - static HTTPSessionPool instance; - return instance; - } - - Entry getSession( - const Poco::URI & uri, - const ProxyConfiguration & proxy_config, - const ConnectionTimeouts & timeouts, - size_t max_connections_per_endpoint, - bool wait_on_pool_size_limit) - { - std::unique_lock lock(mutex); - const std::string & host = uri.getHost(); - UInt16 port = uri.getPort(); - bool https = isHTTPS(uri); - - HTTPSessionPool::Key key{host, port, https, proxy_config, wait_on_pool_size_limit}; - auto pool_ptr = endpoints_pool.find(key); - if (pool_ptr == endpoints_pool.end()) - std::tie(pool_ptr, std::ignore) = endpoints_pool.emplace( - key, - std::make_shared( - host, - port, - https, - proxy_config, - max_connections_per_endpoint, - wait_on_pool_size_limit)); - - /// Some routines held session objects until the end of its lifetime. Also this routines may create another sessions in this time frame. - /// If some other session holds `lock` because it waits on another lock inside `pool_ptr->second->get` it isn't possible to create any - /// new session and thus finish routine, return session to the pool and unlock the thread waiting inside `pool_ptr->second->get`. - /// To avoid such a deadlock we unlock `lock` before entering `pool_ptr->second->get`. 
- lock.unlock(); - - auto retry_timeout = timeouts.connection_timeout.totalMilliseconds(); - auto session = pool_ptr->second->get(retry_timeout); - - const auto & session_data = session->sessionData(); - if (session_data.empty() || !Poco::AnyCast(&session_data)) - { - /// Reset session if it is not reusable. See comment for HTTPSessionReuseTag. - session->reset(); - } - session->attachSessionData({}); - - setTimeouts(*session, timeouts); - - return session; - } - }; -} - -void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts) -{ - session.setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); - session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); } void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout) @@ -370,28 +44,13 @@ void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_ } HTTPSessionPtr makeHTTPSession( + HTTPConnectionGroupType group, const Poco::URI & uri, const ConnectionTimeouts & timeouts, - ProxyConfiguration proxy_configuration -) + ProxyConfiguration proxy_configuration) { - const std::string & host = uri.getHost(); - UInt16 port = uri.getPort(); - bool https = isHTTPS(uri); - - auto session = makeHTTPSessionImpl(host, port, https, false, proxy_configuration); - setTimeouts(*session, timeouts); - return session; -} - -PooledHTTPSessionPtr makePooledHTTPSession( - const Poco::URI & uri, - const ConnectionTimeouts & timeouts, - size_t per_endpoint_pool_size, - bool wait_on_pool_size_limit, - ProxyConfiguration proxy_config) -{ - return HTTPSessionPool::instance().getSession(uri, proxy_config, timeouts, per_endpoint_pool_size, wait_on_pool_size_limit); + auto connection_pool = HTTPConnectionPools::instance().getPool(group, uri, proxy_configuration); + return connection_pool->getConnection(timeouts); } bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status) { return status == Poco::Net::HTTPResponse::HTTP_MOVED_PERMANENTLY || status == Poco::Net::HTTPResponse::HTTP_FOUND || status == Poco::Net::HTTPResponse::HTTP_SEE_OTHER || status == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT; } @@ -400,11 +59,11 @@ std::istream * receiveResponse( Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, const bool allow_redirects) { auto & istr = session.receiveResponse(response); - assertResponseIsOk(request, response, istr, allow_redirects); + assertResponseIsOk(request.getURI(), response, istr, allow_redirects); return &istr; } -void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects) +void assertResponseIsOk(const String & uri, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects) { auto status = response.getStatus(); @@ -422,7 +81,7 @@ void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPR body.exceptions(std::ios::failbit); body << istr.rdbuf(); - throw HTTPException(code, request.getURI(), status, response.getReason(), body.str()); + throw HTTPException(code, uri, status, response.getReason(), body.str()); } } @@ -440,24 +99,4 @@ Exception HTTPException::makeExceptionMessage( uri, static_cast(http_status), reason, body); } -void markSessionForReuse(Poco::Net::HTTPSession & session) -{ - const auto & session_data = session.sessionData(); - if (!session_data.empty() && !Poco::AnyCast(&session_data)) - throw Exception( - 
ErrorCodes::LOGICAL_ERROR, "Data of an unexpected type ({}) is attached to the session", session_data.type().name()); - - session.attachSessionData(HTTPSessionReuseTag{}); -} - -void markSessionForReuse(HTTPSessionPtr session) -{ - markSessionForReuse(*session); -} - -void markSessionForReuse(PooledHTTPSessionPtr session) -{ - markSessionForReuse(static_cast(*session)); -} - } diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index c9968fc6915..e27269e2559 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -7,9 +7,9 @@ #include #include #include -#include -#include #include +#include +#include #include @@ -36,7 +36,7 @@ public: HTTPException * clone() const override { return new HTTPException(*this); } void rethrow() const override { throw *this; } - int getHTTPStatus() const { return http_status; } + Poco::Net::HTTPResponse::HTTPStatus getHTTPStatus() const { return http_status; } private: Poco::Net::HTTPResponse::HTTPStatus http_status{}; @@ -52,55 +52,18 @@ private: const char * className() const noexcept override { return "DB::HTTPException"; } }; -using PooledHTTPSessionPtr = PoolBase::Entry; // SingleEndpointHTTPSessionPool::Entry using HTTPSessionPtr = std::shared_ptr; -/// If a session have this tag attached, it will be reused without calling `reset()` on it. -/// All pooled sessions don't have this tag attached after being taken from a pool. -/// If the request and the response were fully written/read, the client code should add this tag -/// explicitly by calling `markSessionForReuse()`. -/// -/// Note that HTTP response may contain extra bytes after the last byte of the payload. Specifically, -/// when chunked encoding is used, there's an empty chunk at the end. Those extra bytes must also be -/// read before the session can be reused. So we usually put an `istr->ignore(INT64_MAX)` call -/// before `markSessionForReuse()`. -struct HTTPSessionReuseTag -{ -}; - -void markSessionForReuse(Poco::Net::HTTPSession & session); -void markSessionForReuse(HTTPSessionPtr session); -void markSessionForReuse(PooledHTTPSessionPtr session); - - void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout); /// Create session object to perform requests and set required parameters. HTTPSessionPtr makeHTTPSession( + HTTPConnectionGroupType group, const Poco::URI & uri, const ConnectionTimeouts & timeouts, ProxyConfiguration proxy_config = {} ); -/// As previous method creates session, but takes it from pool, without and with proxy uri. -/// -/// The max_connections_per_endpoint parameter makes it look like the pool size can be different for -/// different requests (whatever that means), but actually we just assign the endpoint's connection -/// pool size when we see the endpoint for the first time, then we never change it. -/// We should probably change how this configuration works, and how this pooling works in general: -/// * Make the per_endpoint_pool_size be a global server setting instead of per-disk or per-query. -/// * Have boolean per-disk/per-query settings for enabling/disabling pooling. -/// * Add a limit on the number of endpoints and the total number of sessions across all endpoints. -/// * Enable pooling by default everywhere. In particular StorageURL and StorageS3. -/// (Enabling it for StorageURL is scary without the previous item - the user may query lots of -/// different endpoints. So currently pooling is mainly used for S3.) 
-PooledHTTPSessionPtr makePooledHTTPSession( - const Poco::URI & uri, - const ConnectionTimeouts & timeouts, - size_t per_endpoint_pool_size, - bool wait_on_pool_size_limit = true, - ProxyConfiguration proxy_config = {}); - bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status); /** Used to receive response (response headers and possibly body) @@ -112,7 +75,6 @@ std::istream * receiveResponse( Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects); void assertResponseIsOk( - const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, bool allow_redirects = false); + const String & uri, Poco::Net::HTTPResponse & response, std::istream & istr, bool allow_redirects = false); -void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts); } diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp index e14112f8d19..84c7ac86227 100644 --- a/src/IO/LimitReadBuffer.cpp +++ b/src/IO/LimitReadBuffer.cpp @@ -1,5 +1,4 @@ #include - #include @@ -15,7 +14,7 @@ namespace ErrorCodes bool LimitReadBuffer::nextImpl() { - assert(position() >= in->position()); + chassert(position() >= in->position()); /// Let underlying buffer calculate read bytes in `next()` call. in->position() = position(); @@ -39,20 +38,18 @@ bool LimitReadBuffer::nextImpl() if (exact_limit && bytes != *exact_limit) throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected EOF, got {} of {} bytes", bytes, *exact_limit); /// Clearing the buffer with existing data. - set(in->position(), 0); + BufferBase::set(in->position(), 0, 0); + return false; } - working_buffer = in->buffer(); - - if (limit - bytes < working_buffer.size()) - working_buffer.resize(limit - bytes); + BufferBase::set(in->position(), std::min(in->available(), limit - bytes), 0); return true; } -LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, +LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_) : ReadBuffer(in_ ? 
in_->position() : nullptr, 0) , in(in_) @@ -62,24 +59,20 @@ LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, boo , exact_limit(exact_limit_) , exception_message(std::move(exception_message_)) { - assert(in); + chassert(in); - size_t remaining_bytes_in_buffer = in->buffer().end() - in->position(); - if (remaining_bytes_in_buffer > limit) - remaining_bytes_in_buffer = limit; - - working_buffer = Buffer(in->position(), in->position() + remaining_bytes_in_buffer); + BufferBase::set(in->position(), std::min(in->available(), limit), 0); } -LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, +LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_) : LimitReadBuffer(&in_, false, limit_, throw_exception_, exact_limit_, exception_message_) { } -LimitReadBuffer::LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, +LimitReadBuffer::LimitReadBuffer(std::unique_ptr in_, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_) : LimitReadBuffer(in_.release(), true, limit_, throw_exception_, exact_limit_, exception_message_) { diff --git a/src/IO/LimitReadBuffer.h b/src/IO/LimitReadBuffer.h index 15885c1d850..b869f2935fb 100644 --- a/src/IO/LimitReadBuffer.h +++ b/src/IO/LimitReadBuffer.h @@ -13,22 +13,24 @@ namespace DB class LimitReadBuffer : public ReadBuffer { public: - LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, + LimitReadBuffer(ReadBuffer & in_, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_ = {}); - LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, std::optional exact_limit_, + LimitReadBuffer(std::unique_ptr in_, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_ = {}); ~LimitReadBuffer() override; private: ReadBuffer * in; - bool owns_in; + const bool owns_in; - UInt64 limit; - bool throw_exception; - std::optional exact_limit; - std::string exception_message; + const size_t limit; + const bool throw_exception; + const std::optional exact_limit; + const std::string exception_message; - LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_); + LoggerPtr log; + + LimitReadBuffer(ReadBuffer * in_, bool owns, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_); bool nextImpl() override; }; diff --git a/src/IO/MMapReadBufferFromFileDescriptor.cpp b/src/IO/MMapReadBufferFromFileDescriptor.cpp index 9b1c132cc01..f27828f71b2 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.cpp +++ b/src/IO/MMapReadBufferFromFileDescriptor.cpp @@ -92,7 +92,7 @@ size_t MMapReadBufferFromFileDescriptor::getFileSize() return getSizeFromFileDescriptor(getFD(), getFileName()); } -size_t MMapReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) +size_t MMapReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) const { if (offset >= mapped.getLength()) return 0; diff --git a/src/IO/MMapReadBufferFromFileDescriptor.h b/src/IO/MMapReadBufferFromFileDescriptor.h index 2a039e04971..f774538374a 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.h +++ b/src/IO/MMapReadBufferFromFileDescriptor.h @@ -40,7 +40,7 @@ public: size_t getFileSize() 
override; - size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) override; + size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) const override; bool supportsReadAt() override { return true; } }; diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index 00325734354..056e25a5fbe 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -63,21 +63,23 @@ public: */ bool next() { - assert(!hasPendingData()); - assert(position() <= working_buffer.end()); + chassert(!hasPendingData()); + chassert(position() <= working_buffer.end()); bytes += offset(); bool res = nextImpl(); if (!res) + { working_buffer = Buffer(pos, pos); + } else { - pos = working_buffer.begin() + nextimpl_working_buffer_offset; - assert(position() != working_buffer.end()); + pos = working_buffer.begin() + std::min(nextimpl_working_buffer_offset, working_buffer.size()); + chassert(position() < working_buffer.end()); } nextimpl_working_buffer_offset = 0; - assert(position() <= working_buffer.end()); + chassert(position() <= working_buffer.end()); return res; } diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 3211f8eeb35..57442a15853 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -49,7 +49,7 @@ std::string ReadBufferFromFileDescriptor::getFileName() const } -size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset) +size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset) const { chassert(min_bytes <= max_bytes); @@ -265,7 +265,7 @@ bool ReadBufferFromFileDescriptor::checkIfActuallySeekable() return res == 0 && S_ISREG(stat.st_mode); } -size_t ReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) +size_t ReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) const { chassert(use_pread); return readImpl(to, n, n, offset); diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 4762998c67b..db256ef91c7 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -34,7 +34,7 @@ protected: /// Doesn't seek (`offset` must match fd's position if !use_pread). /// Stops after min_bytes or eof. Returns 0 if eof. /// Thread safe. 
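The const qualifier added to readImpl()/readBigAt() in these hunks reflects that positional reads do not touch the buffer's own position, which is what makes concurrent readBigAt() calls on one descriptor safe. A rough usage sketch; the progress-callback parameter type is assumed to be std::function<bool(size_t)> and should be checked against the header:

#include <IO/ReadBufferFromFileDescriptor.h>

/// Sketch: read `n` bytes at `offset` without disturbing the buffer's current position.
size_t readChunkAt(DB::ReadBufferFromFileDescriptor & in, char * to, size_t n, size_t offset)
{
    if (!in.supportsReadAt())   /// positional reads are only available when the buffer uses pread
        return 0;

    /// The callback observes progress; returning true means "keep reading".
    return in.readBigAt(to, n, offset, [](size_t /*bytes*/) { return true; });
}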
- size_t readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset); + size_t readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset) const; public: explicit ReadBufferFromFileDescriptor( @@ -73,7 +73,7 @@ public: bool checkIfActuallySeekable() override; - size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) override; + size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) const override; bool supportsReadAt() override { return use_pread; } }; diff --git a/src/IO/ReadBufferFromIStream.cpp b/src/IO/ReadBufferFromIStream.cpp index 52546f1703d..bc90ec7ed15 100644 --- a/src/IO/ReadBufferFromIStream.cpp +++ b/src/IO/ReadBufferFromIStream.cpp @@ -5,52 +5,44 @@ namespace DB { -namespace ErrorCodes -{ - extern const int CANNOT_READ_FROM_ISTREAM; -} - bool ReadBufferFromIStream::nextImpl() { if (eof) return false; + chassert(internal_buffer.begin() != nullptr); + chassert(!internal_buffer.empty()); + size_t bytes_read = 0; char * read_to = internal_buffer.begin(); /// It is necessary to read in a loop, since socket usually returns only data available at the moment. while (bytes_read < internal_buffer.size()) { - try + const auto bytes_read_last_time = stream_buf.readFromDevice(read_to, internal_buffer.size() - bytes_read); + if (bytes_read_last_time <= 0) { - const auto bytes_read_last_time = stream_buf.readFromDevice(read_to, internal_buffer.size() - bytes_read); - if (bytes_read_last_time <= 0) - { - eof = true; - break; - } + eof = true; + break; + } - bytes_read += bytes_read_last_time; - read_to += bytes_read_last_time; - } - catch (...) - { - throw Exception( - ErrorCodes::CANNOT_READ_FROM_ISTREAM, - "Cannot read from istream at offset {}: {}", - count(), - getCurrentExceptionMessage(/*with_stacktrace=*/true)); - } + bytes_read += bytes_read_last_time; + read_to += bytes_read_last_time; } if (bytes_read) + { + working_buffer = internal_buffer; working_buffer.resize(bytes_read); + } return bytes_read; } ReadBufferFromIStream::ReadBufferFromIStream(std::istream & istr_, size_t size) - : BufferWithOwnMemory(size), istr(istr_), stream_buf(dynamic_cast(*istr.rdbuf())) + : BufferWithOwnMemory(size) + , istr(istr_) + , stream_buf(dynamic_cast(*istr.rdbuf())) { } diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 4529771e7b2..491ff253066 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -34,61 +34,6 @@ namespace ProfileEvents extern const Event RemoteReadThrottlerSleepMicroseconds; } -namespace -{ -DB::PooledHTTPSessionPtr getSession(Aws::S3::Model::GetObjectResult & read_result) -{ - if (auto * session_aware_stream = dynamic_cast *>(&read_result.GetBody())) - return static_cast(session_aware_stream->getSession()); - - if (dynamic_cast *>(&read_result.GetBody())) - return {}; - - /// accept result from S# mock in gtest_writebuffer_s3.cpp - if (dynamic_cast(&read_result.GetBody())) - return {}; - - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); -} - -void resetSession(Aws::S3::Model::GetObjectResult & read_result) -{ - if (auto session = getSession(read_result); !session.isNull()) - { - auto & http_session = static_cast(*session); - http_session.reset(); - } -} - -void resetSessionIfNeeded(bool read_all_range_successfully, std::optional & read_result) -{ - if (!read_result) - return; - - if (!read_all_range_successfully) - { - /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete 
the same object this delete - /// operation will hang until GetObject's session idle timeouts. So we have to call `reset()` on GetObject's session session immediately. - resetSession(*read_result); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); - } - else if (auto session = getSession(*read_result); !session.isNull()) - { - if (!session->getProxyHost().empty()) - { - /// Reset proxified sessions because proxy can change for every request. See ProxyConfigurationResolver. - resetSession(*read_result); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); - } - else - { - DB::markSessionForReuse(session); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions); - } - } -} -} - namespace DB { namespace ErrorCodes @@ -228,7 +173,7 @@ bool ReadBufferFromS3::nextImpl() } -size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) +size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) const { size_t initial_n = n; size_t sleep_time_with_backoff_milliseconds = 100; @@ -240,29 +185,6 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromS3Microseconds); std::optional result; - /// Connection is reusable if we've read the full response. - bool session_is_reusable = false; - SCOPE_EXIT( - { - if (!result.has_value()) - return; - if (session_is_reusable) - { - auto session = getSession(*result); - if (!session.isNull()) - { - DB::markSessionForReuse(session); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions); - } - else - session_is_reusable = false; - } - if (!session_is_reusable) - { - resetSession(*result); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); - } - }); try { @@ -276,9 +198,8 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons if (read_settings.remote_throttler) read_settings.remote_throttler->add(bytes_copied, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); - /// Read remaining bytes after the end of the payload, see HTTPSessionReuseTag. + /// Read remaining bytes after the end of the payload istr.ignore(INT64_MAX); - session_is_reusable = true; } catch (Poco::Exception & e) { @@ -451,21 +372,8 @@ bool ReadBufferFromS3::atEndOfRequestedRangeGuess() return false; } -ReadBufferFromS3::~ReadBufferFromS3() -{ - try - { - resetSessionIfNeeded(readAllRangeSuccessfully(), read_result); - } - catch (...) - { - tryLogCurrentException(log); - } -} - std::unique_ptr ReadBufferFromS3::initialize(size_t attempt) { - resetSessionIfNeeded(readAllRangeSuccessfully(), read_result); read_all_range_successfully = false; /** @@ -534,10 +442,6 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t attempt, si } } -bool ReadBufferFromS3::readAllRangeSuccessfully() const -{ - return read_until_position ? 
offset == read_until_position : read_all_range_successfully; -} } #endif diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index f28c23a71d7..003c88df7d2 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -55,7 +55,7 @@ public: bool restricted_seek_ = false, std::optional file_size = std::nullopt); - ~ReadBufferFromS3() override; + ~ReadBufferFromS3() override = default; bool nextImpl() override; @@ -74,7 +74,7 @@ public: String getFileName() const override { return bucket + "/" + key; } - size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) override; + size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) const override; bool supportsReadAt() override { return true; } @@ -90,8 +90,6 @@ private: Aws::S3::Model::GetObjectResult sendRequest(size_t attempt, size_t range_begin, std::optional range_end_incl) const; - bool readAllRangeSuccessfully() const; - ReadSettings read_settings; bool use_external_buffer; diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 31ea45d92a9..38904df4403 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -122,7 +122,7 @@ struct ReadSettings // Resource to be used during reading ResourceLink resource_link; - size_t http_max_tries = 1; + size_t http_max_tries = 10; size_t http_retry_initial_backoff_ms = 100; size_t http_retry_max_backoff_ms = 1600; bool http_skip_not_found_url_for_globs = true; diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 15c2a0a021b..bcbec97537a 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -1,13 +1,65 @@ #include "ReadWriteBufferFromHTTP.h" #include +#include +#include + namespace ProfileEvents { extern const Event ReadBufferSeekCancelConnection; - extern const Event ReadWriteBufferFromHTTPPreservedSessions; } + +namespace +{ + +bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept +{ + static constexpr std::array non_retriable_errors{ + Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_IMPLEMENTED, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED}; + + return std::all_of( + non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; }); +} + +Poco::URI getUriAfterRedirect(const Poco::URI & prev_uri, Poco::Net::HTTPResponse & response) +{ + chassert(DB::isRedirect(response.getStatus())); + + auto location = response.get("Location"); + auto location_uri = Poco::URI(location); + if (!location_uri.isRelative()) + return location_uri; + /// Location header contains relative path. So we need to concatenate it + /// with path from the original URI and normalize it. 
+ auto path = std::filesystem::weakly_canonical(std::filesystem::path(prev_uri.getPath()) / location); + location_uri = prev_uri; + location_uri.setPath(path); + return location_uri; +} + +class ReadBufferFromSessionResponse : public DB::ReadBufferFromIStream +{ +private: + DB::HTTPSessionPtr session; + +public: + ReadBufferFromSessionResponse(DB::HTTPSessionPtr && session_, std::istream & rstr, size_t size) + : ReadBufferFromIStream(rstr, size) + , session(std::move(session_)) + { + } +}; + +} + + namespace DB { @@ -21,94 +73,29 @@ namespace ErrorCodes extern const int UNKNOWN_FILE_SIZE; } -template -UpdatableSession::UpdatableSession(const Poco::URI & uri, UInt64 max_redirects_, std::shared_ptr session_factory_) - : max_redirects{max_redirects_} - , initial_uri(uri) - , session_factory(std::move(session_factory_)) +std::unique_ptr ReadWriteBufferFromHTTP::CallResult::transformToReadBuffer(size_t buf_size) && { - session = session_factory->buildNewSession(uri); + chassert(session); + return std::make_unique(std::move(session), *response_stream, buf_size); } -template -typename UpdatableSession::SessionPtr UpdatableSession::getSession() { return session; } - -template -void UpdatableSession::updateSession(const Poco::URI & uri) -{ - ++redirects; - if (redirects <= max_redirects) - session = session_factory->buildNewSession(uri); - else - throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, - "Too many redirects while trying to access {}." - " You can {} redirects by changing the setting 'max_http_get_redirects'." - " Example: `SET max_http_get_redirects = 10`." - " Redirects are restricted to prevent possible attack when a malicious server redirects to an internal resource, bypassing the authentication or firewall.", - initial_uri.toString(), max_redirects ? "increase the allowed maximum number of" : "allow"); -} - -template -typename UpdatableSession::SessionPtr UpdatableSession::createDetachedSession(const Poco::URI & uri) -{ - return session_factory->buildNewSession(uri); -} - -template -std::shared_ptr> UpdatableSession::clone(const Poco::URI & uri) -{ - return std::make_shared>(uri, max_redirects, session_factory); -} - - -namespace detail -{ - -static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept -{ - static constexpr std::array non_retriable_errors{ - Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_IMPLEMENTED, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED}; - - return std::all_of( - non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; }); -} - -static Poco::URI getUriAfterRedirect(const Poco::URI & prev_uri, Poco::Net::HTTPResponse & response) -{ - auto location = response.get("Location"); - auto location_uri = Poco::URI(location); - if (!location_uri.isRelative()) - return location_uri; - /// Location header contains relative path. So we need to concatenate it - /// with path from the original URI and normalize it. 
- auto path = std::filesystem::weakly_canonical(std::filesystem::path(prev_uri.getPath()) / location); - location_uri = prev_uri; - location_uri.setPath(path); - return location_uri; -} - -template -bool ReadWriteBufferFromHTTPBase::withPartialContent(const HTTPRange & range) const +bool ReadWriteBufferFromHTTP::withPartialContent() const { /** * Add range header if we have some passed range * or if we want to retry GET request on purpose. */ - return range.begin || range.end || retry_with_range_header; + return read_range.begin || read_range.end || getOffset() > 0; } -template -size_t ReadWriteBufferFromHTTPBase::getOffset() const { return read_range.begin.value_or(0) + offset_from_begin_pos; } - -template -void ReadWriteBufferFromHTTPBase::prepareRequest(Poco::Net::HTTPRequest & request, Poco::URI uri_, std::optional range) const +size_t ReadWriteBufferFromHTTP::getOffset() const { - request.setHost(uri_.getHost()); // use original, not resolved host name in header + return read_range.begin.value_or(0) + offset_from_begin_pos; +} + +void ReadWriteBufferFromHTTP::prepareRequest(Poco::Net::HTTPRequest & request, std::optional range) const +{ + request.setHost(initial_uri.getHost()); // use original, not resolved host name in header if (out_stream_callback) request.setChunkedTransferEncoding(true); @@ -125,7 +112,6 @@ void ReadWriteBufferFromHTTPBase::prepareRequest(Poco::Net: range_header_value = fmt::format("bytes={}-{}", *range->begin, *range->end); else range_header_value = fmt::format("bytes={}-", *range->begin); - LOG_TEST(log, "Adding header: Range: {}", range_header_value); request.set("Range", range_header_value); } @@ -133,45 +119,7 @@ void ReadWriteBufferFromHTTPBase::prepareRequest(Poco::Net: credentials.authenticate(request); } -template -std::istream * ReadWriteBufferFromHTTPBase::callImpl( - UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_, bool for_object_info) -{ - // With empty path poco will send "POST HTTP/1.1" its bug. 
- if (uri_.getPath().empty()) - uri_.setPath("/"); - - std::optional range; - if (!for_object_info) - { - if (withPartialContent(read_range)) - range = HTTPRange{getOffset(), read_range.end}; - } - - Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); - prepareRequest(request, uri_, range); - - LOG_TRACE(log, "Sending request to {}", uri_.toString()); - - auto sess = current_session->getSession(); - auto & stream_out = sess->sendRequest(request); - - if (out_stream_callback) - out_stream_callback(stream_out); - - auto result_istr = receiveResponse(*sess, request, response, true); - response.getCookies(cookies); - - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - if (!for_object_info) - content_encoding = response.get("Content-Encoding", ""); - - return result_istr; -} - -template -size_t ReadWriteBufferFromHTTPBase::getFileSize() +size_t ReadWriteBufferFromHTTP::getFileSize() { if (!file_info) file_info = getFileInfo(); @@ -179,243 +127,288 @@ size_t ReadWriteBufferFromHTTPBase::getFileSize() if (file_info->file_size) return *file_info->file_size; - throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", uri.toString()); + throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", initial_uri.toString()); } -template -bool ReadWriteBufferFromHTTPBase::supportsReadAt() +bool ReadWriteBufferFromHTTP::supportsReadAt() { if (!file_info) file_info = getFileInfo(); return method == Poco::Net::HTTPRequest::HTTP_GET && file_info->seekable; } -template -bool ReadWriteBufferFromHTTPBase::checkIfActuallySeekable() +bool ReadWriteBufferFromHTTP::checkIfActuallySeekable() { if (!file_info) file_info = getFileInfo(); return file_info->seekable; } -template -String ReadWriteBufferFromHTTPBase::getFileName() const { return uri.toString(); } - -template -void ReadWriteBufferFromHTTPBase::getHeadResponse(Poco::Net::HTTPResponse & response) +String ReadWriteBufferFromHTTP::getFileName() const { - for (size_t i = 0; i < settings.http_max_tries; ++i) - { - try - { - callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD, true, true); - break; - } - catch (const Poco::Exception & e) - { - if (i == settings.http_max_tries - 1 || e.code() == ErrorCodes::TOO_MANY_REDIRECTS || !isRetriableError(response.getStatus())) - throw; - - LOG_ERROR(log, "Failed to make HTTP_HEAD request to {}. Error: {}", uri.toString(), e.displayText()); - } - } + return initial_uri.toString(); } -template -void ReadWriteBufferFromHTTPBase::setupExternalBuffer() +void ReadWriteBufferFromHTTP::getHeadResponse(Poco::Net::HTTPResponse & response) { - /** - * use_external_buffer -- means we read into the buffer which - * was passed to us from somewhere else. We do not check whether - * previously returned buffer was read or not (no hasPendingData() check is needed), - * because this branch means we are prefetching data, - * each nextImpl() call we can fill a different buffer. 
- */ - impl->set(internal_buffer.begin(), internal_buffer.size()); - assert(working_buffer.begin() != nullptr); - assert(!internal_buffer.empty()); + doWithRetries( + [&] () + { + callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD, {}); + }, + /*on_retry=*/ nullptr, + /*mute_logging=*/ true); } -template -ReadWriteBufferFromHTTPBase::ReadWriteBufferFromHTTPBase( - UpdatableSessionPtr session_, - Poco::URI uri_, - const Poco::Net::HTTPBasicCredentials & credentials_, +ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( + const HTTPConnectionGroupType & connection_group_, + const Poco::URI & uri_, const std::string & method_, - OutStreamCallback out_stream_callback_, - size_t buffer_size_, - const ReadSettings & settings_, - HTTPHeaderEntries http_header_entries_, + ProxyConfiguration proxy_config_, + ReadSettings read_settings_, + ConnectionTimeouts timeouts_, + const Poco::Net::HTTPBasicCredentials & credentials_, const RemoteHostFilter * remote_host_filter_, - bool delay_initialization, + size_t buffer_size_, + size_t max_redirects_, + OutStreamCallback out_stream_callback_, bool use_external_buffer_, bool http_skip_not_found_url_, - std::optional file_info_, - ProxyConfiguration proxy_config_) + HTTPHeaderEntries http_header_entries_, + bool delay_initialization, + std::optional file_info_) : SeekableReadBuffer(nullptr, 0) - , uri {uri_} - , method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET} - , session {session_} - , out_stream_callback {out_stream_callback_} - , credentials {credentials_} - , http_header_entries {std::move(http_header_entries_)} - , remote_host_filter {remote_host_filter_} - , buffer_size {buffer_size_} - , use_external_buffer {use_external_buffer_} - , file_info(file_info_) + , connection_group(connection_group_) + , initial_uri(uri_) + , method(!method_.empty() ? method_ : out_stream_callback_ ? 
Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET) + , proxy_config(std::move(proxy_config_)) + , read_settings(std::move(read_settings_)) + , timeouts(std::move(timeouts_)) + , credentials(credentials_) + , remote_host_filter(remote_host_filter_) + , buffer_size(buffer_size_) + , max_redirects(max_redirects_) + , use_external_buffer(use_external_buffer_) , http_skip_not_found_url(http_skip_not_found_url_) - , settings {settings_} + , out_stream_callback(std::move(out_stream_callback_)) + , redirects(0) + , http_header_entries {std::move(http_header_entries_)} + , file_info(file_info_) , log(getLogger("ReadWriteBufferFromHTTP")) - , proxy_config(proxy_config_) { - if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0 - || settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms) + current_uri = initial_uri; + + if (current_uri.getPath().empty()) + current_uri.setPath("/"); + + if (read_settings.http_max_tries <= 0 || read_settings.http_retry_initial_backoff_ms <= 0 + || read_settings.http_retry_initial_backoff_ms >= read_settings.http_retry_max_backoff_ms) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Invalid setting for http backoff, " "must be http_max_tries >= 1 (current is {}) and " "0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})", - settings.http_max_tries, - settings.http_retry_initial_backoff_ms, - settings.http_retry_max_backoff_ms); + read_settings.http_max_tries, + read_settings.http_retry_initial_backoff_ms, + read_settings.http_retry_max_backoff_ms); // Configure User-Agent if it not already set. const std::string user_agent = "User-Agent"; - auto iter = std::find_if( - http_header_entries.begin(), - http_header_entries.end(), - [&user_agent](const HTTPHeaderEntry & entry) { return entry.name == user_agent; }); + auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), + [&user_agent] (const HTTPHeaderEntry & entry) { return entry.name == user_agent; }); if (iter == http_header_entries.end()) { - http_header_entries.emplace_back("User-Agent", fmt::format("ClickHouse/{}", VERSION_STRING)); + http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}", VERSION_STRING)); } + if (!delay_initialization && use_external_buffer) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid setting for ReadWriteBufferFromHTTP" + "delay_initialization is false and use_external_buffer it true."); + if (!delay_initialization) { - initialize(); - if (exception) - std::rethrow_exception(exception); + next(); } } -template -void ReadWriteBufferFromHTTPBase::callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors, bool for_object_info) +ReadWriteBufferFromHTTP::CallResult ReadWriteBufferFromHTTP::callImpl( + Poco::Net::HTTPResponse & response, const Poco::URI & uri_, const std::string & method_, const std::optional & range, bool allow_redirects) const { - UpdatableSessionPtr current_session = nullptr; + if (remote_host_filter) + remote_host_filter->checkURL(uri_); - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - if (for_object_info) - current_session = session->clone(uri); - else - current_session = session; + Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); + prepareRequest(request, range); - call(current_session, response, method_, throw_on_all_errors, for_object_info); - 
saved_uri_redirect = uri; + auto session = makeHTTPSession(connection_group, uri_, timeouts, proxy_config); + + auto & stream_out = session->sendRequest(request); + if (out_stream_callback) + out_stream_callback(stream_out); + + auto & resp_stream = session->receiveResponse(response); + + assertResponseIsOk(current_uri.toString(), response, resp_stream, allow_redirects); + + return ReadWriteBufferFromHTTP::CallResult(std::move(session), resp_stream); +} + +ReadWriteBufferFromHTTP::CallResult ReadWriteBufferFromHTTP::callWithRedirects( + Poco::Net::HTTPResponse & response, const String & method_, const std::optional & range) +{ + auto result = callImpl(response, current_uri, method_, range, true); while (isRedirect(response.getStatus())) { - Poco::URI uri_redirect = getUriAfterRedirect(*saved_uri_redirect, response); - saved_uri_redirect = uri_redirect; - if (remote_host_filter) - remote_host_filter->checkURL(uri_redirect); + Poco::URI uri_redirect = getUriAfterRedirect(current_uri, response); + ++redirects; + if (redirects > max_redirects) + throw Exception( + ErrorCodes::TOO_MANY_REDIRECTS, + "Too many redirects while trying to access {}." + " You can {} redirects by changing the setting 'max_http_get_redirects'." + " Example: `SET max_http_get_redirects = 10`." + " Redirects are restricted to prevent possible attack when a malicious server redirects to an internal resource, bypassing the authentication or firewall.", + initial_uri.toString(), max_redirects ? "increase the allowed maximum number of" : "allow"); - current_session->updateSession(uri_redirect); + current_uri = uri_redirect; - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - auto result_istr = callImpl(current_session, uri_redirect, response, method, for_object_info); - if (!for_object_info) - istr = result_istr; + result = callImpl(response, uri_redirect, method_, range, true); } + + return result; } -template -void ReadWriteBufferFromHTTPBase::call(UpdatableSessionPtr & current_session, Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors, bool for_object_info) + +void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, + std::function on_retry, + bool mute_logging) const { - try + [[maybe_unused]] auto milliseconds_to_wait = read_settings.http_retry_initial_backoff_ms; + + bool is_retriable = true; + std::exception_ptr exception = nullptr; + + for (size_t attempt = 1; attempt <= read_settings.http_max_tries; ++attempt) { - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - auto result_istr = callImpl(current_session, saved_uri_redirect ? *saved_uri_redirect : uri, response, method_, for_object_info); - if (!for_object_info) - istr = result_istr; - } - catch (...) 
- { - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - if (for_object_info) - throw; + [[maybe_unused]] bool last_attempt = attempt + 1 > read_settings.http_max_tries; - if (throw_on_all_errors) - throw; + String error_message; - auto http_status = response.getStatus(); - - if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url) + try { - initialization_error = InitializeError::SKIP_NOT_FOUND_URL; + callable(); + return; } - else if (!isRetriableError(http_status)) + catch (Poco::Net::NetException & e) { - initialization_error = InitializeError::NON_RETRYABLE_ERROR; + error_message = e.displayText(); exception = std::current_exception(); } + catch (DB::NetException & e) + { + error_message = e.displayText(); + exception = std::current_exception(); + } + catch (DB::HTTPException & e) + { + if (!isRetriableError(e.getHTTPStatus())) + is_retriable = false; + + error_message = e.displayText(); + exception = std::current_exception(); + } + catch (DB::Exception & e) + { + is_retriable = false; + + error_message = e.displayText(); + exception = std::current_exception(); + } + catch (Poco::Exception & e) + { + if (e.code() == POCO_EMFILE) + is_retriable = false; + + error_message = e.displayText(); + exception = std::current_exception(); + } + + chassert(exception); + + if (last_attempt || !is_retriable) + { + if (!mute_logging) + LOG_ERROR(log, + "Failed to make request to '{}'. Error: '{}'. " + "Failed at try {}/{}.", + initial_uri.toString(), error_message, + attempt, read_settings.http_max_tries); + + std::rethrow_exception(exception); + } else { - throw; + if (on_retry) + on_retry(); + + if (!mute_logging) + LOG_INFO(log, + "Failed to make request to `{}`. Error: {}. " + "Failed at try {}/{}. " + "Will retry with current backoff wait is {}/{} ms.", + initial_uri.toString(), error_message, + attempt + 1, read_settings.http_max_tries, + milliseconds_to_wait, read_settings.http_retry_max_backoff_ms); + + sleepForMilliseconds(milliseconds_to_wait); + milliseconds_to_wait = std::min(milliseconds_to_wait * 2, read_settings.http_retry_max_backoff_ms); } } } -template -void ReadWriteBufferFromHTTPBase::initialize() + +std::unique_ptr ReadWriteBufferFromHTTP::initialize() { Poco::Net::HTTPResponse response; - call(session, response, method); - if (initialization_error != InitializeError::NONE) - return; + std::optional range; + if (withPartialContent()) + range = HTTPRange{getOffset(), read_range.end}; - while (isRedirect(response.getStatus())) - { - Poco::URI uri_redirect = getUriAfterRedirect(saved_uri_redirect.value_or(uri), response); - if (remote_host_filter) - remote_host_filter->checkURL(uri_redirect); + auto result = callWithRedirects(response, method, range); - session->updateSession(uri_redirect); - - istr = callImpl(session, uri_redirect, response, method); - saved_uri_redirect = uri_redirect; - } - - if (response.hasContentLength()) - LOG_DEBUG(log, "Received response with content length: {}", response.getContentLength()); - - if (withPartialContent(read_range) && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT) + if (range.has_value() && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT) { /// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0. 
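To make the branch that follows easier to scan, here is an illustrative restatement (not code from the patch) of how the rewritten initialize() treats a non-206 answer to a ranged request: a plain 200 OK is tolerated only when the requested range started at offset 0, is rethrown as a retriable error otherwise, and any other status is fatal.

#include <Poco/Net/HTTPResponse.h>

enum class RangeOutcome { Accept, RetryWholeRequest, Fail };

/// Illustrative condensation of the 200-vs-206 decision; the enum and function are not part of the patch.
RangeOutcome classifyRangedResponse(const Poco::Net::HTTPResponse & response, size_t requested_offset)
{
    if (response.getStatus() == Poco::Net::HTTPResponse::HTTP_PARTIAL_CONTENT)
        return RangeOutcome::Accept;            /// the server honoured the Range header

    if (requested_offset == 0)
        return RangeOutcome::Accept;            /// 200 OK starting from offset 0 is just a full read

    if (response.getStatus() == Poco::Net::HTTPResponse::HTTP_OK)
        return RangeOutcome::RetryWholeRequest; /// thrown as a retriable HTTPException in initialize()

    return RangeOutcome::Fail;                  /// surfaces as HTTP_RANGE_NOT_SATISFIABLE
}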
if (getOffset() != 0) { - if (!exception) + /// Retry 200OK + if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK) { - exception = std::make_exception_ptr(Exception( + String reason = fmt::format( + "Cannot read with range: [{}, {}] (response status: {}, reason: {}), will retry", + *read_range.begin, read_range.end ? toString(*read_range.end) : "-", + toString(response.getStatus()), response.getReason()); + + /// it is retriable error + throw HTTPException( + ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, + current_uri.toString(), + Poco::Net::HTTPResponse::HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, + reason, + ""); + } + else + throw Exception( ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, "Cannot read with range: [{}, {}] (response status: {}, reason: {})", *read_range.begin, read_range.end ? toString(*read_range.end) : "-", - toString(response.getStatus()), response.getReason())); - } - - /// Retry 200OK - if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK) - initialization_error = InitializeError::RETRYABLE_ERROR; - else - initialization_error = InitializeError::NON_RETRYABLE_ERROR; - - return; + toString(response.getStatus()), response.getReason()); } else if (read_range.end) { @@ -425,257 +418,140 @@ void ReadWriteBufferFromHTTPBase::initialize() } } + response.getCookies(cookies); + content_encoding = response.get("Content-Encoding", ""); + // Remember file size. It'll be used to report eof in next nextImpl() call. if (!read_range.end && response.hasContentLength()) - file_info = parseFileInfo(response, withPartialContent(read_range) ? getOffset() : 0); + file_info = parseFileInfo(response, range.has_value() ? getOffset() : 0); - impl = std::make_unique(*istr, buffer_size); - - if (use_external_buffer) - setupExternalBuffer(); + return std::move(result).transformToReadBuffer(use_external_buffer ? 0 : buffer_size); } -template -bool ReadWriteBufferFromHTTPBase::nextImpl() +bool ReadWriteBufferFromHTTP::nextImpl() { - if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL) - return false; - assert(initialization_error == InitializeError::NONE); - if (next_callback) next_callback(count()); - if ((read_range.end && getOffset() > read_range.end.value()) || - (file_info && file_info->file_size && getOffset() >= file_info->file_size.value())) - { - /// Response was fully read. - markSessionForReuse(session->getSession()); - ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); - return false; - } + bool next_result = false; - if (impl) - { - if (use_external_buffer) - { - setupExternalBuffer(); - } - else - { - /** - * impl was initialized before, pass position() to it to make - * sure there is no pending data which was not read. 
- */ - if (!working_buffer.empty()) - impl->position() = position(); - } - } - - bool result = false; - size_t milliseconds_to_wait = settings.http_retry_initial_backoff_ms; - bool last_attempt = false; - - auto on_retriable_error = [&]() - { - retry_with_range_header = true; - impl.reset(); - auto http_session = session->getSession(); - http_session->reset(); - if (!last_attempt) - { - sleepForMilliseconds(milliseconds_to_wait); - milliseconds_to_wait = std::min(milliseconds_to_wait * 2, settings.http_retry_max_backoff_ms); - } - }; - - for (size_t i = 0;; ++i) - { - if (last_attempt) - break; - last_attempt = i + 1 >= settings.http_max_tries; - - exception = nullptr; - initialization_error = InitializeError::NONE; - - try + doWithRetries( + /*callable=*/ [&] () { if (!impl) { - initialize(); - - if (initialization_error == InitializeError::NON_RETRYABLE_ERROR) + try { - assert(exception); - break; + impl = initialize(); } - else if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL) + catch (HTTPException & e) { - return false; - } - else if (initialization_error == InitializeError::RETRYABLE_ERROR) - { - LOG_TRACE( - log, - "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. " - "(Current backoff wait is {}/{} ms)", - uri.toString(), i + 1, settings.http_max_tries, getOffset(), - read_range.end ? toString(*read_range.end) : "unknown", - milliseconds_to_wait, settings.http_retry_max_backoff_ms); + if (http_skip_not_found_url && e.getHTTPStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND) + { + next_result = false; + return; + } - assert(exception); - on_retriable_error(); - continue; + throw; } - assert(!exception); - if (use_external_buffer) { - setupExternalBuffer(); + impl->set(internal_buffer.begin(), internal_buffer.size()); + } + else + { + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); } } - result = impl->next(); - exception = nullptr; - break; - } - catch (const Poco::Exception & e) + if (use_external_buffer) + { + impl->set(internal_buffer.begin(), internal_buffer.size()); + } + else + { + impl->position() = position(); + } + + next_result = impl->next(); + + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); + + offset_from_begin_pos += working_buffer.size(); + }, + /*on_retry=*/ [&] () { - /// Too many open files or redirects - non-retryable. - if (e.code() == POCO_EMFILE || e.code() == ErrorCodes::TOO_MANY_REDIRECTS) - throw; + impl.reset(); + }); - /** Retry request unconditionally if nothing has been read yet. - * Otherwise if it is GET method retry with range header. - */ - bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET; - if (!can_retry_request) - throw; - - LOG_INFO( - log, - "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. " - "Error: {}. (Current backoff wait is {}/{} ms)", - uri.toString(), - i + 1, - settings.http_max_tries, - getOffset(), - read_range.end ? toString(*read_range.end) : "unknown", - e.displayText(), - milliseconds_to_wait, - settings.http_retry_max_backoff_ms); - - on_retriable_error(); - exception = std::current_exception(); - } - } - - if (exception) - std::rethrow_exception(exception); - - if (!result) - { - /// Eof is reached, i.e response was fully read. 
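The hand-written retry loop being removed here is replaced by doWithRetries(), which makes up to http_max_tries attempts and doubles the wait between them from http_retry_initial_backoff_ms up to http_retry_max_backoff_ms. A self-contained sketch of that schedule, using the new ReadSettings defaults from this patch (10 tries, 100 ms initial backoff, 1600 ms cap); the helper name is invented:

#include <algorithm>
#include <cstddef>
#include <vector>

/// Sketch of the exponential backoff applied between attempts; not part of the patch itself.
std::vector<size_t> backoffScheduleMs(size_t max_tries = 10, size_t initial_ms = 100, size_t max_ms = 1600)
{
    std::vector<size_t> waits;
    size_t wait = initial_ms;
    for (size_t attempt = 1; attempt < max_tries; ++attempt)   /// no sleep after the final attempt
    {
        waits.push_back(wait);
        wait = std::min(wait * 2, max_ms);
    }
    return waits;   /// with the defaults: 100, 200, 400, 800, 1600, 1600, 1600, 1600, 1600
}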
- markSessionForReuse(session->getSession()); - ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); - return false; - } - - internal_buffer = impl->buffer(); - working_buffer = internal_buffer; - offset_from_begin_pos += working_buffer.size(); - return true; + return next_result; } -template -size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) +size_t ReadWriteBufferFromHTTP::readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) const { /// Caller must have checked supportsReadAt(). - /// This ensures we've sent at least one HTTP request and populated saved_uri_redirect. + /// This ensures we've sent at least one HTTP request and populated current_uri. chassert(file_info && file_info->seekable); - Poco::URI uri_ = saved_uri_redirect.value_or(uri); - if (uri_.getPath().empty()) - uri_.setPath("/"); - size_t initial_n = n; - size_t milliseconds_to_wait = settings.http_retry_initial_backoff_ms; + size_t total_bytes_copied = 0; + size_t bytes_copied = 0; + bool is_canceled = false; - for (size_t attempt = 0; n > 0; ++attempt) - { - bool last_attempt = attempt + 1 >= settings.http_max_tries; - - Poco::Net::HTTPRequest request(method, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); - prepareRequest(request, uri_, HTTPRange { .begin = offset, .end = offset + n - 1}); - - LOG_TRACE(log, "Sending request to {} for range [{}, {})", uri_.toString(), offset, offset + n); - - auto sess = session->createDetachedSession(uri_); - - Poco::Net::HTTPResponse response; - std::istream * result_istr; - size_t bytes_copied = 0; - - try + doWithRetries( + /*callable=*/ [&] () { - sess->sendRequest(request); - result_istr = receiveResponse(*sess, request, response, /*allow_redirects*/ false); + auto range = HTTPRange{offset, offset + n - 1}; + + Poco::Net::HTTPResponse response; + auto result = callImpl(response, current_uri, method, range, false); if (response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT && (offset != 0 || offset + n < *file_info->file_size)) - throw Exception( - ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, - "Expected 206 Partial Content, got {} when reading {} range [{}, {})", - toString(response.getStatus()), uri_.toString(), offset, offset + n); - - copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &bytes_copied); - if (bytes_copied == n) { - result_istr->ignore(UINT64_MAX); - /// Response was fully read. - markSessionForReuse(*sess); - ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); + String reason = fmt::format( + "When reading with readBigAt {}." 
+ "Cannot read with range: [{}, {}] (response status: {}, reason: {}), will retry", + initial_uri.toString(), + *range.begin, *range.end, + toString(response.getStatus()), response.getReason()); + + throw HTTPException( + ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, + current_uri.toString(), + Poco::Net::HTTPResponse::HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, + reason, + ""); } - } - catch (const Poco::Exception & e) + + copyFromIStreamWithProgressCallback(*result.response_stream, to, n, progress_callback, &bytes_copied, &is_canceled); + + offset += bytes_copied; + total_bytes_copied += bytes_copied; + to += bytes_copied; + n -= bytes_copied; + bytes_copied = 0; + }, + /*on_retry=*/ [&] () { - LOG_ERROR( - log, - "HTTP request (positioned) to `{}` with range [{}, {}) failed at try {}/{}: {}", - uri_.toString(), offset, offset + n, attempt + 1, settings.http_max_tries, - e.what()); + offset += bytes_copied; + total_bytes_copied += bytes_copied; + to += bytes_copied; + n -= bytes_copied; + bytes_copied = 0; + }); - /// Decide whether to retry. - - if (last_attempt) - throw; - - /// Too many open files - non-retryable. - if (e.code() == POCO_EMFILE) - throw; - - if (const auto * h = dynamic_cast(&e); - h && !isRetriableError(static_cast(h->getHTTPStatus()))) - throw; - - sleepForMilliseconds(milliseconds_to_wait); - milliseconds_to_wait = std::min(milliseconds_to_wait * 2, settings.http_retry_max_backoff_ms); - } - - /// Make sure retries don't re-read the bytes that we've already reported to progress_callback. - offset += bytes_copied; - to += bytes_copied; - n -= bytes_copied; - } - - return initial_n; + chassert(total_bytes_copied == initial_n || is_canceled); + return total_bytes_copied; } -template -off_t ReadWriteBufferFromHTTPBase::getPosition() { return getOffset() - available(); } +off_t ReadWriteBufferFromHTTP::getPosition() +{ + return getOffset() - available(); +} -template -off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int whence) +off_t ReadWriteBufferFromHTTP::seek(off_t offset_, int whence) { if (whence != SEEK_SET) throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); @@ -688,8 +564,8 @@ off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset) { pos = working_buffer.end() - (current_offset - offset_); - assert(pos >= working_buffer.begin()); - assert(pos < working_buffer.end()); + chassert(pos >= working_buffer.begin()); + chassert(pos < working_buffer.end()); return getPosition(); } @@ -700,7 +576,7 @@ off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int if (offset_ > position) { size_t diff = offset_ - position; - if (diff < settings.remote_read_min_bytes_for_seek) + if (diff < read_settings.remote_read_min_bytes_for_seek) { ignore(diff); return offset_; @@ -709,6 +585,7 @@ off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int if (!atEndOfRequestedRangeGuess()) ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection); + impl.reset(); } @@ -719,8 +596,8 @@ off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int return offset_; } -template -void ReadWriteBufferFromHTTPBase::setReadUntilPosition(size_t until) + +void ReadWriteBufferFromHTTP::setReadUntilPosition(size_t until) { until = std::max(until, 1ul); if (read_range.end && *read_range.end + 1 == until) @@ -736,8 +613,7 @@ void ReadWriteBufferFromHTTPBase::setReadUntilPosition(size } } -template -void 
ReadWriteBufferFromHTTPBase::setReadUntilEnd() +void ReadWriteBufferFromHTTP::setReadUntilEnd() { if (!read_range.end) return; @@ -752,11 +628,9 @@ void ReadWriteBufferFromHTTPBase::setReadUntilEnd() } } -template -bool ReadWriteBufferFromHTTPBase::supportsRightBoundedReads() const { return true; } +bool ReadWriteBufferFromHTTP::supportsRightBoundedReads() const { return true; } -template -bool ReadWriteBufferFromHTTPBase::atEndOfRequestedRangeGuess() +bool ReadWriteBufferFromHTTP::atEndOfRequestedRangeGuess() { if (!impl) return true; @@ -767,8 +641,7 @@ bool ReadWriteBufferFromHTTPBase::atEndOfRequestedRangeGues return false; } -template -std::string ReadWriteBufferFromHTTPBase::getResponseCookie(const std::string & name, const std::string & def) const +std::string ReadWriteBufferFromHTTP::getResponseCookie(const std::string & name, const std::string & def) const { for (const auto & cookie : cookies) if (cookie.getName() == name) @@ -776,19 +649,19 @@ std::string ReadWriteBufferFromHTTPBase::getResponseCookie( return def; } -template -void ReadWriteBufferFromHTTPBase::setNextCallback(NextCallback next_callback_) +void ReadWriteBufferFromHTTP::setNextCallback(NextCallback next_callback_) { next_callback = next_callback_; /// Some data maybe already read next_callback(count()); } -template -const std::string & ReadWriteBufferFromHTTPBase::getCompressionMethod() const { return content_encoding; } +const std::string & ReadWriteBufferFromHTTP::getCompressionMethod() const +{ + return content_encoding; +} -template -std::optional ReadWriteBufferFromHTTPBase::tryGetLastModificationTime() +std::optional ReadWriteBufferFromHTTP::tryGetLastModificationTime() { if (!file_info) { @@ -805,12 +678,11 @@ std::optional ReadWriteBufferFromHTTPBase::tryGetLa return file_info->last_modified; } -template -HTTPFileInfo ReadWriteBufferFromHTTPBase::getFileInfo() +ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::getFileInfo() { /// May be disabled in case the user knows in advance that the server doesn't support HEAD requests. /// Allows to avoid making unnecessary requests in such cases. 
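The HEAD probe described in the comment above is optional: a caller that already knows the server rejects HEAD requests can opt out through ReadSettings, in which case getFileInfo() falls back to an empty HTTPFileInfo. A small sketch of that knob; only the two field names and the fallback behaviour are taken from this patch:

#include <IO/ReadSettings.h>

/// Sketch: settings for servers that do not support HEAD; getFileInfo() then returns an empty HTTPFileInfo.
DB::ReadSettings settingsWithoutHeadProbe()
{
    DB::ReadSettings settings;
    settings.http_make_head_request = false;   /// skip the HEAD request entirely
    settings.http_max_tries = 10;              /// new default introduced by this patch
    return settings;
}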
- if (!settings.http_make_head_request) + if (!read_settings.http_make_head_request) return HTTPFileInfo{}; Poco::Net::HTTPResponse response; @@ -832,11 +704,11 @@ HTTPFileInfo ReadWriteBufferFromHTTPBase::getFileInfo() throw; } + return parseFileInfo(response, 0); } -template -HTTPFileInfo ReadWriteBufferFromHTTPBase::parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin) +ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin) { HTTPFileInfo res; @@ -869,78 +741,3 @@ HTTPFileInfo ReadWriteBufferFromHTTPBase::parseFileInfo(con } -SessionFactory::SessionFactory(const ConnectionTimeouts & timeouts_, ProxyConfiguration proxy_config_) - : timeouts(timeouts_), proxy_config(proxy_config_) {} - -SessionFactory::SessionType SessionFactory::buildNewSession(const Poco::URI & uri) -{ - return makeHTTPSession(uri, timeouts, proxy_config); -} - -ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( - Poco::URI uri_, - const std::string & method_, - OutStreamCallback out_stream_callback_, - const ConnectionTimeouts & timeouts, - const Poco::Net::HTTPBasicCredentials & credentials_, - const UInt64 max_redirects, - size_t buffer_size_, - const ReadSettings & settings_, - const HTTPHeaderEntries & http_header_entries_, - const RemoteHostFilter * remote_host_filter_, - bool delay_initialization_, - bool use_external_buffer_, - bool skip_not_found_url_, - std::optional file_info_, - ProxyConfiguration proxy_config_) - : Parent( - std::make_shared(uri_, max_redirects, std::make_shared(timeouts, proxy_config_)), - uri_, - credentials_, - method_, - out_stream_callback_, - buffer_size_, - settings_, - http_header_entries_, - remote_host_filter_, - delay_initialization_, - use_external_buffer_, - skip_not_found_url_, - file_info_, - proxy_config_) {} - - -PooledSessionFactory::PooledSessionFactory( - const ConnectionTimeouts & timeouts_, size_t per_endpoint_pool_size_) - : timeouts(timeouts_) - , per_endpoint_pool_size(per_endpoint_pool_size_) {} - -PooledSessionFactory::SessionType PooledSessionFactory::buildNewSession(const Poco::URI & uri) -{ - return makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); -} - - -PooledReadWriteBufferFromHTTP::PooledReadWriteBufferFromHTTP( - Poco::URI uri_, - const std::string & method_, - OutStreamCallback out_stream_callback_, - const Poco::Net::HTTPBasicCredentials & credentials_, - size_t buffer_size_, - const UInt64 max_redirects, - PooledSessionFactoryPtr session_factory) - : Parent( - std::make_shared(uri_, max_redirects, session_factory), - uri_, - credentials_, - method_, - out_stream_callback_, - buffer_size_) {} - - -template class UpdatableSession; -template class UpdatableSession; -template class detail::ReadWriteBufferFromHTTPBase>>; -template class detail::ReadWriteBufferFromHTTPBase>>; - -} diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 63ca3e0417c..5df87fb6149 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include "config.h" #include @@ -30,44 +29,19 @@ namespace DB { -template -class UpdatableSession +class ReadWriteBufferFromHTTP : public SeekableReadBuffer, public WithFileName, public WithFileSize { public: - using SessionPtr = typename TSessionFactory::SessionType; - - explicit UpdatableSession(const Poco::URI & uri, UInt64 max_redirects_, std::shared_ptr session_factory_); - - SessionPtr 
getSession(); - - void updateSession(const Poco::URI & uri); - - /// Thread safe. - SessionPtr createDetachedSession(const Poco::URI & uri); - - std::shared_ptr> clone(const Poco::URI & uri); + /// Information from HTTP response header. + struct HTTPFileInfo + { + // nullopt if the server doesn't report it. + std::optional file_size; + std::optional last_modified; + bool seekable = false; + }; private: - SessionPtr session; - UInt64 redirects{0}; - UInt64 max_redirects; - Poco::URI initial_uri; - std::shared_ptr session_factory; -}; - - -/// Information from HTTP response header. -struct HTTPFileInfo -{ - // nullopt if the server doesn't report it. - std::optional file_size; - std::optional last_modified; - bool seekable = false; -}; - - -namespace detail -{ /// Byte range, including right bound [begin, end]. struct HTTPRange { @@ -75,218 +49,208 @@ namespace detail std::optional end; }; - template - class ReadWriteBufferFromHTTPBase : public SeekableReadBuffer, public WithFileName, public WithFileSize + struct CallResult { - protected: - Poco::URI uri; - std::string method; - std::string content_encoding; + HTTPSessionPtr session; + std::istream * response_stream = nullptr; - UpdatableSessionPtr session; - std::istream * istr; /// owned by session - std::unique_ptr impl; - std::function out_stream_callback; - const Poco::Net::HTTPBasicCredentials & credentials; - std::vector cookies; - HTTPHeaderEntries http_header_entries; - const RemoteHostFilter * remote_host_filter = nullptr; - std::function next_callback; + CallResult(HTTPSessionPtr && session_, std::istream & response_stream_) + : session(session_) + , response_stream(&response_stream_) + {} + CallResult(CallResult &&) = default; + CallResult & operator= (CallResult &&) = default; - size_t buffer_size; - bool use_external_buffer; - - size_t offset_from_begin_pos = 0; - HTTPRange read_range; - std::optional file_info; - - /// Delayed exception in case retries with partial content are not satisfiable. - std::exception_ptr exception; - bool retry_with_range_header = false; - /// In case of redirects, save result uri to use it if we retry the request. - std::optional saved_uri_redirect; - - bool http_skip_not_found_url; - - ReadSettings settings; - LoggerPtr log; - - ProxyConfiguration proxy_config; - - bool withPartialContent(const HTTPRange & range) const; - - size_t getOffset() const; - - void prepareRequest(Poco::Net::HTTPRequest & request, Poco::URI uri_, std::optional range) const; - - std::istream * callImpl(UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_, bool for_object_info = false); - - size_t getFileSize() override; - - bool supportsReadAt() override; - - bool checkIfActuallySeekable() override; - - String getFileName() const override; - - enum class InitializeError - { - RETRYABLE_ERROR, - /// If error is not retriable, `exception` variable must be set. 
- NON_RETRYABLE_ERROR, - /// Allows to skip not found urls for globs - SKIP_NOT_FOUND_URL, - NONE, - }; - - InitializeError initialization_error = InitializeError::NONE; - - private: - void getHeadResponse(Poco::Net::HTTPResponse & response); - - void setupExternalBuffer(); - - public: - using NextCallback = std::function; - using OutStreamCallback = std::function; - - explicit ReadWriteBufferFromHTTPBase( - UpdatableSessionPtr session_, - Poco::URI uri_, - const Poco::Net::HTTPBasicCredentials & credentials_, - const std::string & method_ = {}, - OutStreamCallback out_stream_callback_ = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const ReadSettings & settings_ = {}, - HTTPHeaderEntries http_header_entries_ = {}, - const RemoteHostFilter * remote_host_filter_ = nullptr, - bool delay_initialization = false, - bool use_external_buffer_ = false, - bool http_skip_not_found_url_ = false, - std::optional file_info_ = std::nullopt, - ProxyConfiguration proxy_config_ = {}); - - void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false); - - void call(UpdatableSessionPtr & current_session, Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false); - - /** - * Throws if error is retryable, otherwise sets initialization_error = NON_RETRYABLE_ERROR and - * saves exception into `exception` variable. In case url is not found and skip_not_found_url == true, - * sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws. - */ - void initialize(); - - bool nextImpl() override; - - size_t readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) override; - - off_t getPosition() override; - - off_t seek(off_t offset_, int whence) override; - - void setReadUntilPosition(size_t until) override; - - void setReadUntilEnd() override; - - bool supportsRightBoundedReads() const override; - - // If true, if we destroy impl now, no work was wasted. Just for metrics. - bool atEndOfRequestedRangeGuess(); - - std::string getResponseCookie(const std::string & name, const std::string & def) const; - - /// Set function to call on each nextImpl, useful when you need to track - /// progress. 
- /// NOTE: parameter on each call is not incremental -- it's all bytes count - /// passed through the buffer - void setNextCallback(NextCallback next_callback_); - - const std::string & getCompressionMethod() const; - - std::optional tryGetLastModificationTime(); - - HTTPFileInfo getFileInfo(); - - HTTPFileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin); + std::unique_ptr transformToReadBuffer(size_t buf_size) &&; }; -} -class SessionFactory -{ -public: - explicit SessionFactory(const ConnectionTimeouts & timeouts_, ProxyConfiguration proxy_config_ = {}); + const HTTPConnectionGroupType connection_group; + const Poco::URI initial_uri; + const std::string method; + const ProxyConfiguration proxy_config; + const ReadSettings read_settings; + const ConnectionTimeouts timeouts; - using SessionType = HTTPSessionPtr; + const Poco::Net::HTTPBasicCredentials & credentials; + const RemoteHostFilter * remote_host_filter; - SessionType buildNewSession(const Poco::URI & uri); -private: - ConnectionTimeouts timeouts; - ProxyConfiguration proxy_config; -}; + const size_t buffer_size; + const size_t max_redirects; -class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase>> -{ - using SessionType = UpdatableSession; - using Parent = detail::ReadWriteBufferFromHTTPBase>; + const bool use_external_buffer; + const bool http_skip_not_found_url; + + std::function out_stream_callback; + + Poco::URI current_uri; + size_t redirects = 0; + + std::string content_encoding; + std::unique_ptr impl; + + std::vector cookies; + HTTPHeaderEntries http_header_entries; + std::function next_callback; + + size_t offset_from_begin_pos = 0; + HTTPRange read_range; + std::optional file_info; + + LoggerPtr log; + + bool withPartialContent() const; + + void prepareRequest(Poco::Net::HTTPRequest & request, std::optional range) const; + + void doWithRetries(std::function && callable, std::function on_retry = nullptr, bool mute_logging = false) const; + + CallResult callImpl( + Poco::Net::HTTPResponse & response, + const Poco::URI & uri_, + const std::string & method_, + const std::optional & range, + bool allow_redirects) const; + + CallResult callWithRedirects( + Poco::Net::HTTPResponse & response, + const String & method_, + const std::optional & range); + + std::unique_ptr initialize(); + + size_t getFileSize() override; + + bool supportsReadAt() override; + + bool checkIfActuallySeekable() override; + + String getFileName() const override; + + void getHeadResponse(Poco::Net::HTTPResponse & response); + + void setupExternalBuffer(); + + size_t getOffset() const; + + // If true, if we destroy impl now, no work was wasted. Just for metrics. 
+ bool atEndOfRequestedRangeGuess(); public: + using NextCallback = std::function; + using OutStreamCallback = std::function; + ReadWriteBufferFromHTTP( - Poco::URI uri_, + const HTTPConnectionGroupType & connection_group_, + const Poco::URI & uri_, const std::string & method_, - OutStreamCallback out_stream_callback_, - const ConnectionTimeouts & timeouts, - const Poco::Net::HTTPBasicCredentials & credentials_, - const UInt64 max_redirects = 0, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const ReadSettings & settings_ = {}, - const HTTPHeaderEntries & http_header_entries_ = {}, - const RemoteHostFilter * remote_host_filter_ = nullptr, - bool delay_initialization_ = true, - bool use_external_buffer_ = false, - bool skip_not_found_url_ = false, - std::optional file_info_ = std::nullopt, - ProxyConfiguration proxy_config_ = {}); -}; - -class PooledSessionFactory -{ -public: - explicit PooledSessionFactory( - const ConnectionTimeouts & timeouts_, size_t per_endpoint_pool_size_); - - using SessionType = PooledHTTPSessionPtr; - - /// Thread safe. - SessionType buildNewSession(const Poco::URI & uri); - -private: - ConnectionTimeouts timeouts; - size_t per_endpoint_pool_size; -}; - -using PooledSessionFactoryPtr = std::shared_ptr; - -class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase>> -{ - using SessionType = UpdatableSession; - using Parent = detail::ReadWriteBufferFromHTTPBase>; - -public: - explicit PooledReadWriteBufferFromHTTP( - Poco::URI uri_, - const std::string & method_, - OutStreamCallback out_stream_callback_, + ProxyConfiguration proxy_config_, + ReadSettings read_settings_, + ConnectionTimeouts timeouts_, const Poco::Net::HTTPBasicCredentials & credentials_, + const RemoteHostFilter * remote_host_filter_, size_t buffer_size_, - const UInt64 max_redirects, - PooledSessionFactoryPtr session_factory); + size_t max_redirects_, + OutStreamCallback out_stream_callback_, + bool use_external_buffer_, + bool http_skip_not_found_url_, + HTTPHeaderEntries http_header_entries_, + bool delay_initialization, + std::optional file_info_); + + bool nextImpl() override; + + size_t readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) const override; + + off_t seek(off_t offset_, int whence) override; + + void setReadUntilPosition(size_t until) override; + + void setReadUntilEnd() override; + + bool supportsRightBoundedReads() const override; + + off_t getPosition() override; + + std::string getResponseCookie(const std::string & name, const std::string & def) const; + + /// Set function to call on each nextImpl, useful when you need to track + /// progress. 
+ /// NOTE: parameter on each call is not incremental -- it's all bytes count + /// passed through the buffer + void setNextCallback(NextCallback next_callback_); + + const std::string & getCompressionMethod() const; + + std::optional tryGetLastModificationTime(); + + HTTPFileInfo getFileInfo(); + static HTTPFileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin); }; +using ReadWriteBufferFromHTTPPtr = std::unique_ptr; -extern template class UpdatableSession; -extern template class UpdatableSession; -extern template class detail::ReadWriteBufferFromHTTPBase>>; -extern template class detail::ReadWriteBufferFromHTTPBase>>; +class BuilderRWBufferFromHTTP +{ + Poco::URI uri; + std::string method = Poco::Net::HTTPRequest::HTTP_GET; + HTTPConnectionGroupType connection_group = HTTPConnectionGroupType::HTTP; + ProxyConfiguration proxy_config{}; + ReadSettings read_settings{}; + ConnectionTimeouts timeouts{}; + const RemoteHostFilter * remote_host_filter = nullptr; + size_t buffer_size = DBMS_DEFAULT_BUFFER_SIZE; + size_t max_redirects = 0; + ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = nullptr; + bool use_external_buffer = false; + HTTPHeaderEntries http_header_entries{}; + bool delay_initialization = true; + +public: + BuilderRWBufferFromHTTP(Poco::URI uri_) + : uri(uri_) + {} + +#define setterMember(name, member) \ + BuilderRWBufferFromHTTP & name(decltype(BuilderRWBufferFromHTTP::member) arg_##member) \ + { \ + member = std::move(arg_##member); \ + return *this; \ + } + + setterMember(withConnectionGroup, connection_group) + setterMember(withMethod, method) + setterMember(withProxy, proxy_config) + setterMember(withSettings, read_settings) + setterMember(withTimeouts, timeouts) + setterMember(withHostFilter, remote_host_filter) + setterMember(withBufSize, buffer_size) + setterMember(withRedirects, max_redirects) + setterMember(withOutCallback, out_stream_callback) + setterMember(withHeaders, http_header_entries) + setterMember(withExternalBuf, use_external_buffer) + setterMember(withDelayInit, delay_initialization) +#undef setterMember + + ReadWriteBufferFromHTTPPtr create(const Poco::Net::HTTPBasicCredentials & credentials_) + { + return std::make_unique( + connection_group, + uri, + method, + proxy_config, + read_settings, + timeouts, + credentials_, + remote_host_filter, + buffer_size, + max_redirects, + out_stream_callback, + use_external_buffer, + /*http_skip_not_found_url=*/ false, + http_header_entries, + delay_initialization, + /*file_info_=*/ std::nullopt); + } +}; } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index dbb93e63143..a29a4b0b8ee 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,5 +1,4 @@ #include -#include "Common/DNSResolver.h" #include "config.h" #if USE_AWS_S3 @@ -147,9 +146,7 @@ ConnectionTimeouts getTimeoutsFromConfiguration(const PocoHTTPClientConfiguratio .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withTCPKeepAliveTimeout(Poco::Timespan( - client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)) - .withHTTPKeepAliveTimeout(Poco::Timespan( - client_configuration.http_keep_alive_timeout_ms * 1000)); /// flag indicating whether keep-alive is enabled is set to each session upon creation + client_configuration.enableTcpKeepAlive ? 
client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)); } PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) @@ -164,8 +161,6 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , get_request_throttler(client_configuration.get_request_throttler) , put_request_throttler(client_configuration.put_request_throttler) , extra_headers(client_configuration.extra_headers) - , http_connection_pool_size(client_configuration.http_connection_pool_size) - , wait_on_pool_size_limit(client_configuration.wait_on_pool_size_limit) { } @@ -308,12 +303,8 @@ void PocoHTTPClient::makeRequestInternal( Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const { - /// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session. const auto request_configuration = per_request_configuration(); - if (http_connection_pool_size) - makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); - else - makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); + makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); } String getMethod(const Aws::Http::HttpRequest & request) @@ -335,7 +326,6 @@ String getMethod(const Aws::Http::HttpRequest & request) } } -template void PocoHTTPClient::makeRequestInternalImpl( Aws::Http::HttpRequest & request, const DB::ProxyConfiguration & proxy_configuration, @@ -343,8 +333,6 @@ void PocoHTTPClient::makeRequestInternalImpl( Aws::Utils::RateLimits::RateLimiterInterface *, Aws::Utils::RateLimits::RateLimiterInterface *) const { - using SessionPtr = std::conditional_t; - LoggerPtr log = getLogger("AWSClient"); auto uri = request.GetUri().GetURIString(); @@ -396,40 +384,17 @@ void PocoHTTPClient::makeRequestInternalImpl( for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt) { Poco::URI target_uri(uri); - SessionPtr session; - if (!proxy_configuration.host.empty()) - { - if (enable_s3_requests_logging) - LOG_TEST(log, "Due to reverse proxy host name ({}) won't be resolved on ClickHouse side", uri); - /// Reverse proxy can replace host header with resolved ip address instead of host name. - /// This can lead to request signature difference on S3 side. - if constexpr (pooled) - session = makePooledHTTPSession( - target_uri, - getTimeouts(method, first_attempt, /*first_byte*/ true), - http_connection_pool_size, - wait_on_pool_size_limit, - proxy_configuration); - else - session = makeHTTPSession( - target_uri, - getTimeouts(method, first_attempt, /*first_byte*/ true), - proxy_configuration); - } - else - { - if constexpr (pooled) - session = makePooledHTTPSession( - target_uri, - getTimeouts(method, first_attempt, /*first_byte*/ true), - http_connection_pool_size, - wait_on_pool_size_limit); - else - session = makeHTTPSession( - target_uri, - getTimeouts(method, first_attempt, /*first_byte*/ true)); - } + if (enable_s3_requests_logging && !proxy_configuration.isEmpty()) + LOG_TEST(log, "Due to reverse proxy host name ({}) won't be resolved on ClickHouse side", uri); + + auto group = for_disk_s3 ? 
HTTPConnectionGroupType::DISK : HTTPConnectionGroupType::STORAGE; + + auto session = makeHTTPSession( + group, + target_uri, + getTimeouts(method, first_attempt, /*first_byte*/ true), + proxy_configuration); /// In case of error this address will be written to logs request.SetResolvedRemoteHost(session->getResolvedAddress()); @@ -612,10 +577,6 @@ void PocoHTTPClient::makeRequestInternalImpl( response->SetClientErrorMessage(getCurrentExceptionMessage(false)); addMetric(request, S3MetricType::Errors); - - /// Probably this is socket timeout or something more or less related to DNS - /// Let's just remove this host from DNS cache to be more safe - DNSResolver::instance().removeHostFromCache(Poco::URI(uri).getHost()); } } diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 5178d75e7b6..a93a4dfbaf7 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -49,12 +49,7 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration ThrottlerPtr put_request_throttler; HTTPHeaderEntries extra_headers; - /// Not a client parameter in terms of HTTP and we won't send it to the server. Used internally to determine when connection have to be re-established. - uint32_t http_keep_alive_timeout_ms = 0; - /// Zero means pooling will not be used. - size_t http_connection_pool_size = 0; /// See PoolBase::BehaviourOnLimit - bool wait_on_pool_size_limit = true; bool s3_use_adaptive_timeouts = true; std::function error_report; @@ -98,12 +93,6 @@ public: ); } - void SetResponseBody(Aws::IStream & incoming_stream, PooledHTTPSessionPtr & session_) /// NOLINT - { - body_stream = Aws::Utils::Stream::ResponseStream( - Aws::New>("http result streambuf", session_, incoming_stream.rdbuf())); - } - void SetResponseBody(std::string & response_body) /// NOLINT { auto stream = Aws::New("http result buf", response_body); // STYLE_CHECK_ALLOW_STD_STRING_STREAM @@ -163,7 +152,6 @@ private: EnumSize, }; - template void makeRequestInternalImpl( Aws::Http::HttpRequest & request, const DB::ProxyConfiguration & proxy_configuration, @@ -196,9 +184,6 @@ protected: ThrottlerPtr put_request_throttler; const HTTPHeaderEntries extra_headers; - - size_t http_connection_pool_size = 0; - bool wait_on_pool_size_limit = true; }; } diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h index c002d30e633..798833e1a9b 100644 --- a/src/IO/SeekableReadBuffer.h +++ b/src/IO/SeekableReadBuffer.h @@ -82,7 +82,7 @@ public: /// (e.g. next() or supportsReadAt()). /// * Performance: there's no buffering. Each readBigAt() call typically translates into actual /// IO operation (e.g. HTTP request). Don't use it for small adjacent reads. - virtual size_t readBigAt(char * /*to*/, size_t /*n*/, size_t /*offset*/, const std::function & /*progress_callback*/ = nullptr) + virtual size_t readBigAt(char * /*to*/, size_t /*n*/, size_t /*offset*/, const std::function & /*progress_callback*/ = nullptr) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method readBigAt() not implemented"); } /// Checks if readBigAt() is allowed. May be slow, may throw (e.g. it may do an HTTP request or an fstat). 
diff --git a/src/IO/S3/SessionAwareIOStream.h b/src/IO/SessionAwareIOStream.h similarity index 97% rename from src/IO/S3/SessionAwareIOStream.h rename to src/IO/SessionAwareIOStream.h index babe52545d1..2380bd0fd60 100644 --- a/src/IO/S3/SessionAwareIOStream.h +++ b/src/IO/SessionAwareIOStream.h @@ -3,7 +3,7 @@ #include -namespace DB::S3 +namespace DB { /** * Wrapper of IOStream to store response stream and corresponding HTTP session. diff --git a/src/IO/WriteBufferFromHTTP.cpp b/src/IO/WriteBufferFromHTTP.cpp index 8ddcbc03b84..d54e1685017 100644 --- a/src/IO/WriteBufferFromHTTP.cpp +++ b/src/IO/WriteBufferFromHTTP.cpp @@ -7,6 +7,7 @@ namespace DB { WriteBufferFromHTTP::WriteBufferFromHTTP( + const HTTPConnectionGroupType & connection_group, const Poco::URI & uri, const std::string & method, const std::string & content_type, @@ -14,9 +15,10 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( const HTTPHeaderEntries & additional_headers, const ConnectionTimeouts & timeouts, size_t buffer_size_, - ProxyConfiguration proxy_configuration) + ProxyConfiguration proxy_configuration +) : WriteBufferFromOStream(buffer_size_) - , session{makeHTTPSession(uri, timeouts, proxy_configuration)} + , session{makeHTTPSession(connection_group, uri, timeouts, proxy_configuration)} , request{method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1} { request.setHost(uri.getHost()); diff --git a/src/IO/WriteBufferFromHTTP.h b/src/IO/WriteBufferFromHTTP.h index f1e1e2a9e91..09fd55ec290 100644 --- a/src/IO/WriteBufferFromHTTP.h +++ b/src/IO/WriteBufferFromHTTP.h @@ -19,7 +19,8 @@ namespace DB class WriteBufferFromHTTP : public WriteBufferFromOStream { public: - explicit WriteBufferFromHTTP(const Poco::URI & uri, + explicit WriteBufferFromHTTP(const HTTPConnectionGroupType & connection_group, + const Poco::URI & uri, const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only const std::string & content_type = "", const std::string & content_encoding = "", diff --git a/src/IO/copyData.cpp b/src/IO/copyData.cpp index 07222a930b5..d2c7200c350 100644 --- a/src/IO/copyData.cpp +++ b/src/IO/copyData.cpp @@ -35,7 +35,7 @@ void copyDataImpl(ReadBuffer & from, WriteBuffer & to, bool check_bytes, size_t } if (check_bytes && bytes > 0) - throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF."); + throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF, left to copy {} bytes.", bytes); } void copyDataImpl(ReadBuffer & from, WriteBuffer & to, bool check_bytes, size_t bytes, std::function cancellation_hook, ThrottlerPtr throttler) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a81392cb3d8..d658fbe9920 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -380,8 +380,6 @@ struct ContextSharedPart : boost::noncopyable OrdinaryBackgroundExecutorPtr moves_executor TSA_GUARDED_BY(background_executors_mutex); OrdinaryBackgroundExecutorPtr fetch_executor TSA_GUARDED_BY(background_executors_mutex); OrdinaryBackgroundExecutorPtr common_executor TSA_GUARDED_BY(background_executors_mutex); - /// The global pool of HTTP sessions for background fetches. 
- PooledSessionFactoryPtr fetches_session_factory TSA_GUARDED_BY(background_executors_mutex); RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml HTTPHeaderFilter http_header_filter; /// Forbidden HTTP headers from config.xml @@ -5039,11 +5037,6 @@ void Context::initializeBackgroundExecutorsIfNeeded() ); LOG_INFO(shared->log, "Initialized background executor for move operations with num_threads={}, num_tasks={}", background_move_pool_size, background_move_pool_size); - auto timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getServerSettings(), getSettingsRef()); - /// The number of background fetches is limited by the number of threads in the background thread pool. - /// It doesn't make any sense to limit the number of connections per host any further. - shared->fetches_session_factory = std::make_shared(timeouts, background_fetches_pool_size); - shared->fetch_executor = std::make_shared ( "Fetch", @@ -5097,12 +5090,6 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const return shared->common_executor; } -PooledSessionFactoryPtr Context::getCommonFetchesSessionFactory() const -{ - SharedLockGuard lock(shared->background_executors_mutex); - return shared->fetches_session_factory; -} - IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const { callOnce(shared->readers_initialized, [&] { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index b2310eaa85d..c8aa3604a6f 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -206,9 +206,6 @@ using TemporaryDataOnDiskScopePtr = std::shared_ptr; class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr; -class PooledSessionFactory; -using PooledSessionFactoryPtr = std::shared_ptr; - class SessionTracker; struct ServerSettings; @@ -1226,7 +1223,6 @@ public: OrdinaryBackgroundExecutorPtr getMovesExecutor() const; OrdinaryBackgroundExecutorPtr getFetchesExecutor() const; OrdinaryBackgroundExecutorPtr getCommonExecutor() const; - PooledSessionFactoryPtr getCommonFetchesSessionFactory() const; IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; #if USE_LIBURING diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 6a8f82914bf..fe2baea6b4e 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -44,6 +44,11 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const { return DB::isLocalAddress(DNSResolver::instance().resolveAddress(host_name, port), clickhouse_port); } + catch (const DB::NetException &) + { + /// Avoid "Host not found" exceptions + return false; + } catch (const Poco::Net::NetException &) { /// Avoid "Host not found" exceptions diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 37f3c8b2958..026e0c166b4 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -333,10 +334,17 @@ BlockIO InterpreterSystemQuery::execute() { getContext()->checkAccess(AccessType::SYSTEM_DROP_DNS_CACHE); DNSResolver::instance().dropCache(); + HostResolversPool::instance().dropCache(); /// Reinitialize clusters to update their resolved_addresses system_context->reloadClusterConfig(); break; } + case Type::DROP_CONNECTIONS_CACHE: + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_CONNECTIONS_CACHE); + HTTPConnectionPools::instance().dropCache(); + break; + } case 
Type::DROP_MARK_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); system_context->clearMarkCache(); @@ -1201,6 +1209,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() break; } case Type::DROP_DNS_CACHE: + case Type::DROP_CONNECTIONS_CACHE: case Type::DROP_MARK_CACHE: case Type::DROP_MMAP_CACHE: case Type::DROP_QUERY_CACHE: diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index e2ebaee8438..effc7207793 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -384,6 +384,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s case Type::KILL: case Type::SHUTDOWN: case Type::DROP_DNS_CACHE: + case Type::DROP_CONNECTIONS_CACHE: case Type::DROP_MMAP_CACHE: case Type::DROP_QUERY_CACHE: case Type::DROP_MARK_CACHE: diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 48be7f6b84f..70a9e27178d 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -22,6 +22,7 @@ public: KILL, SUSPEND, DROP_DNS_CACHE, + DROP_CONNECTIONS_CACHE, DROP_MARK_CACHE, DROP_UNCOMPRESSED_CACHE, DROP_INDEX_MARK_CACHE, diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 8ef2cda5587..2ed55cca30c 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -1016,7 +1016,7 @@ private: http_basic_credentials.authenticate(request); } - auto session = makePooledHTTPSession(url, timeouts, 1); + auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, url, timeouts); session->sendRequest(request); Poco::Net::HTTPResponse response; @@ -1025,8 +1025,6 @@ private: Poco::JSON::Parser parser; auto json_body = parser.parse(*response_body).extract(); - /// Response was fully read. 
- markSessionForReuse(session); auto schema = json_body->getValue("schema"); LOG_TRACE((getLogger("AvroConfluentRowInputFormat")), "Successfully fetched schema id = {}\n{}", id, schema); diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 168c5f729ce..05e1129f9dc 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -526,14 +526,12 @@ std::pair Fetcher::fetchSelected creds.setPassword(password); } - std::unique_ptr in = std::make_unique( - uri, - Poco::Net::HTTPRequest::HTTP_POST, - nullptr, - creds, - DBMS_DEFAULT_BUFFER_SIZE, - 0, /* no redirects */ - context->getCommonFetchesSessionFactory()); + auto in = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::HTTP) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(timeouts) + .withDelayInit(false) + .create(creds); int server_protocol_version = parse(in->getResponseCookie("server_protocol_version", "0")); String remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); @@ -557,11 +555,13 @@ std::pair Fetcher::fetchSelected if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) { readBinary(sum_files_size, *in); + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { IMergeTreeDataPart::TTLInfos ttl_infos; String ttl_infos_string; readBinary(ttl_infos_string, *in); + ReadBufferFromString ttl_infos_buffer(ttl_infos_string); assertString("ttl format version: 1\n", ttl_infos_buffer); ttl_infos.read(ttl_infos_buffer); @@ -609,6 +609,7 @@ std::pair Fetcher::fetchSelected } UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); + if (revision) disk->syncRevision(revision); @@ -743,7 +744,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( const UUID & part_uuid, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, size_t projections, bool is_projection, ThrottlerPtr throttler) @@ -799,7 +800,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( void Fetcher::downloadBaseOrProjectionPartToDisk( const String & replica_path, const MutableDataPartStoragePtr & data_part_storage, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, OutputBufferGetter output_buffer_getter, MergeTreeData::DataPart::Checksums & checksums, ThrottlerPtr throttler, @@ -807,6 +808,8 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( { size_t files; readBinary(files, in); + LOG_DEBUG(log, "Downloading files {}", files); + std::vector> written_files; @@ -872,7 +875,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( const String & tmp_prefix, DiskPtr disk, bool to_remote_disk, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, OutputBufferGetter output_buffer_getter, size_t projections, ThrottlerPtr throttler, diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index 8c15dc3cfdb..45a6cf83872 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -20,7 +20,7 @@ namespace DB { class StorageReplicatedMergeTree; -class PooledReadWriteBufferFromHTTP; +class ReadWriteBufferFromHTTP; namespace DataPartsExchange { @@ -94,7 +94,7 @@ private: void downloadBaseOrProjectionPartToDisk( const String & replica_path, const MutableDataPartStoragePtr & data_part_storage, - 
PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, OutputBufferGetter output_buffer_getter, MergeTreeData::DataPart::Checksums & checksums, ThrottlerPtr throttler, @@ -107,7 +107,7 @@ private: const String & tmp_prefix_, DiskPtr disk, bool to_remote_disk, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, OutputBufferGetter output_buffer_getter, size_t projections, ThrottlerPtr throttler, @@ -120,7 +120,7 @@ private: const UUID & part_uuid, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, size_t projections, bool is_projection, ThrottlerPtr throttler); @@ -131,7 +131,7 @@ private: bool to_detached, const String & tmp_prefix_, DiskPtr disk, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, size_t projections, MergeTreeData::DataPart::Checksums & checksums, ThrottlerPtr throttler); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 4e3d8d38b0e..3b766ac8d26 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -963,7 +963,7 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata else columns_to_send = metadata_snapshot->getSampleBlockNonMaterialized().getNames(); - /// DistributedSink will not own cluster, but will own ConnectionPools of the cluster + /// DistributedSink will not own cluster return std::make_shared( local_context, *this, metadata_snapshot, cluster, insert_sync, timeout, StorageID{remote_database, remote_table}, columns_to_send); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 72bbcdd3ea8..11da394feec 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1420,8 +1420,10 @@ void StorageS3::Configuration::connect(const ContextPtr & context) url.uri.getScheme()); client_configuration.endpointOverride = url.endpoint; + /// seems as we don't use it client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.http_connection_pool_size = global_settings.s3_http_connection_pool_size; + client_configuration.connectTimeoutMs = local_settings.s3_connect_timeout_ms; + auto headers = auth_settings.headers; if (!headers_from_ast.empty()) headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 039be222e7e..b539a152b69 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -461,21 +461,23 @@ std::pair> StorageURLSource: try { auto res = std::make_unique( + HTTPConnectionGroupType::STORAGE, request_uri, http_method, - callback, + proxy_config, + read_settings, timeouts, credentials, - settings.max_http_get_redirects, - settings.max_read_buffer_size, - read_settings, - headers, &context_->getRemoteHostFilter(), + settings.max_read_buffer_size, + settings.max_http_get_redirects, + callback, + /*use_external_buffer*/ false, + skip_url_not_found_error, + headers, delay_initialization, - /* use_external_buffer */ false, - /* skip_url_not_found_error */ skip_url_not_found_error, - /* file_info */ std::nullopt, - proxy_config); + /*file_info_*/ std::nullopt); + if (context_->getSettingsRef().engine_url_skip_empty_files && res->eof() && option != std::prev(end)) { @@ -547,7 +549,7 @@ StorageURLSink::StorageURLSink( auto proxy_config = getProxyConfiguration(http_method); auto write_buffer = std::make_unique( - Poco::URI(uri), 
http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config + HTTPConnectionGroupType::STORAGE, Poco::URI(uri), http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config ); const auto & settings = context->getSettingsRef(); @@ -1320,24 +1322,17 @@ std::optional IStorageURLBase::tryGetLastModificationTime( auto proxy_config = getProxyConfiguration(uri.getScheme()); - ReadWriteBufferFromHTTP buf( - uri, - Poco::Net::HTTPRequest::HTTP_GET, - {}, - getHTTPTimeouts(context), - credentials, - settings.max_http_get_redirects, - settings.max_read_buffer_size, - context->getReadSettings(), - headers, - &context->getRemoteHostFilter(), - true, - false, - false, - std::nullopt, - proxy_config); + auto buf = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withSettings(context->getReadSettings()) + .withTimeouts(getHTTPTimeouts(context)) + .withHostFilter(&context->getRemoteHostFilter()) + .withBufSize(settings.max_read_buffer_size) + .withRedirects(settings.max_http_get_redirects) + .withHeaders(headers) + .create(credentials); - return buf.tryGetLastModificationTime(); + return buf->tryGetLastModificationTime(); } StorageURL::StorageURL( diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index ca6d40a05a3..a5c16b3a5aa 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -153,17 +153,16 @@ ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr contex columns_info_uri.addQueryParameter("external_table_functions_use_nulls", toString(use_nulls)); Poco::Net::HTTPBasicCredentials credentials{}; - ReadWriteBufferFromHTTP buf( - columns_info_uri, - Poco::Net::HTTPRequest::HTTP_POST, - {}, - ConnectionTimeouts::getHTTPTimeouts( - context->getSettingsRef(), - context->getServerSettings().keep_alive_timeout), - credentials); + auto buf = BuilderRWBufferFromHTTP(columns_info_uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(ConnectionTimeouts::getHTTPTimeouts( + context->getSettingsRef(), + context->getServerSettings().keep_alive_timeout)) + .create(credentials); std::string columns_info; - readStringBinary(columns_info, buf); + readStringBinary(columns_info, *buf); NamesAndTypesList columns = NamesAndTypesList::parse(columns_info); return ColumnsDescription{columns}; diff --git a/tests/integration/test_backup_restore_new/test_cancel_backup.py b/tests/integration/test_backup_restore_new/test_cancel_backup.py index 6016bac9197..cce23a7e932 100644 --- a/tests/integration/test_backup_restore_new/test_cancel_backup.py +++ b/tests/integration/test_backup_restore_new/test_cancel_backup.py @@ -177,7 +177,7 @@ def cancel_restore(restore_id): def test_cancel_backup(): # We use partitioning so backups would contain more files. 
node.query( - "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%5" + "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%20" ) node.query(f"INSERT INTO tbl SELECT number FROM numbers(500)") diff --git a/tests/integration/test_checking_s3_blobs_paranoid/configs/setting.xml b/tests/integration/test_checking_s3_blobs_paranoid/configs/setting.xml index 23ab57f9330..d94ef68d9c4 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/configs/setting.xml +++ b/tests/integration/test_checking_s3_blobs_paranoid/configs/setting.xml @@ -5,6 +5,7 @@ 1 1 + 10000 diff --git a/tests/integration/test_checking_s3_blobs_paranoid/configs/storage_conf.xml b/tests/integration/test_checking_s3_blobs_paranoid/configs/storage_conf.xml index 7b1f503ed55..84f7f9f1b6d 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/configs/storage_conf.xml +++ b/tests/integration/test_checking_s3_blobs_paranoid/configs/storage_conf.xml @@ -19,6 +19,7 @@ minio minio123 1 + 10000 diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index e84209a03a1..dbcd7cc3c21 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -278,7 +278,7 @@ def test_unavailable_server(cluster): "Caught exception while loading metadata.*Connection refused" ) assert node2.contains_in_log( - "HTTP request to \`http://nginx:8080/test1/.*\` failed at try 1/10 with bytes read: 0/unknown. Error: Connection refused." + "Failed to make request to 'http://nginx:8080/test1/.*'. Error: 'Connection refused'. Failed at try 10/10." ) finally: node2.exec_in_container( diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index 9c1c9797383..a6db26c8575 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -46,6 +46,7 @@ def cluster_without_dns_cache_update(): except Exception as ex: print(ex) + raise finally: cluster.shutdown() @@ -61,6 +62,7 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") + node2.query("SYSTEM DROP CONNECTIONS CACHE") # First we check, that normal replication works node1.query( @@ -86,6 +88,7 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") + node2.query("SYSTEM DROP CONNECTIONS CACHE") # Data is downloaded assert_eq_with_retry(node2, "SELECT count(*) from test_table_drop", "6") @@ -124,6 +127,7 @@ def cluster_with_dns_cache_update(): except Exception as ex: print(ex) + raise finally: cluster.shutdown() @@ -267,6 +271,11 @@ def test_user_access_ip_change(cluster_with_dns_cache_update, node): privileged=True, user="root", ) + node.exec_in_container( + ["bash", "-c", 'clickhouse client -q "SYSTEM DROP CONNECTIONS CACHE"'], + privileged=True, + user="root", + ) retry_count = 1 assert_eq_with_retry( @@ -296,7 +305,8 @@ def test_host_is_drop_from_cache_after_consecutive_failures( # Note that the list of hosts in variable since lost_host will be there too (and it's dropped and added back) # dns_update_short -> dns_max_consecutive_failures set to 6 assert node4.wait_for_log_line( - "Code: 198. DB::Exception: Not found address of host: InvalidHostThatDoesNotExist." + regexp="Code: 198. 
DB::NetException: Not found address of host: InvalidHostThatDoesNotExist.", + look_behind_lines=300, ) assert node4.wait_for_log_line( "Cached hosts not found:.*InvalidHostThatDoesNotExist**", diff --git a/tests/integration/test_http_failover/test.py b/tests/integration/test_http_failover/test.py index 41b55ef635c..5920fd980ce 100644 --- a/tests/integration/test_http_failover/test.py +++ b/tests/integration/test_http_failover/test.py @@ -56,9 +56,10 @@ def dst_node_addrs(started_cluster, request): yield - # Clear static DNS entries + # Clear static DNS entries and all keep alive connections src_node.set_hosts([]) src_node.query("SYSTEM DROP DNS CACHE") + src_node.query("SYSTEM DROP CONNECTIONS CACHE") @pytest.mark.parametrize( @@ -77,7 +78,8 @@ def dst_node_addrs(started_cluster, request): def test_url_destination_host_with_multiple_addrs(dst_node_addrs, expectation): with expectation: result = src_node.query( - "SELECT * FROM url('http://dst_node:8123/?query=SELECT+42', TSV, 'column1 UInt32')" + "SELECT * FROM url('http://dst_node:8123/?query=SELECT+42', TSV, 'column1 UInt32')", + settings={"http_max_tries": "3"}, ) assert result == "42\n" diff --git a/tests/integration/test_redirect_url_storage/test.py b/tests/integration/test_redirect_url_storage/test.py index 17a9a03008e..033f02d7bde 100644 --- a/tests/integration/test_redirect_url_storage/test.py +++ b/tests/integration/test_redirect_url_storage/test.py @@ -150,7 +150,7 @@ def test_url_reconnect(started_cluster): def select(): global result result = node1.query( - "select sum(cityHash64(id)) from url('http://hdfs1:50075/webhdfs/v1/storage_big?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'id Int32') settings http_max_tries = 10, http_retry_max_backoff_ms=1000" + "select sum(cityHash64(id)) from url('http://hdfs1:50075/webhdfs/v1/storage_big?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'id Int32') settings http_max_tries=10, http_retry_max_backoff_ms=1000, http_make_head_request=false" ) assert int(result) == 6581218782194912115 diff --git a/tests/integration/test_s3_table_functions/test.py b/tests/integration/test_s3_table_functions/test.py index a6def175136..ff62d1a9eac 100644 --- a/tests/integration/test_s3_table_functions/test.py +++ b/tests/integration/test_s3_table_functions/test.py @@ -80,6 +80,7 @@ def test_s3_table_functions_timeouts(started_cluster): Test with timeout limit of 1200ms. This should raise an Exception and pass. 
""" + with PartitionManager() as pm: pm.add_network_delay(node, 1200) diff --git a/tests/integration/test_storage_s3/s3_mocks/unstable_server.py b/tests/integration/test_storage_s3/s3_mocks/unstable_server.py index 5ef781bdc9e..70d49b7c1b0 100644 --- a/tests/integration/test_storage_s3/s3_mocks/unstable_server.py +++ b/tests/integration/test_storage_s3/s3_mocks/unstable_server.py @@ -118,5 +118,5 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.wfile.write(b"OK") -httpd = http.server.HTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) +httpd = http.server.ThreadingHTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) httpd.serve_forever() diff --git a/tests/queries/0_stateless/00646_url_engine.python b/tests/queries/0_stateless/00646_url_engine.python index dc0fdd1a71d..931d18a3f80 100644 --- a/tests/queries/0_stateless/00646_url_engine.python +++ b/tests/queries/0_stateless/00646_url_engine.python @@ -12,6 +12,7 @@ import urllib.request import subprocess from io import StringIO from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn def is_ipv6(host): @@ -145,11 +146,19 @@ class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 +class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): + pass + + +class ThreadedHTTPServerV6(ThreadingMixIn, HTTPServerV6): + pass + + def start_server(): if IS_IPV6: - httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) + httpd = ThreadedHTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) else: - httpd = HTTPServer(HTTP_SERVER_ADDRESS, CSVHTTPServer) + httpd = ThreadedHTTPServer(HTTP_SERVER_ADDRESS, CSVHTTPServer) t = threading.Thread(target=httpd.serve_forever) return t, httpd diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 7af299c6728..b18ae8a99be 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -105,6 +105,7 @@ NAMED COLLECTION ADMIN ['NAMED COLLECTION CONTROL'] NAMED_COLLECTION ALL SET DEFINER [] USER_NAME ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP CONNECTIONS CACHE ['SYSTEM DROP CONNECTIONS CACHE','DROP CONNECTIONS CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP UNCOMPRESSED CACHE ['SYSTEM DROP UNCOMPRESSED','DROP UNCOMPRESSED CACHE','DROP UNCOMPRESSED'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MMAP CACHE ['SYSTEM DROP MMAP','DROP MMAP CACHE','DROP MMAP'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index f053387d1c5..187f55697e4 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -3,6 +3,7 @@ connect_timeout Seconds 10 connect_timeout_with_failover_ms Milliseconds 2000 connect_timeout_with_failover_secure_ms Milliseconds 3000 external_storage_connect_timeout_sec UInt64 10 +s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.python b/tests/queries/0_stateless/02205_HTTP_user_agent.python index d8f8a32b6db..83089741bf2 100644 --- 
a/tests/queries/0_stateless/02205_HTTP_user_agent.python +++ b/tests/queries/0_stateless/02205_HTTP_user_agent.python @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from http.server import SimpleHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn import socket import sys import threading @@ -116,11 +117,19 @@ class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 +class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): + pass + + +class ThreadedHTTPServerV6(ThreadingMixIn, HTTPServerV6): + pass + + def start_server(requests_amount): if IS_IPV6: - httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) + httpd = ThreadedHTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) else: - httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor) + httpd = ThreadedHTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor) def real_func(): for i in range(requests_amount): diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.python b/tests/queries/0_stateless/02233_HTTP_ranged.python index 66ef3304098..5d06e4824b1 100644 --- a/tests/queries/0_stateless/02233_HTTP_ranged.python +++ b/tests/queries/0_stateless/02233_HTTP_ranged.python @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn import socket import sys import re @@ -206,13 +207,22 @@ class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 +class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): + pass + + +class ThreadedHTTPServerV6(ThreadingMixIn, HTTPServerV6): + pass + + def start_server(): if IS_IPV6: - httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) + httpd = ThreadedHTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) else: - httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor) + httpd = ThreadedHTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor) t = threading.Thread(target=httpd.serve_forever) + t.start() return t, httpd @@ -235,8 +245,6 @@ def run_test(allow_range, settings, check_retries=False): HttpProcessor.responses_to_get = ["500", "200", "206"] retries_num = len(HttpProcessor.responses_to_get) - t, httpd = start_server() - t.start() test_select(settings) download_buffer_size = settings["max_download_buffer_size"] @@ -261,12 +269,12 @@ def run_test(allow_range, settings, check_retries=False): if HttpProcessor.range_used: raise Exception("HTTP Range used while not supported") - httpd.shutdown() - t.join() print("PASSED") def main(): + t, httpd = start_server() + settings = {"max_download_buffer_size": 20} # Test Accept-Ranges=False @@ -285,10 +293,15 @@ def main(): settings["max_download_threads"] = 2 run_test(allow_range=True, settings=settings, check_retries=True) + httpd.shutdown() + t.join() + if __name__ == "__main__": try: main() + sys.stdout.flush() + os._exit(0) except Exception as ex: exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_tb(exc_traceback, file=sys.stderr) diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh index 288f1129b53..e346d9893a7 100755 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "INSERT TO S3" $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/profile_events.csv', 'test', 'testtest', 'CSV', 'number 
UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10, s3_truncate_on_insert = 1; -" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | sort +" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | grep -v 'S3DiskConnections' | grep -v 'S3DiskAddresses' | sort echo "CHECK WITH query_log" $CLICKHOUSE_CLIENT -nq " diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh index ce90157d004..e4a1de9a2ec 100755 --- a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh @@ -19,8 +19,8 @@ query_id=$(${CLICKHOUSE_CLIENT} --query "select queryID() from ($query) limit 1" ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} -nm --query " WITH - ProfileEvents['ReadBufferFromS3ResetSessions'] AS reset, - ProfileEvents['ReadBufferFromS3PreservedSessions'] AS preserved + ProfileEvents['DiskConnectionsReset'] AS reset, + ProfileEvents['DiskConnectionsPreserved'] AS preserved SELECT preserved > reset FROM system.query_log WHERE type = 'QueryFinish' @@ -51,7 +51,7 @@ select queryID() from( " 2>&1) ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} -nm --query " -SELECT ProfileEvents['ReadWriteBufferFromHTTPPreservedSessions'] > 0 +SELECT ProfileEvents['StorageConnectionsPreserved'] > 0 FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() diff --git a/tests/queries/0_stateless/02833_url_without_path_encoding.sh b/tests/queries/0_stateless/02833_url_without_path_encoding.sh index b71586099cf..eb845c6b45b 100755 --- a/tests/queries/0_stateless/02833_url_without_path_encoding.sh +++ b/tests/queries/0_stateless/02833_url_without_path_encoding.sh @@ -8,5 +8,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=1" # Grep 'test%2Fa.tsv' to ensure that path wasn't encoded/decoded -$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=0" 2>&1 | grep -o "test%2Fa.tsv" -m1 - +$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=0" 2>&1 | \ + grep -o "test%2Fa.tsv" -m1 | head -n 1 From f7f1d86e667117dd51b74747c1b1ed5f9339c466 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 4 Mar 2024 14:37:17 +0100 Subject: [PATCH 353/356] fix tests test_attach_without_fetching test_replicated_merge_tree_wait_on_shutdown --- src/Common/CurrentMetrics.cpp | 2 +- src/Disks/IO/ReadBufferFromWebServer.cpp | 1 - tests/integration/test_attach_without_fetching/test.py | 1 + .../test_replicated_merge_tree_wait_on_shutdown/test.py | 4 ++++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index f43481f665b..dfbf6199361 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -274,7 +274,7 @@ M(DistrCacheUsedConnections, "Number of currently used connections to Distributed Cache") \ M(DistrCacheReadRequests, "Number of executed Read requests to Distributed Cache") \ M(DistrCacheWriteRequests, "Number of executed Write requests to Distributed Cache") \ - M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache") + 
M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache") \ \ M(StorageConnectionsStored, "Total count of sessions stored in the session pool for storages") \ M(StorageConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for storages") \ diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp b/src/Disks/IO/ReadBufferFromWebServer.cpp index 7509aa81d75..03300cc0714 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.cpp +++ b/src/Disks/IO/ReadBufferFromWebServer.cpp @@ -114,7 +114,6 @@ bool ReadBufferFromWebServer::nextImpl() chassert(working_buffer.begin() != nullptr); chassert(impl->buffer().begin() != nullptr); - chassert(working_buffer.begin() == impl->buffer().begin()); chassert(impl->available() == 0); diff --git a/tests/integration/test_attach_without_fetching/test.py b/tests/integration/test_attach_without_fetching/test.py index b430387e0f1..67352e2dcbe 100644 --- a/tests/integration/test_attach_without_fetching/test.py +++ b/tests/integration/test_attach_without_fetching/test.py @@ -56,6 +56,7 @@ def check_data(nodes, detached_parts): node.query_with_retry("SYSTEM SYNC REPLICA test") + for node in nodes: print("> Checking data integrity for", node.name) for i in range(10): diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index 67dd03098e9..995afedf415 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -40,6 +40,10 @@ def test_shutdown_and_wait(start_cluster): f"CREATE TABLE test_table (value UInt64) ENGINE=ReplicatedMergeTree('/test/table', 'r{i}') ORDER BY tuple()" ) + # we stop merges on node1 to make node2 fetch all 51 origin parts from node1 + # and not to fetch a smaller set of merged covering parts + node1.query("SYSTEM STOP MERGES test_table") + node1.query("INSERT INTO test_table VALUES (0)") node2.query("SYSTEM SYNC REPLICA test_table") From 4df406d3adce0ae1fb55d742cf59ddd928e96ddb Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 7 Mar 2024 13:56:51 +0100 Subject: [PATCH 354/356] work with review notes --- src/Client/Connection.cpp | 9 ++++---- src/Common/HTTPConnectionPool.cpp | 2 +- src/Common/HostResolvePool.cpp | 13 +++++++---- src/Common/HostResolvePool.h | 2 -- src/Common/ProfileEvents.cpp | 6 ++--- src/Common/tests/gtest_connection_pool.cpp | 27 ++++++++++++++++++++++ 6 files changed, 45 insertions(+), 14 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index a11a1243957..180942e6b83 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -212,8 +212,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts) /// Remove this possible stale entry from cache DNSResolver::instance().removeHostFromCache(host); - /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. - throw NetException(ErrorCodes::NETWORK_ERROR, "{} ({})", e.displayText(), getDescription()); + /// Add server address to exception. Exception will preserve stack trace. 
+ e.addMessage("({})", getDescription()); + throw; } catch (Poco::Net::NetException & e) { @@ -222,7 +223,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) /// Remove this possible stale entry from cache DNSResolver::instance().removeHostFromCache(host); - /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. + /// Add server address to exception. Also Exception will remember new stack trace. It's a pity that more precise exception type is lost. throw NetException(ErrorCodes::NETWORK_ERROR, "{} ({})", e.displayText(), getDescription()); } catch (Poco::TimeoutException & e) @@ -232,7 +233,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) /// Remove this possible stale entry from cache DNSResolver::instance().removeHostFromCache(host); - /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. + /// Add server address to exception. Also Exception will remember new stack trace. It's a pity that more precise exception type is lost. /// This exception can only be thrown from socket->connect(), so add information about connection timeout. const auto & connection_timeout = static_cast(secure) ? timeouts.secure_connection_timeout : timeouts.connection_timeout; throw NetException( diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 18ffef34091..a21438a11a2 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -336,7 +336,7 @@ private: request_stream_completed = false; response_stream = nullptr; - response_stream_completed = true; + response_stream_completed = false; return result; } diff --git a/src/Common/HostResolvePool.cpp b/src/Common/HostResolvePool.cpp index f6cc9c919ba..6db28edc07e 100644 --- a/src/Common/HostResolvePool.cpp +++ b/src/Common/HostResolvePool.cpp @@ -13,7 +13,7 @@ namespace ProfileEvents { extern const Event AddressesDiscovered; extern const Event AddressesExpired; - extern const Event AddressesFailScored; + extern const Event AddressesMarkedAsFailed; } namespace CurrentMetrics @@ -34,7 +34,7 @@ HostResolverMetrics HostResolver::getMetrics() return HostResolverMetrics{ .discovered = ProfileEvents::AddressesDiscovered, .expired = ProfileEvents::AddressesExpired, - .failed = ProfileEvents::AddressesFailScored, + .failed = ProfileEvents::AddressesMarkedAsFailed, .active_count = CurrentMetrics::AddressesActive, }; } @@ -120,7 +120,6 @@ void HostResolver::updateWeights() } chassert((getTotalWeight() > 0 && !records.empty()) || records.empty()); - random_weight_picker = std::uniform_int_distribution(0, getTotalWeight() - 1); } HostResolver::Entry HostResolver::resolve() @@ -170,6 +169,7 @@ void HostResolver::setFail(const Poco::Net::IPAddress & address) Poco::Net::IPAddress HostResolver::selectBest() { chassert(!records.empty()); + auto random_weight_picker = std::uniform_int_distribution(0, getTotalWeight() - 1); size_t weight = random_weight_picker(thread_local_rng); auto it = std::partition_point(records.begin(), records.end(), [&](const Record & rec) { return rec.weight_prefix_sum <= weight; }); chassert(it != records.end()); @@ -178,8 +178,13 @@ Poco::Net::IPAddress HostResolver::selectBest() HostResolver::Records::iterator HostResolver::find(const Poco::Net::IPAddress & addr) TSA_REQUIRES(mutex) { - return std::lower_bound( + auto it = std::lower_bound( records.begin(), records.end(), addr, [](const Record & rec, const 
            Poco::Net::IPAddress & value) { return rec.address < value; });
+
+    if (it != records.end() && it->address != addr)
+        return records.end();
+
+    return it;
 }
 bool HostResolver::isUpdateNeeded()
diff --git a/src/Common/HostResolvePool.h b/src/Common/HostResolvePool.h
index 2a31cec3b2d..4f127f05253 100644
--- a/src/Common/HostResolvePool.h
+++ b/src/Common/HostResolvePool.h
@@ -191,8 +191,6 @@ protected:
     Poco::Timestamp last_resolve_time TSA_GUARDED_BY(mutex);
     Records records TSA_GUARDED_BY(mutex);
-    std::uniform_int_distribution random_weight_picker TSA_GUARDED_BY(mutex);
-
     Poco::Logger * log = &Poco::Logger::get("ConnectionPool");
 };
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 0c9582ab4fb..c1ac3d08245 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -721,9 +721,9 @@ The server successfully detected this situation and will download merged part fr
     M(HTTPConnectionsErrors, "Number of cases when creation of a http connection failed") \
     M(HTTPConnectionsElapsedMicroseconds, "Total time spend on creating http connections") \
     \
-    M(AddressesDiscovered, "Total count of new addresses in dns resolve results for connection pools") \
-    M(AddressesExpired, "Total count of expired addresses which is no longer presented in dns resolve results for for connection pools") \
-    M(AddressesFailScored, "Total count of new addresses in dns resolve results for for connection pools") \
+    M(AddressesDiscovered, "Total count of new addresses in dns resolve results for http connections") \
+    M(AddressesExpired, "Total count of expired addresses which are no longer present in dns resolve results for http connections") \
+    M(AddressesMarkedAsFailed, "Total count of addresses which have been marked as faulty due to connection errors for http connections") \
 #ifdef APPLY_FOR_EXTERNAL_EVENTS
diff --git a/src/Common/tests/gtest_connection_pool.cpp b/src/Common/tests/gtest_connection_pool.cpp
index 01b78958442..c271cc0e2ec 100644
--- a/src/Common/tests/gtest_connection_pool.cpp
+++ b/src/Common/tests/gtest_connection_pool.cpp
@@ -552,6 +552,33 @@ TEST_F(ConnectionPoolTest, HardLimit)
     ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count));
+    ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
+    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
+    ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]);
+}
+
+TEST_F(ConnectionPoolTest, NoReceiveCall)
+{
+    auto pool = getPool();
+
+    {
+        auto connection = pool->getConnection(timeouts);
+
+        {
+            auto data = String("Hello");
+            Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_PUT, "/", "HTTP/1.1"); // HTTP/1.1 is required for keep-alive
+            request.setContentLength(data.size());
+            std::ostream & ostream = connection->sendRequest(request);
+            ostream << data;
+        }
+
+        connection->flushRequest();
+    }
+
+    ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count));
+    ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count));
+
+    ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
     ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
     ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]);

From 21b04143e81e5e2e9a6c5fa02103bcafdb4a27ed Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Thu, 7 Mar 2024 19:17:25 +0100
Subject: [PATCH 355/356] set var RECORDS in main process

---
 tests/queries/0_stateless/02998_system_dns_cache_table.sh | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/02998_system_dns_cache_table.sh b/tests/queries/0_stateless/02998_system_dns_cache_table.sh
index 41d2386fe9c..b74fc00ab3b 100755
--- a/tests/queries/0_stateless/02998_system_dns_cache_table.sh
+++ b/tests/queries/0_stateless/02998_system_dns_cache_table.sh
@@ -5,12 +5,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 # Retries are necessary because the DNS cache may be flushed before second statement is executed
-i=0 retries=3
+i=0
+retries=5
 while [[ $i -lt $retries ]]; do
-    ${CLICKHOUSE_CURL} -sS --fail --data "SELECT * FROM url('http://localhost:8123/ping', CSV, 'auto', headers())" "${CLICKHOUSE_URL}" | grep -oP -q 'Ok.' && \
+    ${CLICKHOUSE_CURL} -sS --fail --data "SELECT * FROM url('http://localhost:8123/ping', CSV, 'auto', headers())" "${CLICKHOUSE_URL}" | grep -oP -q 'Ok.' || continue
+
     RECORDS=$(${CLICKHOUSE_CURL} -sS --fail --data "SELECT hostname, ip_address, ip_family, (isNotNull(cached_at) AND cached_at > '1970-01-01 00:00:00') FROM system.dns_cache WHERE hostname = 'localhost' and ip_family = 'IPv4';" "${CLICKHOUSE_URL}")
-    if [ "${RECORDS}" != "" ]; then
+    if [[ -n "${RECORDS}" ]]; then
         echo "${RECORDS}"
         exit 0
     fi

From 77c5de700f62451c6f2cf55620d9522832b5d56b Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Sat, 9 Mar 2024 13:35:12 +0100
Subject: [PATCH 356/356] fix how web disk reads empty directories

---
 .../ObjectStorages/Web/WebObjectStorage.cpp |  5 +--
 src/IO/ReadWriteBufferFromHTTP.h            |  4 ++-
 src/Storages/StorageURL.cpp                 | 32 ++++++++-----------
 3 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
index 0bad668a404..4adb92cf5c8 100644
--- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
@@ -53,6 +53,7 @@ WebObjectStorage::loadFiles(const String & path, const std::unique_lockgetReadSettings())
         .withTimeouts(timeouts)
         .withHostFilter(&getContext()->getRemoteHostFilter())
+        .withSkipNotFound(true)
         .create(credentials);
     String file_name;
@@ -98,10 +99,6 @@ WebObjectStorage::loadFiles(const String & path, const std::unique_lock> StorageURLSource:
     try
     {
-        auto res = std::make_unique(
-            HTTPConnectionGroupType::STORAGE,
-            request_uri,
-            http_method,
-            proxy_config,
-            read_settings,
-            timeouts,
-            credentials,
-            &context_->getRemoteHostFilter(),
-            settings.max_read_buffer_size,
-            settings.max_http_get_redirects,
-            callback,
-            /*use_external_buffer*/ false,
-            skip_url_not_found_error,
-            headers,
-            delay_initialization,
-            /*file_info_*/ std::nullopt);
-
+        auto res = BuilderRWBufferFromHTTP(request_uri)
+                       .withConnectionGroup(HTTPConnectionGroupType::STORAGE)
+                       .withMethod(http_method)
+                       .withProxy(proxy_config)
+                       .withSettings(read_settings)
+                       .withTimeouts(timeouts)
+                       .withHostFilter(&context_->getRemoteHostFilter())
+                       .withBufSize(settings.max_read_buffer_size)
+                       .withRedirects(settings.max_http_get_redirects)
+                       .withOutCallback(callback)
+                       .withSkipNotFound(skip_url_not_found_error)
+                       .withHeaders(headers)
+                       .withDelayInit(delay_initialization)
+                       .create(credentials);
         if (context_->getSettingsRef().engine_url_skip_empty_files && res->eof() && option != std::prev(end))
         {