From 88bfa6c9ba2e15189dd0931a7ecb17b5e76ae81b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Mar 2021 13:25:26 +0300 Subject: [PATCH 01/22] Add ReadFromMergeTree step. --- .../QueryPlan/ReadFromMergeTree.cpp | 151 +++++++++++ src/Processors/QueryPlan/ReadFromMergeTree.h | 68 +++++ src/Processors/ya.make | 1 + .../MergeTreeBaseSelectProcessor.cpp | 4 +- .../MergeTree/MergeTreeBaseSelectProcessor.h | 4 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 252 ++++++------------ src/Storages/MergeTree/MergeTreeReadPool.h | 2 +- 7 files changed, 307 insertions(+), 175 deletions(-) create mode 100644 src/Processors/QueryPlan/ReadFromMergeTree.cpp create mode 100644 src/Processors/QueryPlan/ReadFromMergeTree.h diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp new file mode 100644 index 00000000000..1972257c9f1 --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -0,0 +1,151 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +ReadFromMergeTree::ReadFromMergeTree( + const MergeTreeData & storage_, + StorageMetadataPtr metadata_snapshot_, + String query_id_, + Names required_columns_, + RangesInDataParts parts_, + PrewhereInfoPtr prewhere_info_, + Names virt_column_names_, + Settings settings_, + size_t num_streams_, + bool allow_mix_streams_, + bool read_reverse_) + : ISourceStep(DataStream{.header = MergeTreeBaseSelectProcessor::transformHeader( + metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()), + prewhere_info_, + virt_column_names_)}) + , storage(storage_) + , metadata_snapshot(std::move(metadata_snapshot_)) + , query_id(std::move(query_id_)) + , required_columns(std::move(required_columns_)) + , parts(std::move(parts_)) + , prewhere_info(std::move(prewhere_info_)) + , virt_column_names(std::move(virt_column_names_)) + , settings(std::move(settings_)) + , 
num_streams(num_streams_) + , allow_mix_streams(allow_mix_streams_) + , read_reverse(read_reverse_) +{ +} + +Pipe ReadFromMergeTree::readFromPool() +{ + Pipes pipes; + size_t sum_marks = 0; + size_t total_rows = 0; + + for (const auto & part : parts) + { + sum_marks += part.getMarksCount(); + total_rows += part.getRowsCount(); + } + + auto pool = std::make_shared( + num_streams, + sum_marks, + settings.min_marks_for_concurrent_read, + std::move(parts), + storage, + metadata_snapshot, + prewhere_info, + true, + required_columns, + settings.backoff_settings, + settings.preferred_block_size_bytes, + false); + + auto * logger = &Poco::Logger::get(storage.getLogName() + " (SelectExecutor)"); + LOG_TRACE(logger, "Reading approx. {} rows with {} streams", total_rows, num_streams); + + for (size_t i = 0; i < num_streams; ++i) + { + auto source = std::make_shared( + i, pool, settings.min_marks_for_concurrent_read, settings.max_block_size, + settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, + storage, metadata_snapshot, settings.use_uncompressed_cache, + prewhere_info, settings.reader_settings, virt_column_names); + + if (i == 0) + { + /// Set the approximate number of rows for the first source only + source->addTotalRowsApprox(total_rows); + } + + pipes.emplace_back(std::move(source)); + } + + return Pipe::unitePipes(std::move(pipes)); +} + +template +ProcessorPtr ReadFromMergeTree::createSource(const RangesInDataPart & part) +{ + return std::make_shared( + storage, metadata_snapshot, part.data_part, settings.max_block_size, settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, required_columns, part.ranges, settings.use_uncompressed_cache, + prewhere_info, true, settings.reader_settings, virt_column_names, part.part_index_in_query); +} + +Pipe ReadFromMergeTree::readFromSeparateParts() +{ + Pipes pipes; + for (const auto & part : parts) + { + auto source = read_reverse + ? 
createSource(part) + : createSource(part); + + std::make_shared( + storage, metadata_snapshot, part.data_part, settings.max_block_size, settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, required_columns, part.ranges, settings.use_uncompressed_cache, + prewhere_info, true, settings.reader_settings, virt_column_names, part.part_index_in_query); + + pipes.emplace_back(std::move(source)); + } + + return Pipe::unitePipes(std::move(pipes)); +} + +Pipe ReadFromMergeTree::read() +{ + if (allow_mix_streams && num_streams > 1) + return readFromPool(); + + auto pipe = readFromSeparateParts(); + if (allow_mix_streams) + { + /// Use ConcatProcessor to concat sources together. + /// It is needed to read in parts order (and so in PK order) if single thread is used. + if (pipe.numOutputPorts() > 1) + pipe.addTransform(std::make_shared(pipe.getHeader(), pipe.numOutputPorts())); + } + + return pipe; +} + +void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +{ + Pipe pipe = read(); + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + // Attach QueryIdHolder if needed + if (!query_id.empty()) + pipe.addQueryIdHolder(std::make_shared(query_id, storage)); + + pipeline.init(std::move(pipe)); +} + +} diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h new file mode 100644 index 00000000000..08043490bc7 --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -0,0 +1,68 @@ +#pragma once +#include +#include +#include +#include + +namespace DB +{ + +/// Query plan step that reads the given mark ranges of MergeTree data parts, either through a shared read pool or with one source per part.
+class ReadFromMergeTree : public ISourceStep +{ +public: + + struct Settings + { + UInt64 max_block_size; + size_t preferred_block_size_bytes; + size_t preferred_max_column_in_block_size_bytes; + size_t min_marks_for_concurrent_read; + bool use_uncompressed_cache; + + MergeTreeReaderSettings reader_settings; + MergeTreeReadPool::BackoffSettings backoff_settings; + }; + + explicit ReadFromMergeTree( + const MergeTreeData & storage_, + StorageMetadataPtr metadata_snapshot_, + String query_id_, + Names required_columns_, + RangesInDataParts parts_, + PrewhereInfoPtr prewhere_info_, + Names virt_column_names_, + Settings settings_, + size_t num_streams_, + bool allow_mix_streams_, + bool read_reverse_ + ); + + String getName() const override { return "ReadFromMergeTree"; } + + void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + +private: + const MergeTreeData & storage; + StorageMetadataPtr metadata_snapshot; + String query_id; + + Names required_columns; + RangesInDataParts parts; + PrewhereInfoPtr prewhere_info; + Names virt_column_names; + Settings settings; + + size_t num_streams; + bool allow_mix_streams; + bool read_reverse; + + Pipe read(); + Pipe readFromPool(); + Pipe readFromSeparateParts(); + + template + ProcessorPtr createSource(const RangesInDataPart & part); +}; + +} diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 35abfbae756..9ef888ff1d8 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -125,6 +125,7 @@ SRCS( QueryPlan/PartialSortingStep.cpp QueryPlan/QueryIdHolder.cpp QueryPlan/QueryPlan.cpp + QueryPlan/ReadFromMergeTree.cpp QueryPlan/ReadFromPreparedSource.cpp QueryPlan/ReadNothingStep.cpp QueryPlan/ReverseRowsStep.cpp diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 6bf164dd824..41ad71c89ce 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ 
b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -30,7 +30,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( const MergeTreeReaderSettings & reader_settings_, bool use_uncompressed_cache_, const Names & virt_column_names_) - : SourceWithProgress(getHeader(std::move(header), prewhere_info_, virt_column_names_)) + : SourceWithProgress(transformHeader(std::move(header), prewhere_info_, virt_column_names_)) , storage(storage_) , metadata_snapshot(metadata_snapshot_) , prewhere_info(prewhere_info_) @@ -370,7 +370,7 @@ void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const P } } -Block MergeTreeBaseSelectProcessor::getHeader( +Block MergeTreeBaseSelectProcessor::transformHeader( Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns) { executePrewhereActions(block, prewhere_info); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 00ef131ae45..a4c55cbae45 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -33,6 +33,8 @@ public: ~MergeTreeBaseSelectProcessor() override; + static Block transformHeader(Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns); + static void executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info); protected: @@ -49,8 +51,6 @@ protected: static void injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns); static void injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns); - static Block getHeader(Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns); - void initializeRangeReaders(MergeTreeReadTask & task); protected: diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 
dcfc3293bb6..701c56c03f6 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -950,23 +951,6 @@ size_t minMarksForConcurrentRead( } -static QueryPlanPtr createPlanFromPipe(Pipe pipe, const String & query_id, const MergeTreeData & data, const std::string & description = "") -{ - auto plan = std::make_unique(); - - std::string storage_name = "MergeTree"; - if (!description.empty()) - storage_name += ' ' + description; - - // Attach QueryIdHolder if needed - if (!query_id.empty()) - pipe.addQueryIdHolder(std::make_shared(query_id, data)); - - auto step = std::make_unique(std::move(pipe), storage_name); - plan->addStep(std::move(step)); - return plan; -} - QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( RangesInDataParts && parts, size_t num_streams, @@ -1020,6 +1004,17 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( if (0 == sum_marks) return {}; + ReadFromMergeTree::Settings step_settings + { + .max_block_size = max_block_size, + .preferred_block_size_bytes = settings.preferred_block_size_bytes, + .preferred_max_column_in_block_size_bytes = settings.preferred_max_column_in_block_size_bytes, + .min_marks_for_concurrent_read = min_marks_for_concurrent_read, + .use_uncompressed_cache = use_uncompressed_cache, + .reader_settings = reader_settings, + .backoff_settings = MergeTreeReadPool::BackoffSettings(settings), + }; + if (num_streams > 1) { /// Parallel query execution. @@ -1028,67 +1023,16 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( /// Reduce the number of num_streams if the data is small. 
if (sum_marks < num_streams * min_marks_for_concurrent_read && parts.size() < num_streams) num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size()); - - MergeTreeReadPoolPtr pool = std::make_shared( - num_streams, - sum_marks, - min_marks_for_concurrent_read, - std::move(parts), - data, - metadata_snapshot, - query_info.prewhere_info, - true, - column_names, - MergeTreeReadPool::BackoffSettings(settings), - settings.preferred_block_size_bytes, - false); - - /// Let's estimate total number of rows for progress bar. - LOG_TRACE(log, "Reading approx. {} rows with {} streams", total_rows, num_streams); - - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - i, pool, min_marks_for_concurrent_read, max_block_size, - settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, - data, metadata_snapshot, use_uncompressed_cache, - query_info.prewhere_info, reader_settings, virt_columns); - - if (i == 0) - { - /// Set the approximate number of rows for the first source only - source->addTotalRowsApprox(total_rows); - } - - res.emplace_back(std::move(source)); - } - - return createPlanFromPipe(Pipe::unitePipes(std::move(res)), query_id, data); } - else - { - /// Sequential query execution. 
- Pipes res; - for (const auto & part : parts) - { - auto source = std::make_shared( - data, metadata_snapshot, part.data_part, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, - query_info.prewhere_info, true, reader_settings, virt_columns, part.part_index_in_query); + auto plan = std::make_unique(); + auto step = std::make_unique( + data, metadata_snapshot, query_id, + column_names, std::move(parts), query_info.prewhere_info, virt_columns, + step_settings, num_streams, /*allow_mix_streams*/ true, /*read_reverse*/ false); - res.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(res)); - - /// Use ConcatProcessor to concat sources together. - /// It is needed to read in parts order (and so in PK order) if single thread is used. - if (pipe.numOutputPorts() > 1) - pipe.addTransform(std::make_shared(pipe.getHeader(), pipe.numOutputPorts())); - - return createPlanFromPipe(std::move(pipe), query_id, data); - } + plan->addStep(std::move(step)); + return plan; } static ActionsDAGPtr createProjection(const Block & header) @@ -1208,8 +1152,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( for (size_t i = 0; i < num_streams && !parts.empty(); ++i) { size_t need_marks = min_marks_per_stream; - - Pipes pipes; + RangesInDataParts new_parts; /// Loop over parts. 
/// We will iteratively take part or some subrange of a part from the back @@ -1264,48 +1207,31 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( parts.emplace_back(part); } ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, input_order_info->direction); - - if (input_order_info->direction == 1) - { - pipes.emplace_back(std::make_shared( - data, - metadata_snapshot, - part.data_part, - max_block_size, - settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, - column_names, - ranges_to_get_from_part, - use_uncompressed_cache, - query_info.prewhere_info, - true, - reader_settings, - virt_columns, - part.part_index_in_query)); - } - else - { - pipes.emplace_back(std::make_shared( - data, - metadata_snapshot, - part.data_part, - max_block_size, - settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, - column_names, - ranges_to_get_from_part, - use_uncompressed_cache, - query_info.prewhere_info, - true, - reader_settings, - virt_columns, - part.part_index_in_query)); - } + new_parts.emplace_back(part.data_part, part.part_index_in_query, std::move(ranges_to_get_from_part)); } - auto plan = createPlanFromPipe(Pipe::unitePipes(std::move(pipes)), query_id, data, "with order"); + ReadFromMergeTree::Settings step_settings + { + .max_block_size = max_block_size, + .preferred_block_size_bytes = settings.preferred_block_size_bytes, + .preferred_max_column_in_block_size_bytes = settings.preferred_max_column_in_block_size_bytes, + .min_marks_for_concurrent_read = min_marks_for_concurrent_read, + .use_uncompressed_cache = use_uncompressed_cache, + .reader_settings = reader_settings, + .backoff_settings = MergeTreeReadPool::BackoffSettings(settings), + }; - if (input_order_info->direction != 1) + bool read_reverse = input_order_info->direction != 1; + + auto plan = std::make_unique(); + auto step = std::make_unique( + data, metadata_snapshot, query_id, + 
column_names, std::move(new_parts), query_info.prewhere_info, virt_columns, + step_settings, num_streams, /*allow_mix_streams*/ false, /*read_reverse*/ read_reverse); + + plan->addStep(std::move(step)); + + if (read_reverse) { auto reverse_step = std::make_unique(plan->getCurrentDataStream()); plan->addStep(std::move(reverse_step)); @@ -1403,7 +1329,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( num_streams = settings.max_final_threads; /// If setting do_not_merge_across_partitions_select_final is true than we won't merge parts from different partitions. - /// We have all parts in parts vector, where parts with same partition are nerby. + /// We have all parts in parts vector, where parts with same partition are nearby. /// So we will store iterators pointed to the beginning of each partition range (and parts.end()), /// then we will create a pipe for each partition that will run selecting processor and merging processor /// for the parts with this partition. In the end we will unite all the pipes. 
@@ -1442,7 +1368,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( QueryPlanPtr plan; { - Pipes pipes; + RangesInDataParts new_parts; /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition /// with level > 0 then we won't postprocess this part and if num_streams > 1 we @@ -1461,36 +1387,35 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( { for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it) { - auto source_processor = std::make_shared( - data, - metadata_snapshot, - part_it->data_part, - max_block_size, - settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, - column_names, - part_it->ranges, - use_uncompressed_cache, - query_info.prewhere_info, - true, - reader_settings, - virt_columns, - part_it->part_index_in_query); - - pipes.emplace_back(std::move(source_processor)); + new_parts.emplace_back(part_it->data_part, part_it->part_index_in_query, part_it->ranges); } } - if (pipes.empty()) + if (new_parts.empty()) continue; - auto pipe = Pipe::unitePipes(std::move(pipes)); + ReadFromMergeTree::Settings step_settings + { + .max_block_size = max_block_size, + .preferred_block_size_bytes = settings.preferred_block_size_bytes, + .preferred_max_column_in_block_size_bytes = settings.preferred_max_column_in_block_size_bytes, + .min_marks_for_concurrent_read = 0, /// this setting is not used for reading in order + .use_uncompressed_cache = use_uncompressed_cache, + .reader_settings = reader_settings, + .backoff_settings = MergeTreeReadPool::BackoffSettings(settings), + }; + + plan = std::make_unique(); + auto step = std::make_unique( + data, metadata_snapshot, query_id, + column_names, std::move(new_parts), query_info.prewhere_info, virt_columns, + step_settings, num_streams, /*allow_mix_streams*/ false, /*read_reverse*/ false); + + plan->addStep(std::move(step)); /// 
Drop temporary columns, added by 'sorting_key_expr' if (!out_projection) - out_projection = createProjection(pipe.getHeader()); - - plan = createPlanFromPipe(std::move(pipe), query_id, data, "with final"); + out_projection = createProjection(plan->getCurrentDataStream().header); } auto expression_step = std::make_unique( @@ -1537,7 +1462,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (!lonely_parts.empty()) { - Pipes pipes; + RangesInDataParts new_parts; size_t num_streams_for_lonely_parts = num_streams * lonely_parts.size(); @@ -1552,41 +1477,28 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (sum_marks_in_lonely_parts < num_streams_for_lonely_parts * min_marks_for_concurrent_read && lonely_parts.size() < num_streams_for_lonely_parts) num_streams_for_lonely_parts = std::max((sum_marks_in_lonely_parts + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, lonely_parts.size()); - - MergeTreeReadPoolPtr pool = std::make_shared( - num_streams_for_lonely_parts, - sum_marks_in_lonely_parts, - min_marks_for_concurrent_read, - std::move(lonely_parts), - data, - metadata_snapshot, - query_info.prewhere_info, - true, - column_names, - MergeTreeReadPool::BackoffSettings(settings), - settings.preferred_block_size_bytes, - false); - - LOG_TRACE(log, "Reading approx. 
{} rows with {} streams", total_rows_in_lonely_parts, num_streams_for_lonely_parts); - - for (size_t i = 0; i < num_streams_for_lonely_parts; ++i) + ReadFromMergeTree::Settings step_settings { - auto source = std::make_shared( - i, pool, min_marks_for_concurrent_read, max_block_size, - settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, - data, metadata_snapshot, use_uncompressed_cache, - query_info.prewhere_info, reader_settings, virt_columns); + .max_block_size = max_block_size, + .preferred_block_size_bytes = settings.preferred_block_size_bytes, + .preferred_max_column_in_block_size_bytes = settings.preferred_max_column_in_block_size_bytes, + .min_marks_for_concurrent_read = min_marks_for_concurrent_read, + .use_uncompressed_cache = use_uncompressed_cache, + .reader_settings = reader_settings, + .backoff_settings = MergeTreeReadPool::BackoffSettings(settings), + }; - pipes.emplace_back(std::move(source)); - } + auto plan = std::make_unique(); + auto step = std::make_unique( + data, metadata_snapshot, query_id, + column_names, std::move(lonely_parts), query_info.prewhere_info, virt_columns, + step_settings, num_streams_for_lonely_parts, /*allow_mix_streams*/ true, /*read_reverse*/ false); - auto pipe = Pipe::unitePipes(std::move(pipes)); + plan->addStep(std::move(step)); /// Drop temporary columns, added by 'sorting_key_expr' if (!out_projection) - out_projection = createProjection(pipe.getHeader()); - - QueryPlanPtr plan = createPlanFromPipe(std::move(pipe), query_id, data, "with final"); + out_projection = createProjection(plan->getCurrentDataStream().header); auto expression_step = std::make_unique( plan->getCurrentDataStream(), diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 366e9a2381a..9949bdf86f8 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -100,7 +100,7 @@ private: const MergeTreeData & data; 
StorageMetadataPtr metadata_snapshot; - Names column_names; + const Names column_names; bool do_not_steal_tasks; bool predict_block_size_bytes; std::vector per_part_column_name_set; From 8fad179800cfee9a83860e223d41208e2b2f12c8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Mar 2021 18:14:20 +0300 Subject: [PATCH 02/22] Fix some tests. --- .../QueryPlan/ReadFromMergeTree.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 3 -- ...1_mergetree_read_in_order_spread.reference | 6 ++-- ...monotonous_functions_in_order_by.reference | 6 ++-- .../01576_alias_column_rewrite.reference | 28 +++++++++---------- 5 files changed, 21 insertions(+), 24 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 1972257c9f1..7fac4ca738b 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -91,7 +91,7 @@ Pipe ReadFromMergeTree::readFromPool() template ProcessorPtr ReadFromMergeTree::createSource(const RangesInDataPart & part) { - return std::make_shared( + return std::make_shared( storage, metadata_snapshot, part.data_part, settings.max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, required_columns, part.ranges, settings.use_uncompressed_cache, prewhere_info, true, settings.reader_settings, virt_column_names, part.part_index_in_query); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 701c56c03f6..235f1a16fd0 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1017,9 +1017,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( if (num_streams > 1) { - /// Parallel query execution. - Pipes res; - /// Reduce the number of num_streams if the data is small. 
if (sum_marks < num_streams * min_marks_for_concurrent_read && parts.size() < num_streams) num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size()); diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference index becc626c1bb..835e2af269a 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference @@ -13,16 +13,16 @@ ExpressionTransform (MergingSorted) (Expression) ExpressionTransform - (ReadFromStorage) + (ReadFromMergeTree) MergeTree 0 → 1 (MergingSorted) MergingSortedTransform 2 → 1 (Expression) ExpressionTransform × 2 - (ReadFromStorage) + (ReadFromMergeTree) MergeTree × 2 0 → 1 (MergingSorted) (Expression) ExpressionTransform - (ReadFromStorage) + (ReadFromMergeTree) MergeTree 0 → 1 diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference index a1a1814a581..0eb7e06f724 100644 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference @@ -11,7 +11,7 @@ Expression (Projection) PartialSorting (Sort each block for ORDER BY) Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (MergeTree) + ReadFromMergeTree SELECT timestamp, key @@ -23,7 +23,7 @@ Expression (Projection) FinishSorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (MergeTree with order) + ReadFromMergeTree SELECT timestamp, key @@ -37,7 +37,7 @@ Expression (Projection) FinishSorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota 
after reading from storage) - ReadFromStorage (MergeTree with order) + ReadFromMergeTree SELECT timestamp, key diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 334ebc7eb1f..c5679544e1d 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -28,47 +28,47 @@ Expression (Projection) PartialSorting (Sort each block for ORDER BY) Expression ((Before ORDER BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (MergeTree) + ReadFromMergeTree Expression (Projection) Limit (preliminary LIMIT) FinishSorting Expression ((Before ORDER BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromMergeTree + ReadFromMergeTree + ReadFromMergeTree Expression (Projection) Limit (preliminary LIMIT) FinishSorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromMergeTree + ReadFromMergeTree + ReadFromMergeTree optimize_aggregation_in_order Expression ((Projection + Before ORDER BY)) Aggregating Expression ((Before GROUP BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (MergeTree) + ReadFromMergeTree Expression ((Projection + Before ORDER BY)) Aggregating Expression ((Before GROUP BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + 
ReadFromMergeTree + ReadFromMergeTree + ReadFromMergeTree Expression ((Projection + Before ORDER BY)) Aggregating Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromMergeTree + ReadFromMergeTree + ReadFromMergeTree second-index 1 1 From 7bc891d78c92c68a986f52aea682e9fbcf17d3af Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Mar 2021 19:03:30 +0300 Subject: [PATCH 03/22] Fix other tests. --- tests/queries/0_stateless/00717_merge_and_distributed.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/00717_merge_and_distributed.sql b/tests/queries/0_stateless/00717_merge_and_distributed.sql index f0d34b5165f..35dad18937a 100644 --- a/tests/queries/0_stateless/00717_merge_and_distributed.sql +++ b/tests/queries/0_stateless/00717_merge_and_distributed.sql @@ -18,9 +18,9 @@ SELECT * FROM merge(currentDatabase(), 'test_local_1'); SELECT *, _table FROM merge(currentDatabase(), 'test_local_1') ORDER BY _table; SELECT sum(value), _table FROM merge(currentDatabase(), 'test_local_1') GROUP BY _table ORDER BY _table; SELECT * FROM merge(currentDatabase(), 'test_local_1') WHERE _table = 'test_local_1'; -SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table = 'test_local_1'; -- { serverError 16 } +SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table = 'test_local_1'; -- { serverError 10 } SELECT * FROM merge(currentDatabase(), 'test_local_1') WHERE _table in ('test_local_1', 'test_local_2'); -SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table in ('test_local_1', 'test_local_2'); -- { serverError 16 } +SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table in ('test_local_1', 'test_local_2'); -- { serverError 10 } SELECT '--------------Single 
Distributed------------'; SELECT * FROM merge(currentDatabase(), 'test_distributed_1'); @@ -36,9 +36,9 @@ SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') ORDER BY _ta SELECT *, _table FROM merge(currentDatabase(), 'test_local_1|test_local_2') ORDER BY _table; SELECT sum(value), _table FROM merge(currentDatabase(), 'test_local_1|test_local_2') GROUP BY _table ORDER BY _table; SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') WHERE _table = 'test_local_1'; -SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table = 'test_local_1'; -- { serverError 16 } +SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table = 'test_local_1'; -- { serverError 10 } SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') WHERE _table in ('test_local_1', 'test_local_2') ORDER BY value; -SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table in ('test_local_1', 'test_local_2') ORDER BY value; -- { serverError 16 } +SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table in ('test_local_1', 'test_local_2') ORDER BY value; -- { serverError 10 } SELECT '--------------Local Merge Distributed------------'; SELECT * FROM merge(currentDatabase(), 'test_local_1|test_distributed_2') ORDER BY _table; From 9f39f5d52d9ec2019e709de0de4537da75406754 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 6 Apr 2021 15:39:55 +0300 Subject: [PATCH 04/22] Add more counters to MergeTreeDataSelectExecutor --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 113 ++++++++++++------ .../MergeTree/MergeTreeDataSelectExecutor.h | 16 ++- 2 files changed, 89 insertions(+), 40 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 235f1a16fd0..e6ee156ff7f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ 
b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -273,11 +273,12 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const Context & query_context = context.hasQueryContext() ? context.getQueryContext() : context; - if (query_context.getSettingsRef().allow_experimental_query_deduplication) - selectPartsToReadWithUUIDFilter(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, query_context); - else - selectPartsToRead(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read); + PartFilterCounters part_filter_counters; + if (query_context.getSettingsRef().allow_experimental_query_deduplication) + selectPartsToReadWithUUIDFilter(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, query_context, part_filter_counters); + else + selectPartsToRead(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, part_filter_counters); /// Sampling. 
Names column_names_to_read = real_column_names; @@ -559,6 +560,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( MergeTreeIndexConditionPtr condition; std::atomic total_granules{0}; std::atomic granules_dropped{0}; + std::atomic total_parts{0}; + std::atomic parts_dropped{0}; DataSkippingIndexAndCondition(MergeTreeIndexPtr index_, MergeTreeIndexConditionPtr condition_) : index(index_) @@ -633,25 +636,26 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( RangesInDataPart ranges(part, part_index); - total_marks_pk.fetch_add(part->index_granularity.getMarksCount(), std::memory_order_relaxed); + size_t total_marks_count = part->getMarksCount(); + if (total_marks_count && part->index_granularity.hasFinalMark()) + --total_marks_count; + + total_marks_pk.fetch_add(total_marks_count, std::memory_order_relaxed); if (metadata_snapshot->hasPrimaryKey()) ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log); - else - { - size_t total_marks_count = part->getMarksCount(); - if (total_marks_count) - { - if (part->index_granularity.hasFinalMark()) - --total_marks_count; - ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}}; - } - } + else if (total_marks_count) + ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}}; sum_marks_pk.fetch_add(ranges.getMarksCount(), std::memory_order_relaxed); for (auto & index_and_condition : useful_indices) { + if (ranges.ranges.empty()) + break; + + index_and_condition.total_parts.fetch_add(1, std::memory_order_relaxed); + size_t total_granules = 0; size_t granules_dropped = 0; ranges.ranges = filterMarksUsingIndex( @@ -663,6 +667,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( index_and_condition.total_granules.fetch_add(total_granules, std::memory_order_relaxed); index_and_condition.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed); + + if (ranges.ranges.empty()) + index_and_condition.parts_dropped.fetch_add(1, 
std::memory_order_relaxed); } if (!ranges.ranges.empty()) @@ -1796,7 +1803,8 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, std::optional & partition_pruner, - const PartitionIdToMaxBlock * max_block_numbers_to_read) + const PartitionIdToMaxBlock * max_block_numbers_to_read, + PartFilterCounters & counters) { auto prev_parts = parts; parts.clear(); @@ -1809,22 +1817,35 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( if (part->isEmpty()) continue; + if (max_block_numbers_to_read) + { + auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); + if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second) + continue; + } + + size_t num_granules = part->getMarksCount(); + if (num_granules && part->index_granularity.hasFinalMark()) + --num_granules; + + counters.num_initial_selected_parts += 1; + counters.num_initial_selected_granules += num_granules; + if (minmax_idx_condition && !minmax_idx_condition->checkInHyperrectangle( part->minmax_idx.hyperrectangle, minmax_columns_types).can_be_true) continue; + counters.num_parts_after_minmax += 1; + counters.num_granules_after_minmax += num_granules; + if (partition_pruner) { if (partition_pruner->canBePruned(part)) continue; } - if (max_block_numbers_to_read) - { - auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); - if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second) - continue; - } + counters.num_parts_after_partition += 1; + counters.num_granules_after_partition += num_granules; parts.push_back(part); } @@ -1837,7 +1858,8 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( const DataTypes & minmax_columns_types, std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read, - const Context & query_context) const + const 
Context & query_context, + PartFilterCounters & counters) const { /// const_cast to add UUIDs to context. Bad practice. Context & non_const_context = const_cast(query_context); @@ -1860,17 +1882,6 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( if (part->isEmpty()) continue; - if (minmax_idx_condition - && !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx.hyperrectangle, minmax_columns_types) - .can_be_true) - continue; - - if (partition_pruner) - { - if (partition_pruner->canBePruned(part)) - continue; - } - if (max_block_numbers_to_read) { auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); @@ -1878,13 +1889,37 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( continue; } + /// Skip the part if its uuid is meant to be excluded + if (part->uuid != UUIDHelpers::Nil && ignored_part_uuids->has(part->uuid)) + continue; + + size_t num_granules = part->getMarksCount(); + if (num_granules && part->index_granularity.hasFinalMark()) + --num_granules; + + counters.num_initial_selected_parts += 1; + counters.num_initial_selected_granules += num_granules; + + if (minmax_idx_condition + && !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx.hyperrectangle, minmax_columns_types) + .can_be_true) + continue; + + counters.num_parts_after_minmax += 1; + counters.num_granules_after_minmax += num_granules; + + if (partition_pruner) + { + if (partition_pruner->canBePruned(part)) + continue; + } + + counters.num_parts_after_partition += 1; + counters.num_granules_after_partition += num_granules; + /// populate UUIDs and exclude ignored parts if enabled if (part->uuid != UUIDHelpers::Nil) { - /// Skip the part if its uuid is meant to be excluded - if (ignored_part_uuids->has(part->uuid)) - continue; - auto result = temp_part_uuids.insert(part->uuid); if (!result.second) throw Exception("Found a part with the same UUID on the same replica.", ErrorCodes::LOGICAL_ERROR); @@ -1916,6 +1951,8 @@ void 
MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( { LOG_DEBUG(log, "Found duplicate uuids locally, will retry part selection without them"); + counters = PartFilterCounters(); + /// Second attempt didn't help, throw an exception if (!select_parts(parts)) throw Exception("Found duplicate UUIDs while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 634719639ad..b64e80646f7 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -117,6 +117,16 @@ private: size_t & granules_dropped, Poco::Logger * log); + struct PartFilterCounters + { + size_t num_initial_selected_parts = 0; + size_t num_initial_selected_granules = 0; + size_t num_parts_after_minmax = 0; + size_t num_granules_after_minmax = 0; + size_t num_parts_after_partition = 0; + size_t num_granules_after_partition = 0; + }; + /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`, /// as well as `max_block_number_to_read`. static void selectPartsToRead( @@ -125,7 +135,8 @@ private: const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, std::optional & partition_pruner, - const PartitionIdToMaxBlock * max_block_numbers_to_read); + const PartitionIdToMaxBlock * max_block_numbers_to_read, + PartFilterCounters & counters); /// Same as previous but also skip parts uuids if any to the query context, or skip parts which uuids marked as excluded. 
void selectPartsToReadWithUUIDFilter( @@ -135,7 +146,8 @@ private: const DataTypes & minmax_columns_types, std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read, - const Context & query_context) const; + const Context & query_context, + PartFilterCounters & counters) const; }; } From 7c5a9133dffdbdfcda4d7b1121c9340d7c79a977 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 8 Apr 2021 11:19:04 +0300 Subject: [PATCH 05/22] Add index info to ReadFromStorageStep. --- .../QueryPlan/ReadFromMergeTree.cpp | 17 ++++++ src/Processors/QueryPlan/ReadFromMergeTree.h | 21 ++++++++ .../MergeTree/MergeTreeDataSelectExecutor.cpp | 53 +++++++++++++++++-- .../MergeTree/MergeTreeDataSelectExecutor.h | 4 ++ src/Storages/MergeTree/PartitionPruner.h | 2 + 5 files changed, 93 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 7fac4ca738b..a7ea4edf684 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -15,6 +15,7 @@ ReadFromMergeTree::ReadFromMergeTree( String query_id_, Names required_columns_, RangesInDataParts parts_, + IndexStatPtr index_stats_, PrewhereInfoPtr prewhere_info_, Names virt_column_names_, Settings settings_, @@ -30,6 +31,7 @@ ReadFromMergeTree::ReadFromMergeTree( , query_id(std::move(query_id_)) , required_columns(std::move(required_columns_)) , parts(std::move(parts_)) + , index_stats(std::move(index_stats_)) , prewhere_info(std::move(prewhere_info_)) , virt_column_names(std::move(virt_column_names_)) , settings(std::move(settings_)) @@ -148,4 +150,19 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build pipeline.init(std::move(pipe)); } +void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const +{ + if (index_stats) + { + std::string prefix(format_settings.offset + format_settings.indent, format_settings.indent_char); 
+ for (const auto & stat : *index_stats) + { + std::string pref(format_settings.indent, format_settings.indent_char); + format_settings.out << prefix << stat.description << '\n'; + format_settings.out << prefix << pref << "Parts: " << stat.num_parts_after << '\n'; + format_settings.out << prefix << pref << "Granules: " << stat.num_granules_after << '\n'; + } + } +} + } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 08043490bc7..8f2c36ad03e 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -12,6 +12,23 @@ class ReadFromMergeTree : public ISourceStep { public: + struct IndexStat + { + std::string description; + size_t num_parts_after; + size_t num_granules_after; + + IndexStat(std::string description_, size_t num_parts_after_, size_t num_granules_after_) + : description(std::move(description_)) + , num_parts_after(num_parts_after_) + , num_granules_after(num_granules_after_) + { + } + }; + + using IndexStats = std::vector; + using IndexStatPtr = std::unique_ptr; + struct Settings { UInt64 max_block_size; @@ -30,6 +47,7 @@ public: String query_id_, Names required_columns_, RangesInDataParts parts_, + IndexStatPtr index_stats_, PrewhereInfoPtr prewhere_info_, Names virt_column_names_, Settings settings_, @@ -42,6 +60,8 @@ public: void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void describeActions(FormatSettings & format_settings) const override; + private: const MergeTreeData & storage; StorageMetadataPtr metadata_snapshot; @@ -49,6 +69,7 @@ private: Names required_columns; RangesInDataParts parts; + IndexStatPtr index_stats; PrewhereInfoPtr prewhere_info; Names virt_column_names; Settings settings; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index e6ee156ff7f..f643e244b3a 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -274,12 +274,34 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const Context & query_context = context.hasQueryContext() ? context.getQueryContext() : context; PartFilterCounters part_filter_counters; + auto index_stats = std::make_unique(); if (query_context.getSettingsRef().allow_experimental_query_deduplication) selectPartsToReadWithUUIDFilter(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, query_context, part_filter_counters); else selectPartsToRead(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, part_filter_counters); + index_stats->emplace_back( + "None", + part_filter_counters.num_initial_selected_parts, + part_filter_counters.num_initial_selected_granules); + + if (minmax_idx_condition) + { + index_stats->emplace_back( + minmax_idx_condition->toString(), + part_filter_counters.num_parts_after_minmax, + part_filter_counters.num_granules_after_minmax); + } + + if (partition_pruner) + { + index_stats->emplace_back( + partition_pruner->toString(), + part_filter_counters.num_parts_after_minmax, + part_filter_counters.num_granules_after_minmax); + } + /// Sampling. 
Names column_names_to_read = real_column_names; std::shared_ptr filter_function; @@ -614,6 +636,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( RangesInDataParts parts_with_ranges(parts.size()); size_t sum_marks = 0; std::atomic sum_marks_pk = 0; + std::atomic sum_parts_pk = 0; std::atomic total_marks_pk = 0; size_t sum_ranges = 0; @@ -649,6 +672,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( sum_marks_pk.fetch_add(ranges.getMarksCount(), std::memory_order_relaxed); + if (!ranges.ranges.empty()) + sum_parts_pk.fetch_add(1, std::memory_order_relaxed); + for (auto & index_and_condition : useful_indices) { if (ranges.ranges.empty()) @@ -735,12 +761,25 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( parts_with_ranges.resize(next_part); } + if (metadata_snapshot->hasPrimaryKey()) + { + index_stats->emplace_back( + key_condition.toString(), + sum_parts_pk.load(std::memory_order_relaxed), + sum_marks_pk.load(std::memory_order_relaxed)); + } + for (const auto & index_and_condition : useful_indices) { const auto & index_name = index_and_condition.index->index.name; LOG_DEBUG(log, "Index {} has dropped {}/{} granules.", backQuote(index_name), index_and_condition.granules_dropped, index_and_condition.total_granules); + + index_stats->emplace_back( + index_name, + index_and_condition.total_parts - index_and_condition.parts_dropped, + index_and_condition.total_granules - index_and_condition.granules_dropped); } LOG_DEBUG(log, "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges", @@ -807,6 +846,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( plan = spreadMarkRangesAmongStreamsFinal( std::move(parts_with_ranges), + std::move(index_stats), num_streams, column_names_to_read, metadata_snapshot, @@ -830,6 +870,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( plan = spreadMarkRangesAmongStreamsWithOrder( std::move(parts_with_ranges), + 
std::move(index_stats), num_streams, column_names_to_read, metadata_snapshot, @@ -847,6 +888,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( { plan = spreadMarkRangesAmongStreams( std::move(parts_with_ranges), + std::move(index_stats), num_streams, column_names_to_read, metadata_snapshot, @@ -960,6 +1002,7 @@ size_t minMarksForConcurrentRead( QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( RangesInDataParts && parts, + ReadFromMergeTree::IndexStatPtr index_stats, size_t num_streams, const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -1032,7 +1075,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( auto plan = std::make_unique(); auto step = std::make_unique( data, metadata_snapshot, query_id, - column_names, std::move(parts), query_info.prewhere_info, virt_columns, + column_names, std::move(parts), std::move(index_stats), query_info.prewhere_info, virt_columns, step_settings, num_streams, /*allow_mix_streams*/ true, /*read_reverse*/ false); plan->addStep(std::move(step)); @@ -1049,6 +1092,7 @@ static ActionsDAGPtr createProjection(const Block & header) QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( RangesInDataParts && parts, + ReadFromMergeTree::IndexStatPtr index_stats, size_t num_streams, const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -1230,7 +1274,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( auto plan = std::make_unique(); auto step = std::make_unique( data, metadata_snapshot, query_id, - column_names, std::move(new_parts), query_info.prewhere_info, virt_columns, + column_names, std::move(new_parts), std::move(index_stats), query_info.prewhere_info, virt_columns, step_settings, num_streams, /*allow_mix_streams*/ false, /*read_reverse*/ read_reverse); plan->addStep(std::move(step)); @@ -1292,6 +1336,7 @@ QueryPlanPtr 
MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, + ReadFromMergeTree::IndexStatPtr index_stats, size_t num_streams, const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -1412,7 +1457,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( plan = std::make_unique(); auto step = std::make_unique( data, metadata_snapshot, query_id, - column_names, std::move(new_parts), query_info.prewhere_info, virt_columns, + column_names, std::move(new_parts), std::move(index_stats), query_info.prewhere_info, virt_columns, step_settings, num_streams, /*allow_mix_streams*/ false, /*read_reverse*/ false); plan->addStep(std::move(step)); @@ -1495,7 +1540,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( auto plan = std::make_unique(); auto step = std::make_unique( data, metadata_snapshot, query_id, - column_names, std::move(lonely_parts), query_info.prewhere_info, virt_columns, + column_names, std::move(lonely_parts), std::move(index_stats), query_info.prewhere_info, virt_columns, step_settings, num_streams_for_lonely_parts, /*allow_mix_streams*/ true, /*read_reverse*/ false); plan->addStep(std::move(step)); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index b64e80646f7..83de1a10d4d 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -51,6 +52,7 @@ private: QueryPlanPtr spreadMarkRangesAmongStreams( RangesInDataParts && parts, + ReadFromMergeTree::IndexStatPtr index_stats, size_t num_streams, const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -65,6 +67,7 @@ private: /// out_projection - save projection only with columns, requested to read 
QueryPlanPtr spreadMarkRangesAmongStreamsWithOrder( RangesInDataParts && parts, + ReadFromMergeTree::IndexStatPtr index_stats, size_t num_streams, const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -80,6 +83,7 @@ private: QueryPlanPtr spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, + ReadFromMergeTree::IndexStatPtr index_stats, size_t num_streams, const Names & column_names, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index 3cb7552c427..fad292d06e4 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -32,6 +32,8 @@ public: bool canBePruned(const DataPartPtr & part); bool isUseless() const { return useless; } + + std::string toString() const { return partition_condition.toString(); } }; } From 7ffbeac9dfc2b03e6aa8f22760b27d55be72ee82 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 8 Apr 2021 14:48:54 +0300 Subject: [PATCH 06/22] Add info about indexes to ReadFromMergeTree step. 
--- .../QueryPlan/ReadFromMergeTree.cpp | 40 +++++++++++-- src/Processors/QueryPlan/ReadFromMergeTree.h | 18 +++--- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 56 +++++++++++-------- .../MergeTree/MergeTreeDataSelectExecutor.h | 4 +- 4 files changed, 79 insertions(+), 39 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index a7ea4edf684..dba57f9e3d5 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -150,17 +150,45 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build pipeline.init(std::move(pipe)); } +static const char * indexTypeToString(ReadFromMergeTree::IndexType type) +{ + switch (type) + { + case ReadFromMergeTree::IndexType::None: + return "None"; + case ReadFromMergeTree::IndexType::MinMax: + return "MinMax"; + case ReadFromMergeTree::IndexType::Partition: + return "Partition"; + case ReadFromMergeTree::IndexType::PrimaryKey: + return "PrimaryKey"; + case ReadFromMergeTree::IndexType::Skip: + return "Skip"; + } + + __builtin_unreachable(); +} + void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const { - if (index_stats) + if (index_stats && !index_stats->empty()) { - std::string prefix(format_settings.offset + format_settings.indent, format_settings.indent_char); + std::string prefix(format_settings.offset, format_settings.indent_char); + std::string indent(format_settings.indent, format_settings.indent_char); + format_settings.out << prefix << "Indexes:\n"; + for (const auto & stat : *index_stats) { - std::string pref(format_settings.indent, format_settings.indent_char); - format_settings.out << prefix << stat.description << '\n'; - format_settings.out << prefix << pref << "Parts: " << stat.num_parts_after << '\n'; - format_settings.out << prefix << pref << "Granules: " << stat.num_granules_after << '\n'; + format_settings.out << prefix << indent << 
indexTypeToString(stat.type) << '\n'; + + if (!stat.name.empty()) + format_settings.out << prefix << indent << indent << "Name: " << stat.name << '\n'; + + if (!stat.description.empty()) + format_settings.out << prefix << indent << indent << "Description: " << stat.description << '\n'; + + format_settings.out << prefix << indent << indent << "Parts: " << stat.num_parts_after << '\n'; + format_settings.out << prefix << indent << indent << "Granules: " << stat.num_granules_after << '\n'; } } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 8f2c36ad03e..85c412ff003 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -12,18 +12,22 @@ class ReadFromMergeTree : public ISourceStep { public: + enum class IndexType + { + None, + MinMax, + Partition, + PrimaryKey, + Skip, + }; + struct IndexStat { + IndexType type; + std::string name; std::string description; size_t num_parts_after; size_t num_granules_after; - - IndexStat(std::string description_, size_t num_parts_after_, size_t num_granules_after_) - : description(std::move(description_)) - , num_parts_after(num_parts_after_) - , num_granules_after(num_granules_after_) - { - } }; using IndexStats = std::vector; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index f643e244b3a..8bbd2d7a2d9 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -281,25 +281,27 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( else selectPartsToRead(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, part_filter_counters); - index_stats->emplace_back( - "None", - part_filter_counters.num_initial_selected_parts, - part_filter_counters.num_initial_selected_granules); + 
index_stats->emplace_back(ReadFromMergeTree::IndexStat{ + .type = ReadFromMergeTree::IndexType::None, + .num_parts_after = part_filter_counters.num_initial_selected_parts, + .num_granules_after = part_filter_counters.num_initial_selected_granules}); if (minmax_idx_condition) { - index_stats->emplace_back( - minmax_idx_condition->toString(), - part_filter_counters.num_parts_after_minmax, - part_filter_counters.num_granules_after_minmax); + index_stats->emplace_back(ReadFromMergeTree::IndexStat{ + .type = ReadFromMergeTree::IndexType::MinMax, + .description = minmax_idx_condition->toString(), + .num_parts_after = part_filter_counters.num_parts_after_minmax, + .num_granules_after = part_filter_counters.num_granules_after_minmax}); } if (partition_pruner) { - index_stats->emplace_back( - partition_pruner->toString(), - part_filter_counters.num_parts_after_minmax, - part_filter_counters.num_granules_after_minmax); + index_stats->emplace_back(ReadFromMergeTree::IndexStat{ + .type = ReadFromMergeTree::IndexType::Partition, + .description = partition_pruner->toString(), + .num_parts_after = part_filter_counters.num_parts_after_partition_pruner, + .num_granules_after = part_filter_counters.num_granules_after_partition_pruner}); } /// Sampling. 
@@ -763,10 +765,11 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( if (metadata_snapshot->hasPrimaryKey()) { - index_stats->emplace_back( - key_condition.toString(), - sum_parts_pk.load(std::memory_order_relaxed), - sum_marks_pk.load(std::memory_order_relaxed)); + index_stats->emplace_back(ReadFromMergeTree::IndexStat{ + .type = ReadFromMergeTree::IndexType::PrimaryKey, + .description = key_condition.toString(), + .num_parts_after = sum_parts_pk.load(std::memory_order_relaxed), + .num_granules_after = sum_marks_pk.load(std::memory_order_relaxed)}); } for (const auto & index_and_condition : useful_indices) @@ -776,10 +779,15 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( backQuote(index_name), index_and_condition.granules_dropped, index_and_condition.total_granules); - index_stats->emplace_back( - index_name, - index_and_condition.total_parts - index_and_condition.parts_dropped, - index_and_condition.total_granules - index_and_condition.granules_dropped); + std::string description = index_and_condition.index->index.type + + " GRANULARITY " + std::to_string(index_and_condition.index->index.granularity); + + index_stats->emplace_back(ReadFromMergeTree::IndexStat{ + .type = ReadFromMergeTree::IndexType::Skip, + .name = index_name, + .description = std::move(description), + .num_parts_after = index_and_condition.total_parts - index_and_condition.parts_dropped, + .num_granules_after = index_and_condition.total_granules - index_and_condition.granules_dropped}); } LOG_DEBUG(log, "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges", @@ -1889,8 +1897,8 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( continue; } - counters.num_parts_after_partition += 1; - counters.num_granules_after_partition += num_granules; + counters.num_parts_after_partition_pruner += 1; + counters.num_granules_after_partition_pruner += num_granules; parts.push_back(part); } @@ -1959,8 +1967,8 @@ 
void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( continue; } - counters.num_parts_after_partition += 1; - counters.num_granules_after_partition += num_granules; + counters.num_parts_after_partition_pruner += 1; + counters.num_granules_after_partition_pruner += num_granules; /// populate UUIDs and exclude ignored parts if enabled if (part->uuid != UUIDHelpers::Nil) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 83de1a10d4d..144186816d1 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -127,8 +127,8 @@ private: size_t num_initial_selected_granules = 0; size_t num_parts_after_minmax = 0; size_t num_granules_after_minmax = 0; - size_t num_parts_after_partition = 0; - size_t num_granules_after_partition = 0; + size_t num_parts_after_partition_pruner = 0; + size_t num_granules_after_partition_pruner = 0; }; /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`, From febb187da06f993b681a6b97fb3e590f07e3a6d2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 8 Apr 2021 15:54:27 +0300 Subject: [PATCH 07/22] Added test, --- .../QueryPlan/ReadFromMergeTree.cpp | 42 ++++++++++++------- src/Processors/QueryPlan/ReadFromMergeTree.h | 13 ++++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 14 ++++--- 3 files changed, 45 insertions(+), 24 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index dba57f9e3d5..380c72e0d06 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -20,8 +20,7 @@ ReadFromMergeTree::ReadFromMergeTree( Names virt_column_names_, Settings settings_, size_t num_streams_, - bool allow_mix_streams_, - bool read_reverse_) + ReadType read_type_) : ISourceStep(DataStream{.header = 
MergeTreeBaseSelectProcessor::transformHeader( metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()), prewhere_info_, @@ -36,8 +35,7 @@ ReadFromMergeTree::ReadFromMergeTree( , virt_column_names(std::move(virt_column_names_)) , settings(std::move(settings_)) , num_streams(num_streams_) - , allow_mix_streams(allow_mix_streams_) - , read_reverse(read_reverse_) + , read_type(read_type_) { } @@ -104,7 +102,7 @@ Pipe ReadFromMergeTree::readFromSeparateParts() Pipes pipes; for (const auto & part : parts) { - auto source = read_reverse + auto source = read_type == ReadType::InReverseOrder ? createSource(part) : createSource(part); @@ -121,17 +119,15 @@ Pipe ReadFromMergeTree::readFromSeparateParts() Pipe ReadFromMergeTree::read() { - if (allow_mix_streams && num_streams > 1) + if (read_type == ReadType::Default && num_streams > 1) return readFromPool(); auto pipe = readFromSeparateParts(); - if (allow_mix_streams) - { - /// Use ConcatProcessor to concat sources together. - /// It is needed to read in parts order (and so in PK order) if single thread is used. - if (pipe.numOutputPorts() > 1) - pipe.addTransform(std::make_shared(pipe.getHeader(), pipe.numOutputPorts())); - } + + /// Use ConcatProcessor to concat sources together. + /// It is needed to read in parts order (and so in PK order) if single thread is used. 
+ if (read_type == ReadType::Default && pipe.numOutputPorts() > 1) + pipe.addTransform(std::make_shared(pipe.getHeader(), pipe.numOutputPorts())); return pipe; } @@ -169,11 +165,29 @@ static const char * indexTypeToString(ReadFromMergeTree::IndexType type) __builtin_unreachable(); } +static const char * readTypeToString(ReadFromMergeTree::ReadType type) +{ + switch (type) + { + case ReadFromMergeTree::ReadType::Default: + return "Default"; + case ReadFromMergeTree::ReadType::InOrder: + return "InOrder"; + case ReadFromMergeTree::ReadType::InReverseOrder: + return "InReverseOrder"; + } + + __builtin_unreachable(); +} + void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const { + std::string prefix(format_settings.offset, format_settings.indent_char); + format_settings.out << prefix << "ReadType: " << readTypeToString(read_type) << '\n'; + if (index_stats && !index_stats->empty()) { - std::string prefix(format_settings.offset, format_settings.indent_char); + std::string indent(format_settings.indent, format_settings.indent_char); format_settings.out << prefix << "Indexes:\n"; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 85c412ff003..af26909d3fa 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -45,6 +45,13 @@ public: MergeTreeReadPool::BackoffSettings backoff_settings; }; + enum class ReadType + { + Default, + InOrder, + InReverseOrder, + }; + explicit ReadFromMergeTree( const MergeTreeData & storage_, StorageMetadataPtr metadata_snapshot_, @@ -56,8 +63,7 @@ public: Names virt_column_names_, Settings settings_, size_t num_streams_, - bool allow_mix_streams_, - bool read_reverse_ + ReadType read_type_ ); String getName() const override { return "ReadFromMergeTree"; } @@ -79,8 +85,7 @@ private: Settings settings; size_t num_streams; - bool allow_mix_streams; - bool read_reverse; + ReadType read_type; Pipe read(); Pipe 
readFromPool(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8bbd2d7a2d9..c2613873871 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1084,7 +1084,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( auto step = std::make_unique( data, metadata_snapshot, query_id, column_names, std::move(parts), std::move(index_stats), query_info.prewhere_info, virt_columns, - step_settings, num_streams, /*allow_mix_streams*/ true, /*read_reverse*/ false); + step_settings, num_streams, ReadFromMergeTree::ReadType::Default); plan->addStep(std::move(step)); return plan; @@ -1277,17 +1277,19 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( .backoff_settings = MergeTreeReadPool::BackoffSettings(settings), }; - bool read_reverse = input_order_info->direction != 1; + auto read_type = input_order_info->direction == 1 + ? 
ReadFromMergeTree::ReadType::InOrder + : ReadFromMergeTree::ReadType::InReverseOrder; auto plan = std::make_unique(); auto step = std::make_unique( data, metadata_snapshot, query_id, column_names, std::move(new_parts), std::move(index_stats), query_info.prewhere_info, virt_columns, - step_settings, num_streams, /*allow_mix_streams*/ false, /*read_reverse*/ read_reverse); + step_settings, num_streams, read_type); plan->addStep(std::move(step)); - if (read_reverse) + if (read_type == ReadFromMergeTree::ReadType::InReverseOrder) { auto reverse_step = std::make_unique(plan->getCurrentDataStream()); plan->addStep(std::move(reverse_step)); @@ -1466,7 +1468,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( auto step = std::make_unique( data, metadata_snapshot, query_id, column_names, std::move(new_parts), std::move(index_stats), query_info.prewhere_info, virt_columns, - step_settings, num_streams, /*allow_mix_streams*/ false, /*read_reverse*/ false); + step_settings, num_streams, ReadFromMergeTree::ReadType::InOrder); plan->addStep(std::move(step)); @@ -1549,7 +1551,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( auto step = std::make_unique( data, metadata_snapshot, query_id, column_names, std::move(lonely_parts), std::move(index_stats), query_info.prewhere_info, virt_columns, - step_settings, num_streams_for_lonely_parts, /*allow_mix_streams*/ true, /*read_reverse*/ false); + step_settings, num_streams_for_lonely_parts, ReadFromMergeTree::ReadType::Default); plan->addStep(std::move(step)); From 794d7c89b69c04339d442ed0577150b3b23cc7d5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 8 Apr 2021 16:18:17 +0300 Subject: [PATCH 08/22] Remove ReverseRowsStep --- .../QueryPlan/ReadFromMergeTree.cpp | 22 +++++++---- src/Processors/QueryPlan/ReadFromMergeTree.h | 2 +- src/Processors/QueryPlan/ReverseRowsStep.cpp | 37 ------------------- src/Processors/QueryPlan/ReverseRowsStep.h | 18 --------- 
src/Processors/ya.make | 1 - .../MergeTree/MergeTreeDataSelectExecutor.cpp | 8 ---- 6 files changed, 15 insertions(+), 73 deletions(-) delete mode 100644 src/Processors/QueryPlan/ReverseRowsStep.cpp delete mode 100644 src/Processors/QueryPlan/ReverseRowsStep.h diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 380c72e0d06..6d713add336 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -97,7 +98,7 @@ ProcessorPtr ReadFromMergeTree::createSource(const RangesInDataPart & part) prewhere_info, true, settings.reader_settings, virt_column_names, part.part_index_in_query); } -Pipe ReadFromMergeTree::readFromSeparateParts() +Pipe ReadFromMergeTree::readInOrder() { Pipes pipes; for (const auto & part : parts) @@ -106,15 +107,20 @@ Pipe ReadFromMergeTree::readFromSeparateParts() ? createSource(part) : createSource(part); - std::make_shared( - storage, metadata_snapshot, part.data_part, settings.max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, required_columns, part.ranges, settings.use_uncompressed_cache, - prewhere_info, true, settings.reader_settings, virt_column_names, part.part_index_in_query); - pipes.emplace_back(std::move(source)); } - return Pipe::unitePipes(std::move(pipes)); + auto pipe = Pipe::unitePipes(std::move(pipes)); + + if (read_type == ReadType::InReverseOrder) + { + pipe.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header); + }); + } + + return pipe; } Pipe ReadFromMergeTree::read() @@ -122,7 +128,7 @@ Pipe ReadFromMergeTree::read() if (read_type == ReadType::Default && num_streams > 1) return readFromPool(); - auto pipe = readFromSeparateParts(); + auto pipe = readInOrder(); /// Use ConcatProcessor to concat sources together. 
/// It is needed to read in parts order (and so in PK order) if single thread is used. diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index af26909d3fa..e6e949e91ce 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -89,7 +89,7 @@ private: Pipe read(); Pipe readFromPool(); - Pipe readFromSeparateParts(); + Pipe readInOrder(); template ProcessorPtr createSource(const RangesInDataPart & part); diff --git a/src/Processors/QueryPlan/ReverseRowsStep.cpp b/src/Processors/QueryPlan/ReverseRowsStep.cpp deleted file mode 100644 index 0a2e9f20cd9..00000000000 --- a/src/Processors/QueryPlan/ReverseRowsStep.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits() -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = false, - .preserves_number_of_streams = true, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = true, - } - }; -} - -ReverseRowsStep::ReverseRowsStep(const DataStream & input_stream_) - : ITransformingStep(input_stream_, input_stream_.header, getTraits()) -{ -} - -void ReverseRowsStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) -{ - pipeline.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header); - }); -} - -} diff --git a/src/Processors/QueryPlan/ReverseRowsStep.h b/src/Processors/QueryPlan/ReverseRowsStep.h deleted file mode 100644 index 08d7833d130..00000000000 --- a/src/Processors/QueryPlan/ReverseRowsStep.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -/// Reverse rows in chunk. 
-class ReverseRowsStep : public ITransformingStep -{ -public: - explicit ReverseRowsStep(const DataStream & input_stream_); - - String getName() const override { return "ReverseRows"; } - - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; -}; - -} diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 9ef888ff1d8..48ec3ac7a87 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -128,7 +128,6 @@ SRCS( QueryPlan/ReadFromMergeTree.cpp QueryPlan/ReadFromPreparedSource.cpp QueryPlan/ReadNothingStep.cpp - QueryPlan/ReverseRowsStep.cpp QueryPlan/RollupStep.cpp QueryPlan/SettingQuotaAndLimitsStep.cpp QueryPlan/TotalsHavingStep.cpp diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c2613873871..4f7651ff850 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -1288,13 +1287,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( step_settings, num_streams, read_type); plan->addStep(std::move(step)); - - if (read_type == ReadFromMergeTree::ReadType::InReverseOrder) - { - auto reverse_step = std::make_unique(plan->getCurrentDataStream()); - plan->addStep(std::move(reverse_step)); - } - plans.emplace_back(std::move(plan)); } From 0a43b729b1b82d4e40b3139482e78ceaa78dc6f3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 8 Apr 2021 18:19:49 +0300 Subject: [PATCH 09/22] Add test. 
--- .../01786_explain_merge_tree.reference | 71 +++++++++++++++++++ .../0_stateless/01786_explain_merge_tree.sh | 25 +++++++ 2 files changed, 96 insertions(+) create mode 100644 tests/queries/0_stateless/01786_explain_merge_tree.reference create mode 100755 tests/queries/0_stateless/01786_explain_merge_tree.sh diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference new file mode 100644 index 00000000000..64b1d2f74cc --- /dev/null +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -0,0 +1,71 @@ + ReadFromMergeTree + ReadType: Default + Indexes: + None + Parts: 5 + Granules: 12 + MinMax + Description: unknown, unknown, and, (column 0 in [1, +inf)), unknown, and, unknown, and, and, unknown, unknown, and, and + Parts: 4 + Granules: 11 + Partition + Description: unknown, unknown, and, (column 0 in [1, +inf)), (column 1 not in [1, 1]), and, unknown, and, and, unknown, unknown, and, and + Parts: 3 + Granules: 10 + PrimaryKey + Description: unknown, unknown, and, (column 1 in [1, +inf)), unknown, and, (column 0 in [11, +inf)), and, and, unknown, unknown, and, and + Parts: 2 + Granules: 6 + Skip + Name: t_minmax + Description: minmax GRANULARITY 2 + Parts: 1 + Granules: 2 + Skip + Name: t_set + Description: set GRANULARITY 2 + Parts: 1 + Granules: 1 +----------------- + ReadFromMergeTree + ReadType: InOrder + Indexes: + None + Parts: 5 + Granules: 12 + MinMax + Description: unknown + Parts: 5 + Granules: 12 + Partition + Description: unknown + Parts: 5 + Granules: 12 + PrimaryKey + Description: (column 0 in [11, +inf)) + Parts: 2 + Granules: 6 + ReadFromMergeTree + ReadType: InOrder +----------------- + ReadFromMergeTree + ReadType: InReverseOrder + Indexes: + None + Parts: 5 + Granules: 12 + MinMax + Description: unknown + Parts: 5 + Granules: 12 + Partition + Description: unknown + Parts: 5 + Granules: 12 + PrimaryKey + Description: (column 0 in [11, +inf)) + Parts: 2 + 
Granules: 6 + ReverseRows + ReadFromMergeTree + ReadType: InReverseOrder diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh new file mode 100755 index 00000000000..c24d5ac6461 --- /dev/null +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2" +$CLICKHOUSE_CLIENT -q "insert into test_index select number, number > 3 ? 3 : number, number = 1 ? 1 : 0, number from numbers(20)" + +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14; + " | grep -A 100 "ReadFromMergeTree" + +echo "-----------------" + +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select x from test_index where x > 10 order by x; + " | grep -A 100 "ReadFromMergeTree" + +echo "-----------------" + +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select x from test_index where x > 10 order by x desc; + " | grep -A 100 "ReadFromMergeTree" + From a2906e902ea48cca6826679b52e4b5ee24539e52 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 8 Apr 2021 21:19:57 +0300 Subject: [PATCH 10/22] Fix test. 
--- .../01786_explain_merge_tree.reference | 77 +++++++++---------- .../0_stateless/01786_explain_merge_tree.sh | 4 +- 2 files changed, 38 insertions(+), 43 deletions(-) diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index 64b1d2f74cc..df32c1d367c 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -27,45 +27,40 @@ Parts: 1 Granules: 1 ----------------- - ReadFromMergeTree - ReadType: InOrder - Indexes: - None - Parts: 5 - Granules: 12 - MinMax - Description: unknown - Parts: 5 - Granules: 12 - Partition - Description: unknown - Parts: 5 - Granules: 12 - PrimaryKey - Description: (column 0 in [11, +inf)) - Parts: 2 - Granules: 6 - ReadFromMergeTree - ReadType: InOrder + ReadFromMergeTree + ReadType: InOrder + Indexes: + None + Parts: 5 + Granules: 12 + MinMax + Description: unknown + Parts: 5 + Granules: 12 + Partition + Description: unknown + Parts: 5 + Granules: 12 + PrimaryKey + Description: (column 0 in [16, +inf)) + Parts: 1 + Granules: 3 ----------------- - ReadFromMergeTree - ReadType: InReverseOrder - Indexes: - None - Parts: 5 - Granules: 12 - MinMax - Description: unknown - Parts: 5 - Granules: 12 - Partition - Description: unknown - Parts: 5 - Granules: 12 - PrimaryKey - Description: (column 0 in [11, +inf)) - Parts: 2 - Granules: 6 - ReverseRows - ReadFromMergeTree - ReadType: InReverseOrder + ReadFromMergeTree + ReadType: InReverseOrder + Indexes: + None + Parts: 5 + Granules: 12 + MinMax + Description: unknown + Parts: 5 + Granules: 12 + Partition + Description: unknown + Parts: 5 + Granules: 12 + PrimaryKey + Description: (column 0 in [16, +inf)) + Parts: 1 + Granules: 3 diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index c24d5ac6461..87116b57054 100755 --- 
a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -14,12 +14,12 @@ $CLICKHOUSE_CLIENT -q " echo "-----------------" $CLICKHOUSE_CLIENT -q " - explain actions = 1 select x from test_index where x > 10 order by x; + explain actions = 1 select x from test_index where x > 15 order by x; " | grep -A 100 "ReadFromMergeTree" echo "-----------------" $CLICKHOUSE_CLIENT -q " - explain actions = 1 select x from test_index where x > 10 order by x desc; + explain actions = 1 select x from test_index where x > 15 order by x desc; " | grep -A 100 "ReadFromMergeTree" From d0b5615641a741367e10cba9e529cec00ec02be2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Apr 2021 12:35:21 +0300 Subject: [PATCH 11/22] Add comments. --- src/Processors/QueryPlan/ReadFromMergeTree.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index e6e949e91ce..910417b4883 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -7,7 +7,8 @@ namespace DB { -/// Create source from prepared pipe. +/// This step is created to read from MergeTree* table. +/// For now, it takes a list of parts and creates source from it. class ReadFromMergeTree : public ISourceStep { public: @@ -21,6 +22,8 @@ public: Skip, }; + /// This is a struct with information about applied indexes. + /// Is used for introspection only, in EXPLAIN query. struct IndexStat { IndexType type; @@ -33,6 +36,7 @@ public: using IndexStats = std::vector; using IndexStatPtr = std::unique_ptr; + /// Part of settings which are needed for reading. struct Settings { UInt64 max_block_size; @@ -47,8 +51,16 @@ public: enum class ReadType { + /// By default, read will use MergeTreeReadPool and return pipe with num_streams outputs. 
+ /// If num_streams == 1, will read without pool, in order specified in parts. Default, + /// Read in sorting key order. + /// Returned pipe will have the number of ports equals to parts.size(). + /// Parameter num_streams_ is ignored in this case. + /// User should add MergingSorted itself if needed. InOrder, + /// The same as InOrder, but in reverse order. + /// For every part, read ranges and granules from end to begin. Also add ReverseTransform. InReverseOrder, }; From 865ae553b852c1b0fc10d80cefbf6b59ecca8f6a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Apr 2021 12:48:08 +0300 Subject: [PATCH 12/22] Fix test output. --- .../01508_partition_pruning_long.reference | 110 +++++++++--------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.reference b/tests/queries/0_stateless/01508_partition_pruning_long.reference index 70f529c6058..334ecb63164 100644 --- a/tests/queries/0_stateless/01508_partition_pruning_long.reference +++ b/tests/queries/0_stateless/01508_partition_pruning_long.reference @@ -5,11 +5,11 @@ Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primar select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01'); 2 2880 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); 1 1440 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; 0 0 @@ -17,27 +17,27 @@ Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 
marks by primar select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; 2 2880 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; 1 1440 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; 1 1440 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; 3 15000 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00'); 6 30000 -Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges +Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges select 
uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00'); 0 0 @@ -45,117 +45,117 @@ Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primar select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; 2 6440 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; 2 2880 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; 1 1440 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; 2 6440 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d) between 
202009 and 202010; 4 20000 -Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges +Selected 4/6 parts by partition key, 4 parts by primary key, 4/4 marks by primary key, 4 marks to read from 4 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00'; 1 1440 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; 3 11440 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; 1 1440 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; 3 11440 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks 
by primary key, 3 marks to read from 3 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; 1 1440 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; 3 9999 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; 4 20000 -Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges +Selected 4/6 parts by partition key, 4 parts by primary key, 4/4 marks by primary key, 4 marks to read from 4 ranges select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 
2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; 2 9999 -Selected 2/3 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/3 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; 1 10000 -Selected 1/3 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/3 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; 2 20000 -Selected 2/3 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/3 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges --------- tDD ---------------------------- select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); 1 10000 -Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); 1 10000 -Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; 1 10000 -Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary 
key, 1 marks to read from 1 ranges select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26'; 3 40000 -Selected 3/4 parts by partition key, 3 parts by primary key, 4/7 marks by primary key, 4 marks to read from 3 ranges +Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; 3 40000 -Selected 3/4 parts by partition key, 3 parts by primary key, 4/7 marks by primary key, 4 marks to read from 3 ranges +Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges --------- sDD ---------------------------- select uniqExact(_part), count() from sDD; 6 30000 -Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges +Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; 3 9999 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; 2 9999 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; 0 0 @@ -163,52 +163,52 @@ Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primar select uniqExact(_part), 
count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); 3 11440 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from sDD where d >= 1598918400000; 4 20000 -Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges +Selected 4/6 parts by partition key, 4 parts by primary key, 4/4 marks by primary key, 4 marks to read from 4 ranges select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; 3 10001 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges --------- xMM ---------------------------- select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 
00:00:00'; 3 10001 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; 1 1 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; 2 5001 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; 1 5000 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by 
primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from xMM where a = 1; 3 15000 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges select uniqExact(_part), count() from xMM where a = 66; 0 0 @@ -216,29 +216,29 @@ Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primar select uniqExact(_part), count() from xMM where a <> 66; 6 30000 -Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges +Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges select uniqExact(_part), count() from xMM where a = 2; 2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from xMM where a = 1; 2 15000 -Selected 2/5 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; 1 10000 -Selected 1/5 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges select uniqExact(_part), count() from xMM where a <> 66; 5 30000 -Selected 5/5 parts by partition key, 5 parts by primary key, 5/10 marks by primary key, 5 marks to read from 5 ranges +Selected 5/5 parts by partition key, 5 parts by primary key, 5/5 marks by 
primary key, 5 marks to read from 5 ranges select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; 2 5001 -Selected 2/5 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges +Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; 1 5000 -Selected 1/5 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges +Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges From 9b67067f059fe2e66de3200d376b59b6d2018224 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 13 Apr 2021 11:53:23 +0300 Subject: [PATCH 13/22] Fix flappy test. --- tests/queries/0_stateless/01786_explain_merge_tree.reference | 5 ----- tests/queries/0_stateless/01786_explain_merge_tree.sh | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index df32c1d367c..29f4fea3107 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -5,25 +5,20 @@ Parts: 5 Granules: 12 MinMax - Description: unknown, unknown, and, (column 0 in [1, +inf)), unknown, and, unknown, and, and, unknown, unknown, and, and Parts: 4 Granules: 11 Partition - Description: unknown, unknown, and, (column 0 in [1, +inf)), (column 1 not in [1, 1]), and, unknown, and, and, unknown, unknown, and, and Parts: 3 Granules: 10 PrimaryKey - Description: unknown, unknown, and, (column 1 in [1, +inf)), unknown, and, (column 0 in [11, +inf)), and, and, unknown, unknown, and, and Parts: 2 Granules: 6 Skip Name: t_minmax - Description: 
minmax GRANULARITY 2 Parts: 1 Granules: 2 Skip Name: t_set - Description: set GRANULARITY 2 Parts: 1 Granules: 1 ----------------- diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index 87116b57054..b94f284478c 100755 --- a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "insert into test_index select number, number > 3 ? 3 : nu $CLICKHOUSE_CLIENT -q " explain actions = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14; - " | grep -A 100 "ReadFromMergeTree" + " | grep -A 100 "ReadFromMergeTree" | grep -v "Description" echo "-----------------" From 531e9ba6e37c968412ac80cfc89486de55e47e67 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 13 Apr 2021 17:37:59 +0300 Subject: [PATCH 14/22] Skip test for antlr. --- tests/queries/skip_list.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index d41a41bd524..4c6afba3af6 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -390,7 +390,8 @@ "01655_plan_optimizations", "01475_read_subcolumns_storages", "01674_clickhouse_client_query_param_cte", - "01666_merge_tree_max_query_limit" + "01666_merge_tree_max_query_limit", + "01786_explain_merge_tree" ], "parallel": [ From be52b2889a7382bb8478325592b18502cec9d7fe Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 15 Apr 2021 20:30:04 +0300 Subject: [PATCH 15/22] Better description for key condition. 
--- .../QueryPlan/ReadFromMergeTree.cpp | 7 + src/Processors/QueryPlan/ReadFromMergeTree.h | 5 +- src/Storages/MergeTree/KeyCondition.cpp | 228 +++++++++++++++++- src/Storages/MergeTree/KeyCondition.h | 10 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 5 +- 5 files changed, 248 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 6d713add336..a34a93cf42b 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -204,6 +204,13 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const if (!stat.name.empty()) format_settings.out << prefix << indent << indent << "Name: " << stat.name << '\n'; + if (!stat.used_keys.empty()) + { + format_settings.out << prefix << indent << indent << "Keys: " << stat.name << '\n'; + for (const auto & used_key : stat.used_keys) + format_settings.out << prefix << indent << indent << indent << used_key << '\n'; + } + if (!stat.description.empty()) format_settings.out << prefix << indent << indent << "Description: " << stat.description << '\n'; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 910417b4883..713f8f448ae 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -9,7 +9,7 @@ namespace DB /// This step is created to read from MergeTree* table. /// For now, it takes a list of parts and creates source from it. 
-class ReadFromMergeTree : public ISourceStep +class ReadFromMergeTree final : public ISourceStep { public: @@ -29,6 +29,7 @@ public: IndexType type; std::string name; std::string description; + std::vector used_keys; size_t num_parts_after; size_t num_granules_after; }; @@ -64,7 +65,7 @@ public: InReverseOrder, }; - explicit ReadFromMergeTree( + ReadFromMergeTree( const MergeTreeData & storage_, StorageMetadataPtr metadata_snapshot_, String query_id_, diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7f33bba14fd..9d1a8c981ca 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -938,6 +938,9 @@ public: return func->getMonotonicityForRange(type, left, right); } + Kind getKind() const { return kind; } + const ColumnWithTypeAndName & getConstArg() const { return const_arg; } + private: FunctionBasePtr func; ColumnWithTypeAndName const_arg; @@ -1308,6 +1311,203 @@ String KeyCondition::toString() const return res; } +KeyCondition::Description KeyCondition::getDescription() const +{ + Description description; + struct Node + { + enum class Type + { + Leaf, + True, + False, + And, + Or, + }; + + Type type; + + /// Only for Leaf + const RPNElement * element = nullptr; + bool negate = false; + + std::unique_ptr left = nullptr; + std::unique_ptr right = nullptr; + }; + + struct Frame + { + std::unique_ptr can_be_true; + std::unique_ptr can_be_false; + }; + + auto combine = [](std::unique_ptr left, std::unique_ptr right, Node::Type type) + { + if (type == Node::Type::And) + { + /// false AND right + if (left->type == Node::Type::False) + return left; + + /// left AND false + if (right->type == Node::Type::False) + return right; + + /// true AND right + if (left->type == Node::Type::True) + return right; + + /// left AND true + if (right->type == Node::Type::True) + return left; + } + + if (type == Node::Type::Or) + { + /// false OR right + if (left->type == Node::Type::False) + 
return right; + + /// left OR false + if (right->type == Node::Type::False) + return left; + + /// true OR right + if (left->type == Node::Type::True) + return left; + + /// left OR true + if (right->type == Node::Type::True) + return right; + } + + return std::make_unique(Node{ + .type = type, + .left = std::move(left), + .right = std::move(right) + }); + }; + + std::vector rpn_stack; + for (const auto & element : rpn) + { + if (element.function == RPNElement::FUNCTION_UNKNOWN) + { + auto can_be_true = std::make_unique(Node{.type = Node::Type::True}); + auto can_be_false = std::make_unique(Node{.type = Node::Type::True}); + rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)}); + } + else if ( + element.function == RPNElement::FUNCTION_IN_RANGE + || element.function == RPNElement::FUNCTION_NOT_IN_RANGE + || element.function == RPNElement::FUNCTION_IN_SET + || element.function == RPNElement::FUNCTION_NOT_IN_SET) + { + auto can_be_true = std::make_unique(Node{.type = Node::Type::Leaf, .element = &element, .negate = false}); + auto can_be_false = std::make_unique(Node{.type = Node::Type::Leaf, .element = &element, .negate = true}); + rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)}); + } + else if (element.function == RPNElement::FUNCTION_NOT) + { + assert(!rpn_stack.empty()); + + std::swap(rpn_stack.back().can_be_true, rpn_stack.back().can_be_false); + } + else if (element.function == RPNElement::FUNCTION_AND) + { + assert(!rpn_stack.empty()); + auto arg1 = std::move(rpn_stack.back()); + + rpn_stack.pop_back(); + + assert(!rpn_stack.empty()); + auto arg2 = std::move(rpn_stack.back()); + + Frame frame; + frame.can_be_true = combine(std::move(arg1.can_be_true), std::move(arg2.can_be_true), Node::Type::And); + frame.can_be_false = combine(std::move(arg1.can_be_false), std::move(arg2.can_be_false), Node::Type::Or); + + rpn_stack.back() = std::move(frame); 
+ } + else if (element.function == RPNElement::FUNCTION_OR) + { + assert(!rpn_stack.empty()); + auto arg1 = std::move(rpn_stack.back()); + + rpn_stack.pop_back(); + + assert(!rpn_stack.empty()); + auto arg2 = std::move(rpn_stack.back()); + + Frame frame; + frame.can_be_true = combine(std::move(arg1.can_be_true), std::move(arg2.can_be_true), Node::Type::Or); + frame.can_be_false = combine(std::move(arg1.can_be_false), std::move(arg2.can_be_false), Node::Type::And); + + rpn_stack.back() = std::move(frame); + } + else if (element.function == RPNElement::ALWAYS_FALSE) + { + auto can_be_true = std::make_unique(Node{.type = Node::Type::False}); + auto can_be_false = std::make_unique(Node{.type = Node::Type::True}); + + rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)}); + } + else if (element.function == RPNElement::ALWAYS_TRUE) + { + auto can_be_true = std::make_unique(Node{.type = Node::Type::True}); + auto can_be_false = std::make_unique(Node{.type = Node::Type::False}); + rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)}); + } + else + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + } + + if (rpn_stack.size() != 1) + throw Exception("Unexpected stack size in KeyCondition::checkInRange", ErrorCodes::LOGICAL_ERROR); + + std::vector key_names(key_columns.size()); + std::vector is_key_used(key_columns.size(), false); + + for (const auto & key : key_columns) + key_names[key.second] = key.first; + + std::function describe; + describe = [&describe, &key_names, &is_key_used](const Node * node) -> std::string + { + switch (node->type) + { + case Node::Type::Leaf: + { + is_key_used[node->element->key_column] = true; + std::string res; + if (node->negate) + res += "not("; + res += node->element->toString(key_names[node->element->key_column], true); + if (node->negate) + res += ")"; + return res; + } + case 
Node::Type::True: + return "true"; + case Node::Type::False: + return "false"; + case Node::Type::And: + return "and(" + describe(node->left.get()) + ", " + describe(node->right.get()) + ")"; + case Node::Type::Or: + return "or(" + describe(node->left.get()) + ", " + describe(node->right.get()) + ")"; + } + + __builtin_unreachable(); + }; + + description.condition = describe(rpn_stack.front().can_be_true.get()); + + for (size_t i = 0; i < key_names.size(); ++i) + if (is_key_used[i]) + description.used_keys.emplace_back(key_names[i]); + + return description; +} /** Index is the value of key every `index_granularity` rows. * This value is called a "mark". That is, the index consists of marks. @@ -1732,18 +1932,38 @@ bool KeyCondition::mayBeTrueAfter( return checkInRange(used_key_size, left_key, nullptr, data_types, false, BoolMask::consider_only_can_be_true).can_be_true; } - -String KeyCondition::RPNElement::toString() const +String KeyCondition::RPNElement::toString() const { return toString("column " + std::to_string(key_column), false); } +String KeyCondition::RPNElement::toString(const std::string_view & column_name, bool print_constants) const { - auto print_wrapped_column = [this](WriteBuffer & buf) + auto print_wrapped_column = [this, &column_name, print_constants](WriteBuffer & buf) { for (auto it = monotonic_functions_chain.rbegin(); it != monotonic_functions_chain.rend(); ++it) + { buf << (*it)->getName() << "("; + if (print_constants) + { + if (const auto * func = typeid_cast(it->get())) + { + if (func->getKind() == FunctionWithOptionalConstArg::Kind::LEFT_CONST) + buf << applyVisitor(FieldVisitorToString(), (*func->getConstArg().column)[0]) << ", "; + } + } + } - buf << "column " << key_column; + buf << column_name; for (auto it = monotonic_functions_chain.rbegin(); it != monotonic_functions_chain.rend(); ++it) + { + if (print_constants) + { + if (const auto * func = typeid_cast(it->get())) + { + if (func->getKind() == 
FunctionWithOptionalConstArg::Kind::RIGHT_CONST) + buf << ", " << applyVisitor(FieldVisitorToString(), (*func->getConstArg().column)[0]); + } + } buf << ")"; + } }; WriteBufferFromOwnString buf; diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 631fb0b7cc4..76ca7410321 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -293,6 +293,15 @@ public: String toString() const; + /// Condition description for EXPLAIN query. + struct Description + { + std::vector used_keys; + std::string condition; + }; + + Description getDescription() const; + /** A chain of possibly monotone functions. * If the key column is wrapped in functions that can be monotonous in some value ranges @@ -345,6 +354,7 @@ private: : function(function_), range(range_), key_column(key_column_) {} String toString() const; + String toString(const std::string_view & column_name, bool print_constants) const; Function function = FUNCTION_UNKNOWN; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 6c794d57e2b..9ae5f66f021 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -774,9 +774,12 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( if (metadata_snapshot->hasPrimaryKey()) { + auto description = key_condition.getDescription(); + index_stats->emplace_back(ReadFromMergeTree::IndexStat{ .type = ReadFromMergeTree::IndexType::PrimaryKey, - .description = key_condition.toString(), + .description = std::move(description.condition), + .used_keys = std::move(description.used_keys), .num_parts_after = sum_parts_pk.load(std::memory_order_relaxed), .num_granules_after = sum_marks_pk.load(std::memory_order_relaxed)}); } From 8d8e57615c0a6eb6f4b6c60d0a5821933a9bc89d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 16 Apr 2021 12:42:23 +0300 Subject: 
[PATCH 16/22] A little bit better index description. --- .../QueryPlan/ReadFromMergeTree.cpp | 33 +++++++--- src/Processors/QueryPlan/ReadFromMergeTree.h | 1 + src/Storages/MergeTree/KeyCondition.cpp | 60 ++++++++++++++----- src/Storages/MergeTree/KeyCondition.h | 3 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 10 +++- src/Storages/MergeTree/PartitionPruner.h | 2 +- 6 files changed, 83 insertions(+), 26 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index a34a93cf42b..b51711672df 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -193,17 +193,26 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const if (index_stats && !index_stats->empty()) { - std::string indent(format_settings.indent, format_settings.indent_char); - format_settings.out << prefix << "Indexes:\n"; - for (const auto & stat : *index_stats) + /// Do not print anything if no indexes is applied. 
+ if (index_stats->size() > 1 || index_stats->front().type != IndexType::None) + format_settings.out << prefix << "Indexes:\n"; + + for (size_t i = 0; i < index_stats->size(); ++i) { + const auto & stat = (*index_stats)[i]; + if (stat.type == IndexType::None) + continue; + format_settings.out << prefix << indent << indexTypeToString(stat.type) << '\n'; if (!stat.name.empty()) format_settings.out << prefix << indent << indent << "Name: " << stat.name << '\n'; + if (!stat.description.empty()) + format_settings.out << prefix << indent << indent << "Description: " << stat.description << '\n'; + if (!stat.used_keys.empty()) { format_settings.out << prefix << indent << indent << "Keys: " << stat.name << '\n'; @@ -211,12 +220,22 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << prefix << indent << indent << indent << used_key << '\n'; } - if (!stat.description.empty()) - format_settings.out << prefix << indent << indent << "Description: " << stat.description << '\n'; + if (!stat.condition.empty()) + format_settings.out << prefix << indent << indent << "Description: " << stat.condition << '\n'; - format_settings.out << prefix << indent << indent << "Parts: " << stat.num_parts_after << '\n'; - format_settings.out << prefix << indent << indent << "Granules: " << stat.num_granules_after << '\n'; + format_settings.out << prefix << indent << indent << "Parts: " << stat.num_parts_after; + if (i) + format_settings.out << '/' << (*index_stats)[i - 1].num_parts_after; + format_settings.out << '\n'; + + format_settings.out << prefix << indent << indent << "Granules: " << stat.num_granules_after; + if (i) + format_settings.out << '/' << (*index_stats)[i - 1].num_granules_after; + format_settings.out << '\n'; } + + format_settings.out << prefix << "Parts: " << index_stats->back().num_parts_after << '\n'; + format_settings.out << prefix << "Granules: " << index_stats->back().num_granules_after << '\n'; } } diff --git 
a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 713f8f448ae..8cd67d5792e 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -29,6 +29,7 @@ public: IndexType type; std::string name; std::string description; + std::string condition; std::vector used_keys; size_t num_parts_after; size_t num_granules_after; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 9d1a8c981ca..b629ee51a1d 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1313,14 +1313,22 @@ String KeyCondition::toString() const KeyCondition::Description KeyCondition::getDescription() const { + /// This code may seem to be too difficult. + /// Here we want to convert RPN back to tree, and also simplify some logical expressions like `and(x, true) -> x`. Description description; + + /// That's a binary tree. Explicit. + /// Build and optimize it simultaneously. struct Node { enum class Type { + /// Leaf, which is RPNElement. Leaf, + /// Leafs, which are logical constants. True, False, + /// Binary operators. And, Or, }; @@ -1329,20 +1337,27 @@ KeyCondition::Description KeyCondition::getDescription() const /// Only for Leaf const RPNElement * element = nullptr; + /// This means that logical NOT is applied to leaf. bool negate = false; std::unique_ptr left = nullptr; std::unique_ptr right = nullptr; }; + /// The algorithm is the same as in KeyCondition::checkInHyperrectangle + /// We build a pair of trees on stack. For checking if key condition may be true, and if it may be false. + /// We need only `can_be_true` in result. struct Frame { std::unique_ptr can_be_true; std::unique_ptr can_be_false; }; + /// Combine two subtrees using logical operator. auto combine = [](std::unique_ptr left, std::unique_ptr right, Node::Type type) { + /// Simplify operators with for one constant condition. 
+ if (type == Node::Type::And) { /// false AND right @@ -1471,36 +1486,53 @@ KeyCondition::Description KeyCondition::getDescription() const for (const auto & key : key_columns) key_names[key.second] = key.first; - std::function describe; - describe = [&describe, &key_names, &is_key_used](const Node * node) -> std::string + WriteBufferFromOwnString buf; + + std::function describe; + describe = [&describe, &key_names, &is_key_used, &buf](const Node * node) { switch (node->type) { case Node::Type::Leaf: { is_key_used[node->element->key_column] = true; - std::string res; + + /// Note: for condition with double negation, like `not(x not in set)`, + /// we can replace it to `x in set` here. + /// But I won't do it, because `cloneASTWithInversionPushDown` already push down `not`. + /// So, this seem to be impossible for `can_be_true` tree. if (node->negate) - res += "not("; - res += node->element->toString(key_names[node->element->key_column], true); + buf << "not("; + buf << node->element->toString(key_names[node->element->key_column], true); if (node->negate) - res += ")"; - return res; + buf << ")"; + break; } case Node::Type::True: - return "true"; + buf << "true"; + break; case Node::Type::False: - return "false"; + buf << "false"; + break; case Node::Type::And: - return "and(" + describe(node->left.get()) + ", " + describe(node->right.get()) + ")"; + buf << "and("; + describe(node->left.get()); + buf << ", "; + describe(node->right.get()); + buf << ")"; + break; case Node::Type::Or: - return "or(" + describe(node->left.get()) + ", " + describe(node->right.get()) + ")"; + buf << "or("; + describe(node->left.get()); + buf << ", "; + describe(node->right.get()); + buf << ")"; + break; } - - __builtin_unreachable(); }; - description.condition = describe(rpn_stack.front().can_be_true.get()); + describe(rpn_stack.front().can_be_true.get()); + description.condition = std::move(buf.str()); for (size_t i = 0; i < key_names.size(); ++i) if (is_key_used[i]) diff --git 
a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 76ca7410321..bd51769ad1f 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -296,13 +296,14 @@ public: /// Condition description for EXPLAIN query. struct Description { + /// Which columns from PK were used, in PK order. std::vector used_keys; + /// Condition which was applied, mostly human-readable. std::string condition; }; Description getDescription() const; - /** A chain of possibly monotone functions. * If the key column is wrapped in functions that can be monotonous in some value ranges * (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate. diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 9ae5f66f021..af72b3e53f2 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -297,18 +297,22 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( if (minmax_idx_condition) { + auto description = minmax_idx_condition->getDescription(); index_stats->emplace_back(ReadFromMergeTree::IndexStat{ .type = ReadFromMergeTree::IndexType::MinMax, - .description = minmax_idx_condition->toString(), + .condition = std::move(description.condition), + .used_keys = std::move(description.used_keys), .num_parts_after = part_filter_counters.num_parts_after_minmax, .num_granules_after = part_filter_counters.num_granules_after_minmax}); } if (partition_pruner) { + auto description = partition_pruner->getKeyCondition().getDescription(); index_stats->emplace_back(ReadFromMergeTree::IndexStat{ .type = ReadFromMergeTree::IndexType::Partition, - .description = partition_pruner->toString(), + .condition = std::move(description.condition), + .used_keys = std::move(description.used_keys), .num_parts_after = 
part_filter_counters.num_parts_after_partition_pruner, .num_granules_after = part_filter_counters.num_granules_after_partition_pruner}); } @@ -778,7 +782,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( index_stats->emplace_back(ReadFromMergeTree::IndexStat{ .type = ReadFromMergeTree::IndexType::PrimaryKey, - .description = std::move(description.condition), + .condition = std::move(description.condition), .used_keys = std::move(description.used_keys), .num_parts_after = sum_parts_pk.load(std::memory_order_relaxed), .num_granules_after = sum_marks_pk.load(std::memory_order_relaxed)}); diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index 4f0384a87c7..a4035087b89 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -33,7 +33,7 @@ public: bool isUseless() const { return useless; } - std::string toString() const { return partition_condition.toString(); } + const KeyCondition & getKeyCondition() const { return partition_condition; } }; } From 6fe3470893554ace6a5aa02038a5f241aee86a0d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 16 Apr 2021 12:43:11 +0300 Subject: [PATCH 17/22] Update test. 
--- .../01786_explain_merge_tree.reference | 88 +++++++++++-------- .../0_stateless/01786_explain_merge_tree.sh | 2 +- 2 files changed, 52 insertions(+), 38 deletions(-) diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index 29f4fea3107..e89c6d49028 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -1,61 +1,75 @@ ReadFromMergeTree ReadType: Default Indexes: - None - Parts: 5 - Granules: 12 MinMax - Parts: 4 - Granules: 11 + Keys: + y + Description: (y in [1, +inf)) + Parts: 4/5 + Granules: 11/12 Partition - Parts: 3 - Granules: 10 + Keys: + y + bitAnd(z, 3) + Description: and((bitAnd(z, 3) not in [1, 1]), (y in [1, +inf))) + Parts: 3/4 + Granules: 10/11 PrimaryKey - Parts: 2 - Granules: 6 + Keys: + x + y + Description: and((x in [11, +inf)), (y in [1, +inf))) + Parts: 2/3 + Granules: 6/10 Skip Name: t_minmax - Parts: 1 - Granules: 2 + Description: minmax GRANULARITY 2 + Parts: 1/2 + Granules: 2/6 Skip Name: t_set - Parts: 1 - Granules: 1 + Description: set GRANULARITY 2 + Parts: 1/1 + Granules: 1/2 + Parts: 1 + Granules: 1 ----------------- ReadFromMergeTree ReadType: InOrder Indexes: - None - Parts: 5 - Granules: 12 MinMax - Description: unknown - Parts: 5 - Granules: 12 + Description: true + Parts: 5/5 + Granules: 12/12 Partition - Description: unknown - Parts: 5 - Granules: 12 + Description: true + Parts: 5/5 + Granules: 12/12 PrimaryKey - Description: (column 0 in [16, +inf)) - Parts: 1 - Granules: 3 + Keys: + x + Description: (x in [16, +inf)) + Parts: 1/5 + Granules: 3/12 + Parts: 1 + Granules: 3 ----------------- ReadFromMergeTree ReadType: InReverseOrder Indexes: - None - Parts: 5 - Granules: 12 MinMax - Description: unknown - Parts: 5 - Granules: 12 + Description: true + Parts: 5/5 + Granules: 12/12 Partition - Description: unknown - Parts: 5 - Granules: 12 + Description: true 
+ Parts: 5/5 + Granules: 12/12 PrimaryKey - Description: (column 0 in [16, +inf)) - Parts: 1 - Granules: 3 + Keys: + x + Description: (x in [16, +inf)) + Parts: 1/5 + Granules: 3/12 + Parts: 1 + Granules: 3 diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index b94f284478c..052f048b6ff 100755 --- a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "insert into test_index select number, number > 3 ? 3 : nu $CLICKHOUSE_CLIENT -q " explain actions = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14; - " | grep -A 100 "ReadFromMergeTree" | grep -v "Description" + " | grep -A 100 "ReadFromMergeTree" # | grep -v "Description" echo "-----------------" From 23089a2fec52e5e9046281ac530d212c8e073b1a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 16 Apr 2021 14:11:45 +0300 Subject: [PATCH 18/22] Add special setting to show indexes. 
--- src/Interpreters/InterpreterExplainQuery.cpp | 1 + src/Processors/QueryPlan/IQueryPlanStep.h | 3 ++ src/Processors/QueryPlan/QueryPlan.cpp | 3 ++ src/Processors/QueryPlan/QueryPlan.h | 2 ++ .../QueryPlan/ReadFromMergeTree.cpp | 13 ++++++-- src/Processors/QueryPlan/ReadFromMergeTree.h | 1 + .../01786_explain_merge_tree.reference | 33 ------------------- .../0_stateless/01786_explain_merge_tree.sh | 2 +- 8 files changed, 21 insertions(+), 37 deletions(-) diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index a0195ec85e6..e8578a07491 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -129,6 +129,7 @@ struct QueryPlanSettings {"header", query_plan_options.header}, {"description", query_plan_options.description}, {"actions", query_plan_options.actions}, + {"indexes", query_plan_options.indexes}, {"optimize", optimize}, }; }; diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index 8211b52a6c4..2974891e2bf 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -99,6 +99,9 @@ public: /// Get detailed description of step actions. This is shown in EXPLAIN query with options `actions = 1`. virtual void describeActions(FormatSettings & /*settings*/) const {} + /// Get detailed description of read-from-storage step indexes (if any). This is shown in EXPLAIN query with options `indexes = 1`. + virtual void describeIndexes(FormatSettings & /*settings*/) const {} + /// Get description of processors added in current step. Should be called after updatePipeline(). 
virtual void describePipeline(FormatSettings & /*settings*/) const {} diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 974da579d0c..ad3649385fd 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -243,6 +243,9 @@ static void explainStep( if (options.actions) step.describeActions(settings); + + if (options.indexes) + step.describeIndexes(settings); } std::string debugExplainStep(const IQueryPlanStep & step) diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index bf7ed81fdc1..901d83c3ab8 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -66,6 +66,8 @@ public: bool description = true; /// Add detailed information about step actions. bool actions = false; + /// Add information about indexes actions. + bool indexes = false; }; struct ExplainPipelineOptions diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index b51711672df..991ddd2e231 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -191,6 +191,16 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const std::string prefix(format_settings.offset, format_settings.indent_char); format_settings.out << prefix << "ReadType: " << readTypeToString(read_type) << '\n'; + if (index_stats && !index_stats->empty()) + { + format_settings.out << prefix << "Parts: " << index_stats->back().num_parts_after << '\n'; + format_settings.out << prefix << "Granules: " << index_stats->back().num_granules_after << '\n'; + } +} + +void ReadFromMergeTree::describeIndexes(FormatSettings & format_settings) const +{ + std::string prefix(format_settings.offset, format_settings.indent_char); if (index_stats && !index_stats->empty()) { std::string indent(format_settings.indent, format_settings.indent_char); @@ -233,9 +243,6 
@@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << '/' << (*index_stats)[i - 1].num_granules_after; format_settings.out << '\n'; } - - format_settings.out << prefix << "Parts: " << index_stats->back().num_parts_after << '\n'; - format_settings.out << prefix << "Granules: " << index_stats->back().num_granules_after << '\n'; } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 8cd67d5792e..1d6a4491588 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -85,6 +85,7 @@ public: void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(FormatSettings & format_settings) const override; + void describeIndexes(FormatSettings & format_settings) const override; private: const MergeTreeData & storage; diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index e89c6d49028..79be201334b 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -1,5 +1,4 @@ ReadFromMergeTree - ReadType: Default Indexes: MinMax Keys: @@ -31,45 +30,13 @@ Description: set GRANULARITY 2 Parts: 1/1 Granules: 1/2 - Parts: 1 - Granules: 1 ----------------- ReadFromMergeTree ReadType: InOrder - Indexes: - MinMax - Description: true - Parts: 5/5 - Granules: 12/12 - Partition - Description: true - Parts: 5/5 - Granules: 12/12 - PrimaryKey - Keys: - x - Description: (x in [16, +inf)) - Parts: 1/5 - Granules: 3/12 Parts: 1 Granules: 3 ----------------- ReadFromMergeTree ReadType: InReverseOrder - Indexes: - MinMax - Description: true - Parts: 5/5 - Granules: 12/12 - Partition - Description: true - Parts: 5/5 - Granules: 12/12 - PrimaryKey - Keys: - x - Description: (x in [16, +inf)) - Parts: 1/5 - Granules: 
3/12 Parts: 1 Granules: 3 diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index 052f048b6ff..2056e00bf4a 100755 --- a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -8,7 +8,7 @@ $CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t $CLICKHOUSE_CLIENT -q "insert into test_index select number, number > 3 ? 3 : number, number = 1 ? 1 : 0, number from numbers(20)" $CLICKHOUSE_CLIENT -q " - explain actions = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14; + explain indexes = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14; " | grep -A 100 "ReadFromMergeTree" # | grep -v "Description" echo "-----------------" From 21e297b8a3eb018d1f03dd98b5052551dc6d10e9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 16 Apr 2021 14:18:55 +0300 Subject: [PATCH 19/22] Update test --- .../0_stateless/01786_explain_merge_tree.reference | 9 +++++++++ .../queries/0_stateless/01786_explain_merge_tree.sh | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index 79be201334b..147c05bc11b 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -40,3 +40,12 @@ ReadType: InReverseOrder Parts: 1 Granules: 3 + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + x + plus(x, y) + Description: or((x in 2-element set), (plus(plus(x, y), 1) in (-inf, 2])) + Parts: 1/1 + Granules: 1/1 diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index 2056e00bf4a..818b9815848 100755 --- 
a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -4,6 +4,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +$CLICKHOUSE_CLIENT -q "drop table if exists test_index" +$CLICKHOUSE_CLIENT -q "drop table if exists idx" + $CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2" $CLICKHOUSE_CLIENT -q "insert into test_index select number, number > 3 ? 3 : number, number = 1 ? 1 : 0, number from numbers(20)" @@ -23,3 +26,12 @@ $CLICKHOUSE_CLIENT -q " explain actions = 1 select x from test_index where x > 15 order by x desc; " | grep -A 100 "ReadFromMergeTree" +$CLICKHOUSE_CLIENT -q "CREATE TABLE idx (x UInt32, y UInt32, z UInt32) ENGINE = MergeTree ORDER BY (x, x + y)" +$CLICKHOUSE_CLIENT -q "insert into idx select number, number, number from numbers(10)" + +$CLICKHOUSE_CLIENT -q " + explain indexes = 1 select z from idx where not(x + y + 1 > 2 and x not in (4, 5)) + " | grep -A 100 "ReadFromMergeTree" + +$CLICKHOUSE_CLIENT -q "drop table if exists test_index" +$CLICKHOUSE_CLIENT -q "drop table if exists idx" From a0c942c7d3e1f0e56ee2e562943cf618b1f3fc67 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 16 Apr 2021 14:32:59 +0300 Subject: [PATCH 20/22] Update test --- .../0_stateless/01786_explain_merge_tree.reference | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index 147c05bc11b..f2f39ab1690 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ 
-3,21 +3,21 @@ MinMax Keys: y - Description: (y in [1, +inf)) + Condition: (y in [1, +inf)) Parts: 4/5 Granules: 11/12 Partition Keys: y bitAnd(z, 3) - Description: and((bitAnd(z, 3) not in [1, 1]), (y in [1, +inf))) + Condition: and((bitAnd(z, 3) not in [1, 1]), (y in [1, +inf))) Parts: 3/4 Granules: 10/11 PrimaryKey Keys: x y - Description: and((x in [11, +inf)), (y in [1, +inf))) + Condition: and((x in [11, +inf)), (y in [1, +inf))) Parts: 2/3 Granules: 6/10 Skip @@ -46,6 +46,6 @@ Keys: x plus(x, y) - Description: or((x in 2-element set), (plus(plus(x, y), 1) in (-inf, 2])) + Condition: or((x in 2-element set), (plus(plus(x, y), 1) in (-inf, 2])) Parts: 1/1 Granules: 1/1 From d5882620b2bed55df323ea5e657f8c3c53253545 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 16 Apr 2021 14:33:20 +0300 Subject: [PATCH 21/22] Fix condition --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 991ddd2e231..c8311f49b22 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -231,7 +231,7 @@ void ReadFromMergeTree::describeIndexes(FormatSettings & format_settings) const } if (!stat.condition.empty()) - format_settings.out << prefix << indent << indent << "Description: " << stat.condition << '\n'; + format_settings.out << prefix << indent << indent << "Condition: " << stat.condition << '\n'; format_settings.out << prefix << indent << indent << "Parts: " << stat.num_parts_after; if (i) From abe84acc8c3b56818d27454af747bdddf165948a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 16 Apr 2021 20:19:03 +0300 Subject: [PATCH 22/22] Update test. 
--- tests/queries/0_stateless/01786_explain_merge_tree.reference | 2 +- tests/queries/0_stateless/01786_explain_merge_tree.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index f2f39ab1690..51eb52688a3 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -10,7 +10,7 @@ Keys: y bitAnd(z, 3) - Condition: and((bitAnd(z, 3) not in [1, 1]), (y in [1, +inf))) + Condition: and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +inf)), (bitAnd(z, 3) not in [1, 1]))) Parts: 3/4 Granules: 10/11 PrimaryKey diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index 818b9815848..2791d0c6921 100755 --- a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "drop table if exists test_index" $CLICKHOUSE_CLIENT -q "drop table if exists idx" -$CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2" +$CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2, min_bytes_for_wide_part = 0" $CLICKHOUSE_CLIENT -q "insert into test_index select number, number > 3 ? 3 : number, number = 1 ? 
1 : 0, number from numbers(20)" $CLICKHOUSE_CLIENT -q " @@ -26,7 +26,7 @@ $CLICKHOUSE_CLIENT -q " explain actions = 1 select x from test_index where x > 15 order by x desc; " | grep -A 100 "ReadFromMergeTree" -$CLICKHOUSE_CLIENT -q "CREATE TABLE idx (x UInt32, y UInt32, z UInt32) ENGINE = MergeTree ORDER BY (x, x + y)" +$CLICKHOUSE_CLIENT -q "CREATE TABLE idx (x UInt32, y UInt32, z UInt32) ENGINE = MergeTree ORDER BY (x, x + y) settings min_bytes_for_wide_part = 0" $CLICKHOUSE_CLIENT -q "insert into idx select number, number, number from numbers(10)" $CLICKHOUSE_CLIENT -q "