#pragma once

// NOTE(review): the targets of the six #include directives below are missing in this copy
// of the file; restore them from the original header before building.
#include
#include
#include
#include
#include
#include

// NOTE(review): several template argument lists also appear to be missing in this copy
// (e.g. `std::shared_ptr`, `std::unordered_map`, `std::optional`, and the stray `>` after
// `std::optional`). The tokens are reproduced unchanged here; restore the arguments from the
// original header.

namespace DB
{

class KeyCondition;

/// Sampling-related data; this is the return type of MergeTreeDataSelectExecutor::getSampling().
struct MergeTreeDataSelectSamplingData
{
    bool use_sampling = false;
    bool read_nothing = false;
    Float64 used_sample_factor = 1.0;
    std::shared_ptr filter_function;
    ActionsDAGPtr filter_expression;
};

/// Disabled (commented-out) declaration:
// struct MergeTreeDataSelectCache
// {
//     RangesInDataParts parts_with_ranges;
//     MergeTreeDataSelectSamplingData sampling;
//     std::unique_ptr index_stats;
//     size_t sum_marks = 0;
//     size_t sum_ranges = 0;
//     bool use_cache = false;
// };

/// Going by the name, maps a partition id to a maximum block number - TODO confirm the key and
/// mapped types, which are missing in this copy.
using PartitionIdToMaxBlock = std::unordered_map;

/** Executes SELECT queries on data from the merge tree.
  */
class MergeTreeDataSelectExecutor
{
public:
    /// Binds the executor to a table's data; `data` is stored as a const reference member,
    /// so the referenced MergeTreeData must outlive this object.
    explicit MergeTreeDataSelectExecutor(const MergeTreeData & data_);

    /** When reading, selects a set of parts that covers the desired range of the index.
      * max_block_numbers_to_read - if not nullptr, do not read all the parts whose right border
      *  is greater than max_block in partition.
      */
    QueryPlanPtr read(
        const Names & column_names,
        const StorageMetadataPtr & metadata_snapshot,
        const SelectQueryInfo & query_info,
        ContextPtr context,
        UInt64 max_block_size,
        unsigned num_streams,
        QueryProcessingStage::Enum processed_stage,
        std::shared_ptr max_block_numbers_to_read = nullptr) const;

    /// Takes an explicit list of parts and returns a size_t; going by the name, that is an
    /// estimate of the number of marks a read over those parts would touch - TODO confirm
    /// against the implementation.
    size_t estimateNumMarksToRead(
        MergeTreeData::DataPartsVector parts,
        const Names & column_names,
        const StorageMetadataPtr & metadata_snapshot_base,
        const StorageMetadataPtr & metadata_snapshot,
        const SelectQueryInfo & query_info,
        ContextPtr context,
        unsigned num_streams,
        std::shared_ptr max_block_numbers_to_read = nullptr) const;

    /// Like read(), but takes an explicitly supplied list of parts and two metadata snapshots,
    /// and has no `processed_stage` parameter.
    QueryPlanPtr readFromParts(
        MergeTreeData::DataPartsVector parts,
        const Names & column_names,
        const StorageMetadataPtr & metadata_snapshot_base,
        const StorageMetadataPtr & metadata_snapshot,
        const SelectQueryInfo & query_info,
        ContextPtr context,
        UInt64 max_block_size,
        unsigned num_streams,
        std::shared_ptr max_block_numbers_to_read = nullptr) const;

private:
    const MergeTreeData & data;
    Poco::Logger * log;

    /// Disabled (commented-out) declarations:
    // QueryPlanPtr spreadMarkRangesAmongStreams(
    //     RangesInDataParts && parts,
    //     size_t num_streams,
    //     const Names & column_names,
    //     const StorageMetadataPtr & metadata_snapshot,
    //     UInt64 max_block_size,
    //     bool use_uncompressed_cache,
    //     const SelectQueryInfo & query_info,
    //     const Names & virt_columns,
    //     const Settings & settings,
    //     const MergeTreeReaderSettings & reader_settings,
    //     const String & query_id) const;

    // /// out_projection - save projection only with columns, requested to read
    // QueryPlanPtr spreadMarkRangesAmongStreamsWithOrder(
    //     RangesInDataParts && parts,
    //     size_t num_streams,
    //     const Names & column_names,
    //     const StorageMetadataPtr & metadata_snapshot,
    //     UInt64 max_block_size,
    //     bool use_uncompressed_cache,
    //     const SelectQueryInfo & query_info,
    //     const ActionsDAGPtr & sorting_key_prefix_expr,
    //     const Names & virt_columns,
    //     const Settings & settings,
    //     const MergeTreeReaderSettings & reader_settings,
    //     ActionsDAGPtr & out_projection,
    //     const String & query_id,
    //     const InputOrderInfoPtr & input_order_info) const;

    // QueryPlanPtr spreadMarkRangesAmongStreamsFinal(
    //     RangesInDataParts && parts,
    //     size_t num_streams,
    //     const Names & column_names,
    //     const StorageMetadataPtr & metadata_snapshot,
    //     UInt64 max_block_size,
    //     bool use_uncompressed_cache,
    //     const SelectQueryInfo & query_info,
    //     const Names & virt_columns,
    //     const Settings & settings,
    //     const MergeTreeReaderSettings & reader_settings,
    //     ActionsDAGPtr & out_projection,
    //     const String & query_id) const;

    /// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index.
    static size_t getApproximateTotalRowsToRead(
        const MergeTreeData::DataPartsVector & parts,
        const StorageMetadataPtr & metadata_snapshot,
        const KeyCondition & key_condition,
        const Settings & settings,
        Poco::Logger * log);

    /// Returns mark ranges for a single part given a key condition.
    /// NOTE(review): presumably the ranges that may satisfy `key_condition` on the primary key - confirm.
    static MarkRanges markRangesFromPKRange(
        const MergeTreeData::DataPartPtr & part,
        const StorageMetadataPtr & metadata_snapshot,
        const KeyCondition & key_condition,
        const Settings & settings,
        Poco::Logger * log);

    /// Takes an index, its condition and a set of mark ranges for one part, and returns mark ranges.
    /// `total_granules` and `granules_dropped` are passed by non-const reference
    /// (presumably counters updated by the call - TODO confirm whether they are accumulated or overwritten).
    static MarkRanges filterMarksUsingIndex(
        MergeTreeIndexPtr index_helper,
        MergeTreeIndexConditionPtr condition,
        MergeTreeData::DataPartPtr part,
        const MarkRanges & ranges,
        const Settings & settings,
        const MergeTreeReaderSettings & reader_settings,
        size_t & total_granules,
        size_t & granules_dropped,
        Poco::Logger * log);

    /// Part/granule counters passed by reference to selectPartsToRead() and
    /// selectPartsToReadWithUUIDFilter(). All counters start at zero.
    struct PartFilterCounters
    {
        size_t num_initial_selected_parts = 0;
        size_t num_initial_selected_granules = 0;
        size_t num_parts_after_minmax = 0;
        size_t num_granules_after_minmax = 0;
        size_t num_parts_after_partition_pruner = 0;
        size_t num_granules_after_partition_pruner = 0;
    };

    /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`,
    ///  as well as `max_block_numbers_to_read`.
    static void selectPartsToRead(
        MergeTreeData::DataPartsVector & parts,
        const std::optional> & part_values,
        const std::optional & minmax_idx_condition,
        const DataTypes & minmax_columns_types,
        std::optional & partition_pruner,
        const PartitionIdToMaxBlock * max_block_numbers_to_read,
        PartFilterCounters & counters);

    /// Same as previous but also skip parts uuids if any to the query context, or skip parts which uuids marked as excluded.
    /// NOTE(review): the sentence above is unclear as written; presumably this variant also registers
    /// part UUIDs (if any) in `query_context` and skips parts whose UUIDs are marked as excluded -
    /// confirm against the implementation and reword.
    static void selectPartsToReadWithUUIDFilter(
        MergeTreeData::DataPartsVector & parts,
        const std::optional> & part_values,
        MergeTreeData::PinnedPartUUIDsPtr pinned_part_uuids,
        const std::optional & minmax_idx_condition,
        const DataTypes & minmax_columns_types,
        std::optional & partition_pruner,
        const PartitionIdToMaxBlock * max_block_numbers_to_read,
        ContextPtr query_context,
        PartFilterCounters & counters,
        Poco::Logger * log);

public:
    /// The static helpers below are public and take the table data / metadata explicitly
    /// instead of using the members of an executor instance.

    /// Returns an optional value with the same type spelling as the `part_values` arguments
    /// accepted by the functions in this class. `parts` is passed by non-const reference.
    static std::optional> filterPartsByVirtualColumns(
        const MergeTreeData & data,
        MergeTreeData::DataPartsVector & parts,
        const ASTPtr & query,
        ContextPtr context);

    /// Filters `parts` in place (it is passed by non-const reference); `index_stats` is also
    /// passed by non-const reference (presumably filled with pruning statistics - TODO confirm).
    static void filterPartsByPartition(
        const StorageMetadataPtr & metadata_snapshot,
        const MergeTreeData & data,
        const SelectQueryInfo & query_info,
        const ContextPtr & context,
        const ContextPtr & query_context,
        MergeTreeData::DataPartsVector & parts,
        const std::optional> & part_values,
        const PartitionIdToMaxBlock * max_block_numbers_to_read,
        Poco::Logger * log,
        ReadFromMergeTree::IndexStats & index_stats);

    /// Consumes `parts` (rvalue reference) and returns the parts together with their ranges.
    /// `use_skip_indexes` presumably toggles the skip-index stage - TODO confirm.
    static RangesInDataParts filterPartsByPrimaryKeyAndSkipIndexes(
        MergeTreeData::DataPartsVector && parts,
        StorageMetadataPtr metadata_snapshot,
        const SelectQueryInfo & query_info,
        const ContextPtr & context,
        KeyCondition & key_condition,
        const MergeTreeReaderSettings & reader_settings,
        Poco::Logger * log,
        size_t num_streams,
        ReadFromMergeTree::IndexStats & index_stats,
        bool use_skip_indexes);

    /// Builds the sampling description (MergeTreeDataSelectSamplingData) for a SELECT query.
    /// `parts` and `key_condition` are passed by non-const reference, so the call may modify them.
    static MergeTreeDataSelectSamplingData getSampling(
        const ASTSelectQuery & select,
        MergeTreeData::DataPartsVector & parts,
        const StorageMetadataPtr & metadata_snapshot,
        KeyCondition & key_condition,
        const MergeTreeData & data,
        Poco::Logger * log,
        bool sample_factor_column_queried,
        NamesAndTypesList available_real_columns,
        ContextPtr context);

    /// Checks the selected parts/ranges against limits and returns a String.
    /// NOTE(review): the meaning of the returned string (e.g. query id vs. error text) is not
    /// visible from this header - confirm against the implementation.
    static String checkLimits(const MergeTreeData & data, const RangesInDataParts & parts_with_ranges, const ContextPtr & context);
};

}