#pragma once #include #include #include #include namespace DB { struct QueryPlanResourceHolder; /** A table that represents the union of an arbitrary number of other tables. * All tables must have the same structure. */ class StorageMerge final : public IStorage, WithContext { public: using DBToTableSetMap = std::map>; StorageMerge( const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment, const String & source_database_name_or_regexp_, bool database_is_regexp_, const DBToTableSetMap & source_databases_and_tables_, ContextPtr context_); StorageMerge( const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment, const String & source_database_name_or_regexp_, bool database_is_regexp_, const String & source_table_regexp_, ContextPtr context_); std::string getName() const override { return "Merge"; } bool isRemote() const override; /// The check is delayed to the read method. It checks the support of the tables used. bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsIndexForIn() const override { return true; } bool supportsSubcolumns() const override { return true; } bool supportsPrewhere() const override { return true; } bool canMoveConditionsToPrewhere() const override; QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; void read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) override; void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override; /// you need to add and remove columns in the sub-tables manually /// the structure of sub-tables is not checked void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override; bool mayBenefitFromIndexForIn( const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override; /// Evaluate database name or regexp for StorageMerge and TableFunction merge static std::tuple evaluateDatabaseName(const ASTPtr & node, ContextPtr context); private: std::optional source_database_regexp; std::optional source_table_regexp; std::optional source_databases_and_tables; String source_database_name_or_regexp; bool database_is_regexp = false; /// (Database, Table, Lock, TableName) using StorageWithLockAndName = std::tuple; using StorageListWithLocks = std::list; using DatabaseTablesIterators = std::vector; StorageMerge::StorageListWithLocks getSelectedTables( ContextPtr query_context, const ASTPtr & query = nullptr, bool filter_by_database_virtual_column = false, bool filter_by_table_virtual_column = false) const; template StoragePtr getFirstTable(F && predicate) const; template void forEachTable(F && func) const; DatabaseTablesIteratorPtr getDatabaseIterator(const String & database_name, ContextPtr context) const; DatabaseTablesIterators getDatabaseIterators(ContextPtr context) const; NamesAndTypesList getVirtuals() const override; ColumnSizeByName getColumnSizes() const override; ColumnsDescription getColumnsDescriptionFromSourceTables() const; friend class ReadFromMerge; }; class ReadFromMerge final : public ISourceStep { public: static constexpr auto name = "ReadFromMerge"; String getName() const override { return name; } using StorageWithLockAndName = std::tuple; using StorageListWithLocks = std::list; using DatabaseTablesIterators = std::vector; ReadFromMerge( Block common_header_, StorageListWithLocks selected_tables_, Names column_names_, bool has_database_virtual_column_, bool has_table_virtual_column_, size_t max_block_size, size_t num_streams, StoragePtr storage, StorageSnapshotPtr storage_snapshot, const SelectQueryInfo & query_info_, ContextMutablePtr context_, QueryProcessingStage::Enum processed_stage); void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void addFilter(ActionsDAGPtr expression, std::string column_name) { added_filter_dags.push_back(expression); added_filter_nodes.nodes.push_back(&expression->findInOutputs(column_name)); } const StorageListWithLocks & getSelectedTables() const { return selected_tables; } void requestReadingInOrder(InputOrderInfoPtr order_info_) { order_info = order_info_; } private: const size_t required_max_block_size; const size_t requested_num_streams; const Block common_header; StorageListWithLocks selected_tables; Names column_names; bool has_database_virtual_column; bool has_table_virtual_column; StoragePtr storage_merge; StorageSnapshotPtr merge_storage_snapshot; SelectQueryInfo query_info; ContextMutablePtr context; QueryProcessingStage::Enum common_processed_stage; std::vector added_filter_dags; ActionDAGNodes added_filter_nodes; std::string added_filter_column_name; InputOrderInfoPtr order_info; struct AliasData { String name; DataTypePtr type; ASTPtr expression; }; using Aliases = std::vector; QueryPipelineBuilderPtr createSources( const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage, UInt64 max_block_size, const Block & header, const Aliases & aliases, const StorageWithLockAndName & storage_with_lock, Names & real_column_names, ContextMutablePtr modified_context, size_t streams_num, bool concat_streams = false); void convertingSourceStream( const Block & header, const StorageMetadataPtr & metadata_snapshot, const Aliases & aliases, ContextPtr context, ASTPtr & query, QueryPipelineBuilder & builder, QueryProcessingStage::Enum processed_stage); }; }