ClickHouse/src/Storages/StorageMerge.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

208 lines
7.2 KiB
C++
Raw Normal View History

2012-05-30 05:53:09 +00:00
#pragma once
#include <Common/OptimizedRegularExpression.h>
2022-07-25 19:41:43 +00:00
#include <Storages/SelectQueryInfo.h>
#include <Storages/IStorage.h>
2023-02-24 12:46:09 +00:00
#include <Processors/QueryPlan/SourceStepWithFilter.h>
2014-01-10 13:24:50 +00:00
2012-05-30 05:53:09 +00:00
namespace DB
{
2022-05-20 19:49:31 +00:00
/// Forward declaration only; nothing in the part of this header visible here refers to this type.
/// NOTE(review): possibly a leftover - confirm against the .cpp and other includers before removing.
struct QueryPlanResourceHolder;
2017-04-16 15:00:33 +00:00
/** A table that represents the union of an arbitrary number of other tables.
  * All tables must have the same structure.
  */
/// Storage reported under the name "Merge" (see getName()).
/// The two constructors accept the source tables either as an explicit
/// database -> table-names map or as a table-name regexp; the database is given
/// as a name or a regexp, with `database_is_regexp_` telling which one it is.
class StorageMerge final : public IStorage, WithContext
2012-05-30 05:53:09 +00:00
{
public:
/// Database name -> set of table names.
using DBToTableSetMap = std::map<String, std::set<String>>;
/// Constructor taking the source tables as an explicit per-database set of table names.
StorageMerge(
const StorageID & table_id_,
const ColumnsDescription & columns_,
const String & comment,
const String & source_database_name_or_regexp_,
bool database_is_regexp_,
const DBToTableSetMap & source_databases_and_tables_,
ContextPtr context_);
/// Constructor taking the source tables as a single table-name regexp.
StorageMerge(
const StorageID & table_id_,
const ColumnsDescription & columns_,
const String & comment,
const String & source_database_name_or_regexp_,
bool database_is_regexp_,
const String & source_table_regexp_,
ContextPtr context_);
/// Always returns the literal "Merge".
std::string getName() const override { return "Merge"; }
/// Defined out of line (not visible in this header).
bool isRemote() const override;
2017-04-16 15:00:33 +00:00
/// The capabilities below are reported unconditionally (each returns true).
/// The actual check is delayed to the read method, which checks what the tables used support.
2015-05-18 21:20:43 +00:00
bool supportsSampling() const override { return true; }
bool supportsFinal() const override { return true; }
bool supportsIndexForIn() const override { return true; }
2020-12-22 16:40:53 +00:00
bool supportsSubcolumns() const override { return true; }
bool supportsPrewhere() const override { return true; }
/// PREWHERE-related overrides; both are defined out of line.
/// supportedPrewhereColumns() may return an empty optional.
std::optional<NameSet> supportedPrewhereColumns() const override;
bool canMoveConditionsToPrewhere() const override;
/// Override defined out of line; the parameters are left unnamed in this declaration.
QueryProcessingStage::Enum
getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override;
2022-05-20 19:49:31 +00:00
/// Query-plan based read override: returns nothing and receives `query_plan` by mutable reference.
/// NOTE(review): presumably adds a ReadFromMerge step to `query_plan` - confirm in the .cpp.
void read(
QueryPlan & query_plan,
2012-05-30 05:53:09 +00:00
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
size_t num_streams) override;
/// Const override defined out of line.
/// NOTE(review): presumably rejects unsupported ALTER commands - confirm in the .cpp.
void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override;
2019-12-26 18:17:05 +00:00
2017-04-16 15:00:33 +00:00
/// Columns have to be added to and removed from the sub-tables manually;
/// the structure of the sub-tables is not checked.
2021-10-25 17:49:49 +00:00
void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
/// Override defined out of line.
bool mayBenefitFromIndexForIn(
const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override;
2018-03-16 09:00:04 +00:00
/// Evaluate database name or regexp for StorageMerge and TableFunction merge.
/// Returns a tuple: a flag telling whether the result is a regexp, and an AST.
static std::tuple<bool /* is_regexp */, ASTPtr> evaluateDatabaseName(const ASTPtr & node, ContextPtr context);
2012-05-30 05:53:09 +00:00
private:
2021-06-04 14:48:48 +00:00
/// Each of these three is optional and may be unset.
/// NOTE(review): which ones are set presumably depends on the constructor used - confirm in the .cpp.
std::optional<OptimizedRegularExpression> source_database_regexp;
std::optional<OptimizedRegularExpression> source_table_regexp;
std::optional<DBToTableSetMap> source_databases_and_tables;
2021-06-25 13:51:17 +00:00
/// Database name or regexp as a string, plus the flag telling which it is (defaults to false).
String source_database_name_or_regexp;
bool database_is_regexp = false;
2021-06-07 09:14:29 +00:00
/// Tuple layout: (Database, Table, Lock, TableName)
using StorageWithLockAndName = std::tuple<String, StoragePtr, TableLockHolder, String>;
2019-12-30 18:20:43 +00:00
using StorageListWithLocks = std::list<StorageWithLockAndName>;
2021-06-07 09:14:29 +00:00
using DatabaseTablesIterators = std::vector<DatabaseTablesIteratorPtr>;
/// Returns a list of (database, table, lock, table name) tuples.
/// `query` defaults to nullptr and both virtual-column filter flags default to false.
StorageMerge::StorageListWithLocks getSelectedTables(
2021-06-25 08:00:30 +00:00
ContextPtr query_context,
const ASTPtr & query = nullptr,
bool filter_by_database_virtual_column = false,
bool filter_by_table_virtual_column = false) const;
/// Member templates taking a callable by forwarding reference; only declared in this header.
/// NOTE(review): getFirstTable presumably returns the first table satisfying `predicate` - confirm in the .cpp.
template <typename F>
StoragePtr getFirstTable(F && predicate) const;
template <typename F>
void forEachTable(F && func) const;
2021-06-25 13:51:17 +00:00
/// Iterator for a single named database.
DatabaseTablesIteratorPtr getDatabaseIterator(const String & database_name, ContextPtr context) const;
2021-06-07 09:14:29 +00:00
/// Vector of database iterators.
DatabaseTablesIterators getDatabaseIterators(ContextPtr context) const;
2020-04-27 13:55:30 +00:00
/// Overrides that are declared in the private section of this class.
NamesAndTypesList getVirtuals() const override;
ColumnSizeByName getColumnSizes() const override;
2022-07-25 19:41:43 +00:00
/// Helpers defined out of line.
ColumnsDescription getColumnsDescriptionFromSourceTables() const;
bool tableSupportsPrewhere() const;
2022-07-25 19:41:43 +00:00
/// Gives ReadFromMerge access to the private members and type aliases of this class.
friend class ReadFromMerge;
};
2023-02-24 12:46:09 +00:00
/// Query plan source step (derives from SourceStepWithFilter) that holds the list of
/// selected tables with their locks and builds a pipeline in initializePipeline().
/// StorageMerge declares this class as a friend.
class ReadFromMerge final : public SourceStepWithFilter
2022-07-25 19:41:43 +00:00
{
public:
/// Step name returned by getName().
static constexpr auto name = "ReadFromMerge";
String getName() const override { return name; }
2022-07-27 12:00:55 +00:00
/// Same alias definitions as the private aliases of the same names in StorageMerge, redeclared here.
using StorageWithLockAndName = std::tuple<String, StoragePtr, TableLockHolder, String>;
using StorageListWithLocks = std::list<StorageWithLockAndName>;
using DatabaseTablesIterators = std::vector<DatabaseTablesIteratorPtr>;
2022-07-26 14:43:05 +00:00
/// All arguments except `query_info_` (const reference) are taken by value.
/// NOTE(review): presumably each argument initializes the similarly named private member below - confirm in the .cpp.
ReadFromMerge(
Block common_header_,
2022-07-27 12:00:55 +00:00
StorageListWithLocks selected_tables_,
2022-07-26 14:43:05 +00:00
Names column_names_,
2022-07-27 12:00:55 +00:00
bool has_database_virtual_column_,
bool has_table_virtual_column_,
2022-07-26 14:43:05 +00:00
size_t max_block_size,
size_t num_streams,
StoragePtr storage,
StorageSnapshotPtr storage_snapshot,
const SelectQueryInfo & query_info_,
ContextMutablePtr context_,
QueryProcessingStage::Enum processed_stage);
2022-07-25 19:41:43 +00:00
/// Override defined out of line; the settings parameter is unnamed in this declaration.
void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
/// Read-only access to the stored list of selected tables (returned by const reference).
const StorageListWithLocks & getSelectedTables() const { return selected_tables; }
/// Returns `false` if requested reading cannot be performed.
bool requestReadingInOrder(InputOrderInfoPtr order_info_);
2022-07-25 19:41:43 +00:00
private:
/// These three members are const, so they cannot be reassigned after construction.
const size_t required_max_block_size;
2022-07-26 14:43:05 +00:00
const size_t requested_num_streams;
const Block common_header;
2022-07-25 19:41:43 +00:00
2022-07-27 12:00:55 +00:00
/// List returned by getSelectedTables().
StorageListWithLocks selected_tables;
2022-07-25 19:41:43 +00:00
Names column_names;
2022-07-27 12:00:55 +00:00
bool has_database_virtual_column;
bool has_table_virtual_column;
2022-07-26 14:43:05 +00:00
/// NOTE(review): presumably the owning StorageMerge and its snapshot (constructor args `storage` / `storage_snapshot`) - confirm in the .cpp.
StoragePtr storage_merge;
StorageSnapshotPtr merge_storage_snapshot;
2022-07-25 19:41:43 +00:00
2023-03-29 09:42:32 +00:00
/// Store read plan for each child table.
/// It's needed to guarantee lifetime for child steps to be the same as for this step (mainly for EXPLAIN PIPELINE).
std::vector<QueryPlan> child_plans;
2022-07-25 19:41:43 +00:00
/// Stored by value; the constructor receives the query info as a const reference.
SelectQueryInfo query_info;
ContextMutablePtr context;
2022-07-26 14:43:05 +00:00
QueryProcessingStage::Enum common_processed_stage;
2022-07-25 19:41:43 +00:00
/// NOTE(review): presumably set by requestReadingInOrder() - confirm in the .cpp.
InputOrderInfoPtr order_info;
2021-06-24 23:25:06 +00:00
/// Description of one alias: its name, data type and expression AST.
struct AliasData
{
String name;
DataTypePtr type;
ASTPtr expression;
};
using Aliases = std::vector<AliasData>;
2023-02-14 11:20:01 +00:00
/// Static helper: takes the query info by const reference and returns a SelectQueryInfo by value.
static SelectQueryInfo getModifiedQueryInfo(const SelectQueryInfo & query_info,
const ContextPtr & modified_context,
const StorageWithLockAndName & storage_with_lock_and_name,
const StorageSnapshotPtr & storage_snapshot);
2022-07-25 19:41:43 +00:00
/// Returns a pipeline builder for one (database, table, lock, table name) entry.
/// `real_column_names` is taken by value; `concat_streams` defaults to false.
QueryPipelineBuilderPtr createSources(
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
const QueryProcessingStage::Enum & processed_stage,
UInt64 max_block_size,
const Block & header,
2021-06-26 12:50:15 +00:00
const Aliases & aliases,
const StorageWithLockAndName & storage_with_lock,
2023-02-15 14:03:52 +00:00
Names real_column_names,
2021-05-31 14:49:02 +00:00
ContextMutablePtr modified_context,
size_t streams_num,
bool concat_streams = false);
/// Static helper that returns nothing and receives `builder` by mutable reference.
/// NOTE(review): presumably converts the source stream to match `header` - confirm in the .cpp.
static void convertingSourceStream(
2023-02-15 14:03:52 +00:00
const Block & header,
const StorageMetadataPtr & metadata_snapshot,
const Aliases & aliases,
2023-01-06 15:04:00 +00:00
ContextPtr context,
2023-02-15 14:03:52 +00:00
QueryPipelineBuilder & builder,
const QueryProcessingStage::Enum & processed_stage);
2012-05-30 05:53:09 +00:00
};
}