2012-05-30 05:53:09 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <utility>

#include <Common/OptimizedRegularExpression.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/IStorage.h>
#include <Processors/QueryPlan/ISourceStep.h>
|
2014-01-10 13:24:50 +00:00
|
|
|
|
2012-05-30 05:53:09 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2022-05-20 19:49:31 +00:00
|
|
|
/// Forward declaration only; this header does not define the struct.
struct QueryPlanResourceHolder;
|
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/** A table that represents the union of an arbitrary number of other tables.
|
|
|
|
* All tables must have the same structure.
|
2012-05-30 05:53:09 +00:00
|
|
|
*/
|
2022-05-03 06:43:28 +00:00
|
|
|
class StorageMerge final : public IStorage, WithContext
|
2012-05-30 05:53:09 +00:00
|
|
|
{
|
|
|
|
public:
|
2022-04-19 20:47:29 +00:00
|
|
|
using DBToTableSetMap = std::map<String, std::set<String>>;
|
|
|
|
|
|
|
|
StorageMerge(
|
|
|
|
const StorageID & table_id_,
|
|
|
|
const ColumnsDescription & columns_,
|
|
|
|
const String & comment,
|
|
|
|
const String & source_database_name_or_regexp_,
|
|
|
|
bool database_is_regexp_,
|
|
|
|
const DBToTableSetMap & source_databases_and_tables_,
|
|
|
|
ContextPtr context_);
|
|
|
|
|
|
|
|
StorageMerge(
|
|
|
|
const StorageID & table_id_,
|
|
|
|
const ColumnsDescription & columns_,
|
|
|
|
const String & comment,
|
|
|
|
const String & source_database_name_or_regexp_,
|
|
|
|
bool database_is_regexp_,
|
|
|
|
const String & source_table_regexp_,
|
|
|
|
ContextPtr context_);
|
|
|
|
|
2014-10-03 17:55:36 +00:00
|
|
|
std::string getName() const override { return "Merge"; }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-06-15 14:07:31 +00:00
|
|
|
bool isRemote() const override;
|
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// The check is delayed to the read method. It checks the support of the tables used.
|
2015-05-18 21:20:43 +00:00
|
|
|
bool supportsSampling() const override { return true; }
|
|
|
|
bool supportsFinal() const override { return true; }
|
|
|
|
bool supportsIndexForIn() const override { return true; }
|
2020-12-22 16:40:53 +00:00
|
|
|
bool supportsSubcolumns() const override { return true; }
|
2021-12-29 01:33:39 +00:00
|
|
|
bool supportsPrewhere() const override { return true; }
|
|
|
|
|
|
|
|
bool canMoveConditionsToPrewhere() const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-04-22 13:32:17 +00:00
|
|
|
QueryProcessingStage::Enum
|
2021-07-09 03:15:41 +00:00
|
|
|
getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override;
|
2018-04-19 14:47:09 +00:00
|
|
|
|
2022-05-20 19:49:31 +00:00
|
|
|
void read(
|
|
|
|
QueryPlan & query_plan,
|
2012-05-30 05:53:09 +00:00
|
|
|
const Names & column_names,
|
2021-07-09 03:15:41 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2020-09-20 17:52:17 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context,
|
2018-04-19 14:47:09 +00:00
|
|
|
QueryProcessingStage::Enum processed_stage,
|
2019-02-18 23:38:44 +00:00
|
|
|
size_t max_block_size,
|
2022-10-07 10:46:45 +00:00
|
|
|
size_t num_streams) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override;
|
2019-12-26 18:17:05 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// you need to add and remove columns in the sub-tables manually
|
|
|
|
/// the structure of sub-tables is not checked
|
2021-10-25 17:49:49 +00:00
|
|
|
void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
|
2014-02-11 18:38:21 +00:00
|
|
|
|
2020-06-17 09:38:47 +00:00
|
|
|
bool mayBenefitFromIndexForIn(
|
2021-04-10 23:33:54 +00:00
|
|
|
const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override;
|
2018-03-16 09:00:04 +00:00
|
|
|
|
2021-09-25 02:48:24 +00:00
|
|
|
/// Evaluate database name or regexp for StorageMerge and TableFunction merge
|
|
|
|
static std::tuple<bool /* is_regexp */, ASTPtr> evaluateDatabaseName(const ASTPtr & node, ContextPtr context);
|
|
|
|
|
2012-05-30 05:53:09 +00:00
|
|
|
private:
|
2021-06-04 14:48:48 +00:00
|
|
|
std::optional<OptimizedRegularExpression> source_database_regexp;
|
2020-12-10 20:16:53 +00:00
|
|
|
std::optional<OptimizedRegularExpression> source_table_regexp;
|
2021-09-25 02:48:24 +00:00
|
|
|
std::optional<DBToTableSetMap> source_databases_and_tables;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-06-25 13:51:17 +00:00
|
|
|
String source_database_name_or_regexp;
|
|
|
|
bool database_is_regexp = false;
|
|
|
|
|
2021-06-07 09:14:29 +00:00
|
|
|
/// (Database, Table, Lock, TableName)
|
|
|
|
using StorageWithLockAndName = std::tuple<String, StoragePtr, TableLockHolder, String>;
|
2019-12-30 18:20:43 +00:00
|
|
|
using StorageListWithLocks = std::list<StorageWithLockAndName>;
|
2021-06-07 09:14:29 +00:00
|
|
|
using DatabaseTablesIterators = std::vector<DatabaseTablesIteratorPtr>;
|
2017-11-04 03:20:18 +00:00
|
|
|
|
2020-04-09 18:10:27 +00:00
|
|
|
StorageMerge::StorageListWithLocks getSelectedTables(
|
2021-06-25 08:00:30 +00:00
|
|
|
ContextPtr query_context,
|
|
|
|
const ASTPtr & query = nullptr,
|
|
|
|
bool filter_by_database_virtual_column = false,
|
|
|
|
bool filter_by_table_virtual_column = false) const;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-07-05 19:56:51 +00:00
|
|
|
template <typename F>
|
|
|
|
StoragePtr getFirstTable(F && predicate) const;
|
|
|
|
|
2021-12-29 01:33:39 +00:00
|
|
|
template <typename F>
|
|
|
|
void forEachTable(F && func) const;
|
|
|
|
|
2021-06-25 13:51:17 +00:00
|
|
|
DatabaseTablesIteratorPtr getDatabaseIterator(const String & database_name, ContextPtr context) const;
|
|
|
|
|
2021-06-07 09:14:29 +00:00
|
|
|
DatabaseTablesIterators getDatabaseIterators(ContextPtr context) const;
|
2020-04-27 13:55:30 +00:00
|
|
|
|
2020-04-28 10:38:57 +00:00
|
|
|
NamesAndTypesList getVirtuals() const override;
|
2020-10-08 20:39:24 +00:00
|
|
|
ColumnSizeByName getColumnSizes() const override;
|
2019-06-02 12:11:01 +00:00
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
ColumnsDescription getColumnsDescriptionFromSourceTables() const;
|
|
|
|
|
|
|
|
friend class ReadFromMerge;
|
|
|
|
};
|
|
|
|
|
|
|
|
class ReadFromMerge final : public ISourceStep
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static constexpr auto name = "ReadFromMerge";
|
|
|
|
String getName() const override { return name; }
|
|
|
|
|
2022-07-27 12:00:55 +00:00
|
|
|
using StorageWithLockAndName = std::tuple<String, StoragePtr, TableLockHolder, String>;
|
|
|
|
using StorageListWithLocks = std::list<StorageWithLockAndName>;
|
|
|
|
using DatabaseTablesIterators = std::vector<DatabaseTablesIteratorPtr>;
|
|
|
|
|
2022-07-26 14:43:05 +00:00
|
|
|
ReadFromMerge(
|
|
|
|
Block common_header_,
|
2022-07-27 12:00:55 +00:00
|
|
|
StorageListWithLocks selected_tables_,
|
2022-07-26 14:43:05 +00:00
|
|
|
Names column_names_,
|
2022-07-27 12:00:55 +00:00
|
|
|
bool has_database_virtual_column_,
|
|
|
|
bool has_table_virtual_column_,
|
2022-07-26 14:43:05 +00:00
|
|
|
size_t max_block_size,
|
|
|
|
size_t num_streams,
|
|
|
|
StoragePtr storage,
|
|
|
|
StorageSnapshotPtr storage_snapshot,
|
|
|
|
const SelectQueryInfo & query_info_,
|
|
|
|
ContextMutablePtr context_,
|
|
|
|
QueryProcessingStage::Enum processed_stage);
|
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
|
|
|
|
|
2022-07-26 15:01:39 +00:00
|
|
|
void addFilter(ActionsDAGPtr expression, std::string column_name)
|
|
|
|
{
|
2022-07-28 09:40:09 +00:00
|
|
|
added_filter_dags.push_back(expression);
|
|
|
|
added_filter_nodes.nodes.push_back(&expression->findInOutputs(column_name));
|
2022-07-26 15:01:39 +00:00
|
|
|
}
|
|
|
|
|
2022-11-01 19:51:52 +00:00
|
|
|
const StorageListWithLocks & getSelectedTables() const { return selected_tables; }
|
|
|
|
|
|
|
|
void requestReadingInOrder(InputOrderInfoPtr order_info_) { order_info = order_info_; }
|
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
private:
|
|
|
|
const size_t required_max_block_size;
|
2022-07-26 14:43:05 +00:00
|
|
|
const size_t requested_num_streams;
|
|
|
|
const Block common_header;
|
2022-07-25 19:41:43 +00:00
|
|
|
|
2022-07-27 12:00:55 +00:00
|
|
|
StorageListWithLocks selected_tables;
|
2022-07-25 19:41:43 +00:00
|
|
|
Names column_names;
|
2022-07-27 12:00:55 +00:00
|
|
|
bool has_database_virtual_column;
|
|
|
|
bool has_table_virtual_column;
|
2022-07-26 14:43:05 +00:00
|
|
|
StoragePtr storage_merge;
|
|
|
|
StorageSnapshotPtr merge_storage_snapshot;
|
2022-07-25 19:41:43 +00:00
|
|
|
|
|
|
|
SelectQueryInfo query_info;
|
|
|
|
ContextMutablePtr context;
|
2022-07-26 14:43:05 +00:00
|
|
|
QueryProcessingStage::Enum common_processed_stage;
|
2022-07-25 19:41:43 +00:00
|
|
|
|
2022-07-28 09:40:09 +00:00
|
|
|
std::vector<ActionsDAGPtr> added_filter_dags;
|
|
|
|
ActionDAGNodes added_filter_nodes;
|
|
|
|
|
2022-07-26 15:01:39 +00:00
|
|
|
std::string added_filter_column_name;
|
|
|
|
|
2022-11-01 19:51:52 +00:00
|
|
|
InputOrderInfoPtr order_info;
|
|
|
|
|
2021-06-24 23:25:06 +00:00
|
|
|
struct AliasData
|
|
|
|
{
|
|
|
|
String name;
|
|
|
|
DataTypePtr type;
|
|
|
|
ASTPtr expression;
|
|
|
|
};
|
|
|
|
|
|
|
|
using Aliases = std::vector<AliasData>;
|
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
QueryPipelineBuilderPtr createSources(
|
2021-07-09 03:15:41 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2020-09-20 17:52:17 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2020-06-16 15:51:29 +00:00
|
|
|
const QueryProcessingStage::Enum & processed_stage,
|
2021-04-10 23:33:54 +00:00
|
|
|
UInt64 max_block_size,
|
2020-06-16 15:51:29 +00:00
|
|
|
const Block & header,
|
2021-06-26 12:50:15 +00:00
|
|
|
const Aliases & aliases,
|
2020-06-16 15:51:29 +00:00
|
|
|
const StorageWithLockAndName & storage_with_lock,
|
2020-01-31 16:29:40 +00:00
|
|
|
Names & real_column_names,
|
2021-05-31 14:49:02 +00:00
|
|
|
ContextMutablePtr modified_context,
|
2020-06-16 15:51:29 +00:00
|
|
|
size_t streams_num,
|
2020-01-31 16:29:40 +00:00
|
|
|
bool concat_streams = false);
|
2018-09-19 10:16:30 +00:00
|
|
|
|
2020-06-16 15:51:29 +00:00
|
|
|
void convertingSourceStream(
|
2021-06-24 23:25:06 +00:00
|
|
|
const Block & header, const StorageMetadataPtr & metadata_snapshot, const Aliases & aliases,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context, ASTPtr & query,
|
2022-07-25 19:41:43 +00:00
|
|
|
QueryPipelineBuilder & builder, QueryProcessingStage::Enum processed_stage);
|
2012-05-30 05:53:09 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|