2012-05-30 05:53:09 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/OptimizedRegularExpression.h>
|
2022-07-25 19:41:43 +00:00
|
|
|
#include <Storages/SelectQueryInfo.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/IStorage.h>
|
2023-02-24 12:46:09 +00:00
|
|
|
#include <Processors/QueryPlan/SourceStepWithFilter.h>
|
2014-01-10 13:24:50 +00:00
|
|
|
|
2012-05-30 05:53:09 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2022-05-20 19:49:31 +00:00
|
|
|
/// Forward declaration only; the full definition lives elsewhere.
/// NOTE(review): no declaration visible in this header refers to this type - confirm it is still needed.
struct QueryPlanResourceHolder;
|
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/** A table that represents the union of an arbitrary number of other tables.
  * All tables must have the same structure.
  */
|
2022-05-03 06:43:28 +00:00
|
|
|
class StorageMerge final : public IStorage, WithContext
|
2012-05-30 05:53:09 +00:00
|
|
|
{
|
|
|
|
public:
|
2022-04-19 20:47:29 +00:00
|
|
|
using DBToTableSetMap = std::map<String, std::set<String>>;
|
|
|
|
|
|
|
|
StorageMerge(
|
|
|
|
const StorageID & table_id_,
|
|
|
|
const ColumnsDescription & columns_,
|
|
|
|
const String & comment,
|
|
|
|
const String & source_database_name_or_regexp_,
|
|
|
|
bool database_is_regexp_,
|
|
|
|
const DBToTableSetMap & source_databases_and_tables_,
|
|
|
|
ContextPtr context_);
|
|
|
|
|
|
|
|
StorageMerge(
|
|
|
|
const StorageID & table_id_,
|
|
|
|
const ColumnsDescription & columns_,
|
|
|
|
const String & comment,
|
|
|
|
const String & source_database_name_or_regexp_,
|
|
|
|
bool database_is_regexp_,
|
|
|
|
const String & source_table_regexp_,
|
|
|
|
ContextPtr context_);
|
|
|
|
|
2014-10-03 17:55:36 +00:00
|
|
|
std::string getName() const override { return "Merge"; }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-06-15 14:07:31 +00:00
|
|
|
bool isRemote() const override;
|
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// The check is delayed to the read method. It checks the support of the tables used.
|
2015-05-18 21:20:43 +00:00
|
|
|
bool supportsSampling() const override { return true; }
|
|
|
|
bool supportsFinal() const override { return true; }
|
|
|
|
bool supportsIndexForIn() const override { return true; }
|
2020-12-22 16:40:53 +00:00
|
|
|
bool supportsSubcolumns() const override { return true; }
|
2021-12-29 01:33:39 +00:00
|
|
|
bool supportsPrewhere() const override { return true; }
|
Fix PREWHERE for Merge with different default types
In case of underlying table has an ALIAS for this column, while in Merge
table it is not marked as an alias, there will NOT_FOUND_COLUMN_IN_BLOCK
error.
Further more, when underlying tables has different default type for the
column, i.e. one has ALIAS and another has real column, then you will
also get NOT_FOUND_COLUMN_IN_BLOCK, because Merge engine should take
care of this.
Also this patch reworks how PREWHERE is handled for Merge table, and now
if you use PREWHERE on the column that has the same type and default
type (ALIAS, ...) then it will be possible, and only if the type
differs, it will be prohibited and throw ILLEGAL_PREWHERE error.
And last, but not least, also respect this restrictions for
optimize_move_to_prewhere.
v2: introduce IStorage::supportedPrewhereColumns()
v3: Remove excessive condition for PREWHERE in StorageMerge::read()
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-02-15 19:03:40 +00:00
|
|
|
std::optional<NameSet> supportedPrewhereColumns() const override;
|
2021-12-29 01:33:39 +00:00
|
|
|
|
|
|
|
bool canMoveConditionsToPrewhere() const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-04-22 13:32:17 +00:00
|
|
|
QueryProcessingStage::Enum
|
2021-07-09 03:15:41 +00:00
|
|
|
getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override;
|
2018-04-19 14:47:09 +00:00
|
|
|
|
2022-05-20 19:49:31 +00:00
|
|
|
void read(
|
|
|
|
QueryPlan & query_plan,
|
2012-05-30 05:53:09 +00:00
|
|
|
const Names & column_names,
|
2021-07-09 03:15:41 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2020-09-20 17:52:17 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context,
|
2018-04-19 14:47:09 +00:00
|
|
|
QueryProcessingStage::Enum processed_stage,
|
2019-02-18 23:38:44 +00:00
|
|
|
size_t max_block_size,
|
2022-10-07 10:46:45 +00:00
|
|
|
size_t num_streams) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override;
|
2019-12-26 18:17:05 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// you need to add and remove columns in the sub-tables manually
|
|
|
|
/// the structure of sub-tables is not checked
|
2021-10-25 17:49:49 +00:00
|
|
|
void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
|
2014-02-11 18:38:21 +00:00
|
|
|
|
2020-06-17 09:38:47 +00:00
|
|
|
bool mayBenefitFromIndexForIn(
|
2021-04-10 23:33:54 +00:00
|
|
|
const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override;
|
2018-03-16 09:00:04 +00:00
|
|
|
|
2021-09-25 02:48:24 +00:00
|
|
|
/// Evaluate database name or regexp for StorageMerge and TableFunction merge
|
|
|
|
static std::tuple<bool /* is_regexp */, ASTPtr> evaluateDatabaseName(const ASTPtr & node, ContextPtr context);
|
|
|
|
|
2012-05-30 05:53:09 +00:00
|
|
|
private:
|
2021-06-04 14:48:48 +00:00
|
|
|
std::optional<OptimizedRegularExpression> source_database_regexp;
|
2020-12-10 20:16:53 +00:00
|
|
|
std::optional<OptimizedRegularExpression> source_table_regexp;
|
2021-09-25 02:48:24 +00:00
|
|
|
std::optional<DBToTableSetMap> source_databases_and_tables;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-06-25 13:51:17 +00:00
|
|
|
String source_database_name_or_regexp;
|
|
|
|
bool database_is_regexp = false;
|
|
|
|
|
2021-06-07 09:14:29 +00:00
|
|
|
/// (Database, Table, Lock, TableName)
|
|
|
|
using StorageWithLockAndName = std::tuple<String, StoragePtr, TableLockHolder, String>;
|
2019-12-30 18:20:43 +00:00
|
|
|
using StorageListWithLocks = std::list<StorageWithLockAndName>;
|
2021-06-07 09:14:29 +00:00
|
|
|
using DatabaseTablesIterators = std::vector<DatabaseTablesIteratorPtr>;
|
2017-11-04 03:20:18 +00:00
|
|
|
|
2020-04-09 18:10:27 +00:00
|
|
|
StorageMerge::StorageListWithLocks getSelectedTables(
|
2021-06-25 08:00:30 +00:00
|
|
|
ContextPtr query_context,
|
|
|
|
const ASTPtr & query = nullptr,
|
|
|
|
bool filter_by_database_virtual_column = false,
|
|
|
|
bool filter_by_table_virtual_column = false) const;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-07-05 19:56:51 +00:00
|
|
|
template <typename F>
|
|
|
|
StoragePtr getFirstTable(F && predicate) const;
|
|
|
|
|
2021-12-29 01:33:39 +00:00
|
|
|
template <typename F>
|
|
|
|
void forEachTable(F && func) const;
|
|
|
|
|
2021-06-25 13:51:17 +00:00
|
|
|
DatabaseTablesIteratorPtr getDatabaseIterator(const String & database_name, ContextPtr context) const;
|
|
|
|
|
2021-06-07 09:14:29 +00:00
|
|
|
DatabaseTablesIterators getDatabaseIterators(ContextPtr context) const;
|
2020-04-27 13:55:30 +00:00
|
|
|
|
2020-04-28 10:38:57 +00:00
|
|
|
NamesAndTypesList getVirtuals() const override;
|
2020-10-08 20:39:24 +00:00
|
|
|
ColumnSizeByName getColumnSizes() const override;
|
2019-06-02 12:11:01 +00:00
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
ColumnsDescription getColumnsDescriptionFromSourceTables() const;
|
|
|
|
|
2023-01-16 13:39:46 +00:00
|
|
|
bool tableSupportsPrewhere() const;
|
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
friend class ReadFromMerge;
|
|
|
|
};
|
|
|
|
|
2023-02-24 12:46:09 +00:00
|
|
|
class ReadFromMerge final : public SourceStepWithFilter
|
2022-07-25 19:41:43 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
static constexpr auto name = "ReadFromMerge";
|
|
|
|
String getName() const override { return name; }
|
|
|
|
|
2022-07-27 12:00:55 +00:00
|
|
|
using StorageWithLockAndName = std::tuple<String, StoragePtr, TableLockHolder, String>;
|
|
|
|
using StorageListWithLocks = std::list<StorageWithLockAndName>;
|
|
|
|
using DatabaseTablesIterators = std::vector<DatabaseTablesIteratorPtr>;
|
|
|
|
|
2022-07-26 14:43:05 +00:00
|
|
|
ReadFromMerge(
|
|
|
|
Block common_header_,
|
2022-07-27 12:00:55 +00:00
|
|
|
StorageListWithLocks selected_tables_,
|
2022-07-26 14:43:05 +00:00
|
|
|
Names column_names_,
|
2022-07-27 12:00:55 +00:00
|
|
|
bool has_database_virtual_column_,
|
|
|
|
bool has_table_virtual_column_,
|
2022-07-26 14:43:05 +00:00
|
|
|
size_t max_block_size,
|
|
|
|
size_t num_streams,
|
|
|
|
StoragePtr storage,
|
|
|
|
StorageSnapshotPtr storage_snapshot,
|
|
|
|
const SelectQueryInfo & query_info_,
|
|
|
|
ContextMutablePtr context_,
|
|
|
|
QueryProcessingStage::Enum processed_stage);
|
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
|
|
|
|
|
2022-11-01 19:51:52 +00:00
|
|
|
const StorageListWithLocks & getSelectedTables() const { return selected_tables; }
|
|
|
|
|
2023-02-09 11:00:22 +00:00
|
|
|
/// Returns `false` if requested reading cannot be performed.
|
|
|
|
bool requestReadingInOrder(InputOrderInfoPtr order_info_);
|
2022-11-01 19:51:52 +00:00
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
private:
|
|
|
|
const size_t required_max_block_size;
|
2022-07-26 14:43:05 +00:00
|
|
|
const size_t requested_num_streams;
|
|
|
|
const Block common_header;
|
2022-07-25 19:41:43 +00:00
|
|
|
|
2022-07-27 12:00:55 +00:00
|
|
|
StorageListWithLocks selected_tables;
|
2022-07-25 19:41:43 +00:00
|
|
|
Names column_names;
|
2022-07-27 12:00:55 +00:00
|
|
|
bool has_database_virtual_column;
|
|
|
|
bool has_table_virtual_column;
|
2022-07-26 14:43:05 +00:00
|
|
|
StoragePtr storage_merge;
|
|
|
|
StorageSnapshotPtr merge_storage_snapshot;
|
2022-07-25 19:41:43 +00:00
|
|
|
|
2023-03-29 09:42:32 +00:00
|
|
|
/// Store read plan for each child table.
|
2023-04-02 08:09:39 +00:00
|
|
|
/// It's needed to guarantee lifetime for child steps to be the same as for this step (mainly for EXPLAIN PIPELINE).
|
2023-03-28 11:04:07 +00:00
|
|
|
std::vector<QueryPlan> child_plans;
|
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
SelectQueryInfo query_info;
|
|
|
|
ContextMutablePtr context;
|
2022-07-26 14:43:05 +00:00
|
|
|
QueryProcessingStage::Enum common_processed_stage;
|
2022-07-25 19:41:43 +00:00
|
|
|
|
2022-11-01 19:51:52 +00:00
|
|
|
InputOrderInfoPtr order_info;
|
|
|
|
|
2021-06-24 23:25:06 +00:00
|
|
|
struct AliasData
|
|
|
|
{
|
|
|
|
String name;
|
|
|
|
DataTypePtr type;
|
|
|
|
ASTPtr expression;
|
|
|
|
};
|
|
|
|
|
|
|
|
using Aliases = std::vector<AliasData>;
|
|
|
|
|
2023-02-14 11:20:01 +00:00
|
|
|
static SelectQueryInfo getModifiedQueryInfo(const SelectQueryInfo & query_info,
|
|
|
|
const ContextPtr & modified_context,
|
|
|
|
const StorageWithLockAndName & storage_with_lock_and_name,
|
|
|
|
const StorageSnapshotPtr & storage_snapshot);
|
|
|
|
|
2022-07-25 19:41:43 +00:00
|
|
|
QueryPipelineBuilderPtr createSources(
|
2021-07-09 03:15:41 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2020-09-20 17:52:17 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2020-06-16 15:51:29 +00:00
|
|
|
const QueryProcessingStage::Enum & processed_stage,
|
2021-04-10 23:33:54 +00:00
|
|
|
UInt64 max_block_size,
|
2020-06-16 15:51:29 +00:00
|
|
|
const Block & header,
|
2021-06-26 12:50:15 +00:00
|
|
|
const Aliases & aliases,
|
2020-06-16 15:51:29 +00:00
|
|
|
const StorageWithLockAndName & storage_with_lock,
|
2023-02-15 14:03:52 +00:00
|
|
|
Names real_column_names,
|
2021-05-31 14:49:02 +00:00
|
|
|
ContextMutablePtr modified_context,
|
2020-06-16 15:51:29 +00:00
|
|
|
size_t streams_num,
|
2020-01-31 16:29:40 +00:00
|
|
|
bool concat_streams = false);
|
2018-09-19 10:16:30 +00:00
|
|
|
|
2023-01-06 13:26:15 +00:00
|
|
|
static void convertingSourceStream(
|
2023-02-15 14:03:52 +00:00
|
|
|
const Block & header,
|
|
|
|
const StorageMetadataPtr & metadata_snapshot,
|
|
|
|
const Aliases & aliases,
|
2023-01-06 15:04:00 +00:00
|
|
|
ContextPtr context,
|
2023-02-15 14:03:52 +00:00
|
|
|
QueryPipelineBuilder & builder,
|
|
|
|
const QueryProcessingStage::Enum & processed_stage);
|
2012-05-30 05:53:09 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|