2017-04-01 09:19:00 +00:00
|
|
|
#include <DataStreams/narrowBlockInputStreams.h>
|
2018-02-23 01:00:47 +00:00
|
|
|
#include <DataStreams/OneBlockInputStream.h>
|
|
|
|
#include <DataStreams/materializeBlock.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/StorageMerge.h>
|
2017-12-30 00:36:06 +00:00
|
|
|
#include <Storages/StorageFactory.h>
|
2017-11-20 05:22:54 +00:00
|
|
|
#include <Storages/VirtualColumnUtils.h>
|
2018-12-25 23:14:39 +00:00
|
|
|
#include <Storages/AlterCommands.h>
|
2018-11-08 15:43:14 +00:00
|
|
|
#include <Interpreters/SyntaxAnalyzer.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/ExpressionActions.h>
|
2017-12-30 00:36:06 +00:00
|
|
|
#include <Interpreters/evaluateConstantExpression.h>
|
2018-02-23 01:00:47 +00:00
|
|
|
#include <Interpreters/InterpreterSelectQuery.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
2017-12-30 00:36:06 +00:00
|
|
|
#include <Parsers/ASTLiteral.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <DataTypes/DataTypeString.h>
|
|
|
|
#include <Columns/ColumnString.h>
|
2017-07-13 20:58:19 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2019-08-10 17:51:47 +00:00
|
|
|
#include <Common/checkStackSize.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Databases/IDatabase.h>
|
2018-09-19 10:16:30 +00:00
|
|
|
#include <ext/range.h>
|
2019-01-09 15:44:20 +00:00
|
|
|
#include <algorithm>
|
2018-09-20 05:40:06 +00:00
|
|
|
#include <Parsers/queryToString.h>
|
2020-01-31 16:29:40 +00:00
|
|
|
#include <Processors/Sources/SourceFromInputStream.h>
|
|
|
|
#include <Processors/Transforms/MaterializingTransform.h>
|
|
|
|
#include <Processors/ConcatProcessor.h>
|
|
|
|
#include <Processors/Transforms/AddingConstColumnTransform.h>
|
|
|
|
#include <Processors/Transforms/ConvertingTransform.h>
|
2014-12-30 18:04:53 +00:00
|
|
|
|
2012-05-30 05:53:09 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
/// Error codes raised from this file. They are only declared here (`extern`).
namespace ErrorCodes
{
    extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE;
    extern const int ILLEGAL_PREWHERE;
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int SAMPLING_NOT_SUPPORTED;
}
|
|
|
|
|
|
|
|
|
2014-09-30 03:08:47 +00:00
|
|
|
/** Creates a Merge table.
  * `source_database_` and `table_name_regexp_` are remembered and later used by
  * getDatabaseIterator() to enumerate the underlying tables;
  * `columns_` becomes the column list of the table's in-memory metadata.
  */
StorageMerge::StorageMerge(
    const StorageID & table_id_,
    const ColumnsDescription & columns_,
    const String & source_database_,
    const String & table_name_regexp_,
    const Context & context_)
    : IStorage(table_id_)
    , source_database(source_database_)
    , table_name_regexp(table_name_regexp_)
    , global_context(context_)
{
    /// The columns are the only piece of metadata that is set explicitly.
    StorageInMemoryMetadata initial_metadata;
    initial_metadata.setColumns(columns_);
    setInMemoryMetadata(initial_metadata);
}
|
|
|
|
|
2018-07-05 19:56:51 +00:00
|
|
|
template <typename F>
|
|
|
|
StoragePtr StorageMerge::getFirstTable(F && predicate) const
|
2017-07-12 18:44:27 +00:00
|
|
|
{
|
2020-05-28 20:10:45 +00:00
|
|
|
auto iterator = getDatabaseIterator(global_context);
|
2017-07-12 18:44:27 +00:00
|
|
|
|
2018-02-23 01:00:47 +00:00
|
|
|
while (iterator->isValid())
|
2017-07-12 18:44:27 +00:00
|
|
|
{
|
2020-04-22 06:22:14 +00:00
|
|
|
const auto & table = iterator->table();
|
2019-06-02 12:11:01 +00:00
|
|
|
if (table.get() != this && predicate(table))
|
|
|
|
return table;
|
2018-02-23 01:00:47 +00:00
|
|
|
|
|
|
|
iterator->next();
|
2017-07-12 18:44:27 +00:00
|
|
|
}
|
2018-02-23 01:00:47 +00:00
|
|
|
|
2018-07-05 19:56:51 +00:00
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool StorageMerge::isRemote() const
|
|
|
|
{
|
2020-06-02 02:06:16 +00:00
|
|
|
auto first_remote_table = getFirstTable([](const StoragePtr & table) { return table && table->isRemote(); });
|
2018-07-05 19:56:51 +00:00
|
|
|
return first_remote_table != nullptr;
|
2017-07-12 18:44:27 +00:00
|
|
|
}
|
|
|
|
|
2017-07-21 20:59:01 +00:00
|
|
|
|
2020-06-17 09:38:47 +00:00
|
|
|
bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const
|
2018-03-16 09:00:04 +00:00
|
|
|
{
|
|
|
|
/// It's beneficial if it is true for at least one table.
|
2020-04-09 18:10:27 +00:00
|
|
|
StorageListWithLocks selected_tables = getSelectedTables(
|
|
|
|
query_context.getCurrentQueryId(), query_context.getSettingsRef());
|
2018-03-16 09:00:04 +00:00
|
|
|
|
|
|
|
size_t i = 0;
|
|
|
|
for (const auto & table : selected_tables)
|
|
|
|
{
|
2020-06-17 09:38:47 +00:00
|
|
|
auto storage_ptr = std::get<0>(table);
|
|
|
|
auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
|
|
|
|
if (storage_ptr->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot))
|
2018-03-16 09:00:04 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
++i;
|
|
|
|
/// For simplicity reasons, check only first ten tables.
|
|
|
|
if (i > 10)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-04-08 00:45:11 +00:00
|
|
|
/** Determines up to which stage the query can be processed inside the underlying tables.
  * The result is the maximum stage reported by the underlying tables; when the number of
  * underlying tables is not exactly one, it is additionally capped at WithMergeableState.
  */
QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const
{
    auto max_source_stage = QueryProcessingStage::FetchColumns;
    size_t source_tables_count = 0;

    for (DatabaseTablesIteratorPtr tables_it = getDatabaseIterator(context); tables_it->isValid(); tables_it->next())
    {
        const auto & table = tables_it->table();

        /// Skip empty entries and the Merge table itself.
        if (!table || table.get() == this)
            continue;

        ++source_tables_count;
        max_source_stage = std::max(max_source_stage, table->getQueryProcessingStage(context, to_stage, query_ptr));
    }

    if (source_tables_count == 1)
        return max_source_stage;

    return std::min(max_source_stage, QueryProcessingStage::WithMergeableState);
}
|
|
|
|
|
|
|
|
|
2020-02-19 16:07:28 +00:00
|
|
|
/** Reads from all selected underlying tables and returns the combined pipes.
  *
  * Steps:
  *  1. separate the `_table` virtual column from the columns that must be read from the tables;
  *  2. compute the result header for the requested processing stage;
  *  3. select (and lock) the underlying tables, filtering by `_table` if it is requested;
  *  4. distribute `num_streams` between the tables and create sources for each of them;
  *  5. narrow the resulting pipes down to `num_streams`.
  *
  * Throws SAMPLING_NOT_SUPPORTED if the query uses SAMPLE and some selected table does not support it.
  */
Pipes StorageMerge::read(
    const Names & column_names,
    const StorageMetadataPtr & metadata_snapshot,
    const SelectQueryInfo & query_info,
    const Context & context,
    QueryProcessingStage::Enum processed_stage,
    const size_t max_block_size,
    unsigned num_streams)
{
    bool has_table_virtual_column = false;
    Names real_column_names;
    real_column_names.reserve(column_names.size());

    for (const auto & name : column_names)
    {
        const bool is_table_virtual = name == "_table" && isVirtualColumn(name, metadata_snapshot);
        if (is_table_virtual)
            has_table_virtual_column = true;
        else
            real_column_names.push_back(name);
    }

    /** The "move to PREWHERE" optimization is switched off as a precaution:
      * it is not certain that it works when one table is MergeTree and another is not.
      */
    auto modified_context = std::make_shared<Context>(context);
    modified_context->setSetting("optimize_move_to_prewhere", false);

    /// Structure of the result for the requested processing stage.
    Block header = getQueryHeader(column_names, metadata_snapshot, query_info, context, processed_stage);

    /** The list of tables is built up front because its size is needed
      * to pass a sensible number of streams to every table.
      */
    StorageListWithLocks selected_tables = getSelectedTables(
        query_info.query, has_table_virtual_column, context.getCurrentQueryId(), context.getSettingsRef());

    if (selected_tables.empty())
        /// FIXME: do we support sampling in this case?
        return createSources(
            {}, query_info, processed_stage, max_block_size, header, {}, real_column_names, modified_context, 0, has_table_virtual_column);

    const size_t tables_count = selected_tables.size();
    const Float64 num_streams_multiplier = std::min(unsigned(tables_count), std::max(1U, unsigned(context.getSettingsRef().max_streams_multiplier_for_merge_tables)));
    num_streams *= num_streams_multiplier;
    size_t remaining_streams = num_streams;

    /// Reading in order is kept only if every selected table reports the same input order.
    if (query_info.order_optimizer)
    {
        InputOrderInfoPtr common_order_info;
        bool is_first_table = true;

        for (const auto & selected_table : selected_tables)
        {
            auto table_storage = std::get<0>(selected_table);
            auto table_metadata_snapshot = table_storage->getInMemoryMetadataPtr();
            auto table_order_info = query_info.order_optimizer->getInputOrder(table_storage, table_metadata_snapshot);

            if (is_first_table)
                common_order_info = table_order_info;
            else if (!table_order_info || (common_order_info && *table_order_info != *common_order_info))
                common_order_info.reset();

            is_first_table = false;

            if (!common_order_info)
                break;
        }

        query_info.input_order_info = common_order_info;
    }

    /// Per-table stream quota; it does not change between iterations.
    const size_t streams_per_table = tables_count >= num_streams ? 1 : (num_streams / tables_count);

    Pipes res;
    for (const auto & selected_table : selected_tables)
    {
        size_t table_streams = std::min(streams_per_table, remaining_streams);
        remaining_streams -= table_streams;
        /// Every table gets at least one stream, even when the quota is exhausted.
        table_streams = std::max(size_t(1), table_streams);

        const auto & storage = std::get<0>(selected_table);

        /// SAMPLE is allowed only if the table supports it.
        if (query_info.query->as<ASTSelectQuery>()->sampleSize() && !storage->supportsSampling())
            throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED);

        auto table_metadata_snapshot = storage->getInMemoryMetadataPtr();

        auto table_pipes = createSources(
            table_metadata_snapshot, query_info, processed_stage,
            max_block_size, header, selected_table, real_column_names, modified_context,
            table_streams, has_table_virtual_column);

        for (auto & table_pipe : table_pipes)
            res.emplace_back(std::move(table_pipe));
    }

    if (res.empty())
        return res;

    return narrowPipes(std::move(res), num_streams);
}
|
|
|
|
|
2020-06-16 15:51:29 +00:00
|
|
|
/** Builds the pipes that read from a single underlying table.
  *
  * `storage_with_lock` is a (storage, lock, table name) triple. If the storage is empty,
  * a single pipe is produced by interpreting the query over a one-block stream made of `header`.
  * Otherwise the table is either read directly (when the requested stage does not exceed the stage
  * the table itself reports) or queried through InterpreterSelectQuery.
  *
  * `real_column_names` may be modified: if it is empty, the smallest physical column is appended.
  * NOTE(review): the appended column stays in the caller's list for subsequent calls.
  *
  * Every produced pipe gets the `_table` constant column (if requested), is converted to `header`,
  * and holds the table lock and the interpreter context.
  */
Pipes StorageMerge::createSources(
    const StorageMetadataPtr & metadata_snapshot,
    const SelectQueryInfo & query_info,
    const QueryProcessingStage::Enum & processed_stage,
    const UInt64 max_block_size,
    const Block & header,
    const StorageWithLockAndName & storage_with_lock,
    Names & real_column_names,
    const std::shared_ptr<Context> & modified_context,
    size_t streams_num,
    bool has_table_virtual_column,
    bool concat_streams)
{
    const auto & [storage, struct_lock, table_name] = storage_with_lock;

    /// Work on a private copy of the query in which `_table` is rewritten for this table name.
    SelectQueryInfo modified_query_info = query_info;
    modified_query_info.query = query_info.query->clone();
    VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", table_name);

    Pipes pipes;

    if (!storage)
    {
        auto pipe = InterpreterSelectQuery(
            modified_query_info.query, *modified_context,
            std::make_shared<OneBlockInputStream>(header),
            SelectQueryOptions(processed_stage).analyze()).execute().pipeline.getPipe();

        pipe.addInterpreterContext(modified_context);
        pipes.emplace_back(std::move(pipe));
        return pipes;
    }

    const auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, query_info.query);

    if (processed_stage <= storage_stage)
    {
        /// A query with only virtual columns still has to read at least one real column.
        if (real_column_names.empty())
            real_column_names.push_back(ExpressionActions::getSmallestColumn(metadata_snapshot->getColumns().getAllPhysical()));

        pipes = storage->read(real_column_names, metadata_snapshot, modified_query_info, *modified_context, processed_stage, max_block_size, UInt32(streams_num));
    }
    else
    {
        modified_query_info.query->as<ASTSelectQuery>()->replaceDatabaseAndTable(source_database, table_name);

        /// Parallelism must not exceed streams_num.
        modified_context->setSetting("max_threads", streams_num);
        modified_context->setSetting("max_streams_to_max_threads_ratio", 1);

        InterpreterSelectQuery interpreter{modified_query_info.query, *modified_context, SelectQueryOptions(processed_stage)};
        pipes.emplace_back(interpreter.execute().pipeline.getPipe());

        /** Constants arrive materialized from distributed storage, so they are materialized here too.
          * Otherwise Const and non-Const columns would be produced in different threads,
          * which is not allowed: all code assumes the same types throughout a block stream.
          */
        pipes.back().addSimpleTransform(std::make_shared<MaterializingTransform>(pipes.back().getHeader()));
    }

    if (pipes.empty())
        return pipes;

    if (concat_streams && pipes.size() > 1)
    {
        /// Collapse all pipes of this table into a single one.
        auto concat = std::make_shared<ConcatProcessor>(pipes.at(0).getHeader(), pipes.size());
        Pipe concatenated(std::move(pipes), std::move(concat));

        pipes = Pipes();
        pipes.emplace_back(std::move(concatenated));
    }

    for (auto & pipe : pipes)
    {
        if (has_table_virtual_column)
            pipe.addSimpleTransform(std::make_shared<AddingConstColumnTransform<String>>(
                pipe.getHeader(), std::make_shared<DataTypeString>(), table_name, "_table"));

        /// Underlying tables may have different but convertible types (e.g. numeric types of different width).
        /// The returned pipes must have the structure of the Merge table.
        convertingSourceStream(header, metadata_snapshot, *modified_context, modified_query_info.query, pipe, processed_stage);

        pipe.addTableLock(struct_lock);
        pipe.addInterpreterContext(modified_context);
    }

    return pipes;
}
|
|
|
|
|
2019-06-02 12:11:01 +00:00
|
|
|
|
2020-04-09 18:10:27 +00:00
|
|
|
/** Collects every non-null table yielded by getDatabaseIterator(), except this Merge table itself.
  * Each entry carries the storage, a share lock taken for `query_id`
  * (with `settings.lock_acquire_timeout`) and the table name.
  */
StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String & query_id, const Settings & settings) const
{
    StorageListWithLocks result;

    for (auto tables_it = getDatabaseIterator(global_context); tables_it->isValid(); tables_it->next())
    {
        const auto & table = tables_it->table();
        if (!table || table.get() == this)
            continue;

        result.emplace_back(
            table, table->lockForShare(query_id, settings.lock_acquire_timeout), tables_it->name());
    }

    return result;
}
|
2013-09-23 12:01:19 +00:00
|
|
|
|
|
|
|
|
2020-04-09 18:10:27 +00:00
|
|
|
/** Collects the underlying tables that take part in `query`, together with their share locks and names.
  *
  * @param query               the SELECT query; used for the PREWHERE check and for filtering by `_table`.
  * @param has_virtual_column  whether the `_table` virtual column is requested; if so, the list is
  *                            narrowed to the tables whose names pass the query's conditions on `_table`.
  * @param query_id            id under which the share locks are taken.
  * @param settings            supplies `lock_acquire_timeout` for the locks.
  *
  * Throws ILLEGAL_PREWHERE if the query has PREWHERE and a matched table does not support it.
  * Null entries yielded by the iterator are skipped.
  */
StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(
    const ASTPtr & query, bool has_virtual_column, const String & query_id, const Settings & settings) const
{
    StorageListWithLocks selected_tables;
    auto virtual_column = ColumnString::create();

    /// The iterator is advanced in the loop header, so skipping an entry with `continue`
    /// always makes progress and cannot stall the loop on a null table.
    for (DatabaseTablesIteratorPtr iterator = getDatabaseIterator(global_context); iterator->isValid(); iterator->next())
    {
        StoragePtr storage = iterator->table();
        if (!storage)
            continue;

        if (query && query->as<ASTSelectQuery>()->prewhere() && !storage->supportsPrewhere())
            throw Exception("Storage " + storage->getName() + " doesn't support PREWHERE.", ErrorCodes::ILLEGAL_PREWHERE);

        /// The Merge table must not read from itself.
        if (storage.get() != this)
        {
            selected_tables.emplace_back(
                storage, storage->lockForShare(query_id, settings.lock_acquire_timeout), iterator->name());
            virtual_column->insert(iterator->name());
        }
    }

    if (has_virtual_column)
    {
        /// Evaluate the query's conditions against a block that holds all candidate table names.
        Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(virtual_column), std::make_shared<DataTypeString>(), "_table")};
        VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, global_context);
        auto values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_table");

        /// Remove unused tables from the list
        selected_tables.remove_if([&] (const auto & elem) { return values.find(std::get<2>(elem)) == values.end(); });
    }

    return selected_tables;
}
|
|
|
|
|
2019-06-02 12:11:01 +00:00
|
|
|
|
2020-05-28 20:10:45 +00:00
|
|
|
/** Returns an iterator over the tables of `source_database` whose names match `table_name_regexp`.
  * NOTE(review): the stack size check presumably guards against deep recursion
  * when Merge tables refer to each other - confirm.
  */
DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(const Context & context) const
{
    checkStackSize();

    auto source = DatabaseCatalog::instance().getDatabase(source_database);
    auto matches_regexp = [this](const String & candidate_name) { return table_name_regexp.match(candidate_name); };

    return source->getTablesIterator(context, matches_regexp);
}
|
|
|
|
|
|
|
|
|
2020-06-10 11:16:31 +00:00
|
|
|
void StorageMerge::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const
|
2019-12-26 18:17:05 +00:00
|
|
|
{
|
|
|
|
for (const auto & command : commands)
|
|
|
|
{
|
|
|
|
if (command.type != AlterCommand::Type::ADD_COLUMN && command.type != AlterCommand::Type::MODIFY_COLUMN
|
|
|
|
&& command.type != AlterCommand::Type::DROP_COLUMN && command.type != AlterCommand::Type::COMMENT_COLUMN)
|
|
|
|
throw Exception(
|
|
|
|
"Alter of type '" + alterTypeToString(command.type) + "' is not supported by storage " + getName(),
|
|
|
|
ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-05 10:12:20 +00:00
|
|
|
void StorageMerge::alter(
|
2020-06-18 16:10:47 +00:00
|
|
|
const AlterCommands & params, const Context & context, TableLockHolder &)
|
2013-09-23 12:01:19 +00:00
|
|
|
{
|
2019-12-03 16:25:32 +00:00
|
|
|
auto table_id = getStorageID();
|
2016-05-13 21:08:19 +00:00
|
|
|
|
2019-12-26 18:17:05 +00:00
|
|
|
StorageInMemoryMetadata storage_metadata = getInMemoryMetadata();
|
2020-05-28 12:37:05 +00:00
|
|
|
params.apply(storage_metadata, context);
|
2020-03-23 22:40:40 +00:00
|
|
|
DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, storage_metadata);
|
2020-06-15 16:55:33 +00:00
|
|
|
setInMemoryMetadata(storage_metadata);
|
2013-09-23 12:01:19 +00:00
|
|
|
}
|
2014-07-11 08:12:03 +00:00
|
|
|
|
2018-09-18 11:09:21 +00:00
|
|
|
/** Computes the structure of the blocks that read() returns for the given processing stage.
  *  - FetchColumns: the sample block for `column_names` (virtual columns included),
  *    with the PREWHERE actions applied when the query has PREWHERE info.
  *  - WithMergeableState / Complete: the sample block of the query analyzed over that sample block.
  * Any other stage results in a LOGICAL_ERROR exception.
  */
Block StorageMerge::getQueryHeader(
    const Names & column_names,
    const StorageMetadataPtr & metadata_snapshot,
    const SelectQueryInfo & query_info,
    const Context & context,
    QueryProcessingStage::Enum processed_stage)
{
    if (processed_stage == QueryProcessingStage::FetchColumns)
    {
        Block header = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals());

        if (const auto & prewhere = query_info.prewhere_info)
        {
            prewhere->prewhere_actions->execute(header);
            if (prewhere->remove_prewhere_column)
                header.erase(prewhere->prewhere_column_name);
        }

        return header;
    }

    const bool needs_interpreter = processed_stage == QueryProcessingStage::WithMergeableState
        || processed_stage == QueryProcessingStage::Complete;

    if (needs_interpreter)
    {
        auto sample_stream = std::make_shared<OneBlockInputStream>(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()));
        return InterpreterSelectQuery(
            query_info.query, context, sample_stream,
            SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
    }

    throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR);
}
|
|
|
|
|
2020-06-16 15:51:29 +00:00
|
|
|
void StorageMerge::convertingSourceStream(
|
|
|
|
const Block & header,
|
|
|
|
const StorageMetadataPtr & metadata_snapshot,
|
|
|
|
const Context & context,
|
|
|
|
ASTPtr & query,
|
|
|
|
Pipe & pipe,
|
|
|
|
QueryProcessingStage::Enum processed_stage)
|
2018-09-19 10:16:30 +00:00
|
|
|
{
|
2020-01-31 16:29:40 +00:00
|
|
|
Block before_block_header = pipe.getHeader();
|
2020-04-14 21:05:45 +00:00
|
|
|
pipe.addSimpleTransform(std::make_shared<ConvertingTransform>(before_block_header, header, ConvertingTransform::MatchColumnsMode::Name));
|
2018-09-19 10:16:30 +00:00
|
|
|
|
2019-04-09 14:22:35 +00:00
|
|
|
auto where_expression = query->as<ASTSelectQuery>()->where();
|
2018-09-19 10:16:30 +00:00
|
|
|
|
2018-09-20 05:40:06 +00:00
|
|
|
if (!where_expression)
|
2018-09-19 10:16:30 +00:00
|
|
|
return;
|
|
|
|
|
2018-09-20 05:40:06 +00:00
|
|
|
for (size_t column_index : ext::range(0, header.columns()))
|
2018-09-19 10:16:30 +00:00
|
|
|
{
|
2018-09-20 05:40:06 +00:00
|
|
|
ColumnWithTypeAndName header_column = header.getByPosition(column_index);
|
|
|
|
ColumnWithTypeAndName before_column = before_block_header.getByName(header_column.name);
|
|
|
|
/// If the processed_stage greater than FetchColumns and the block structure between streams is different.
|
|
|
|
/// the where expression maybe invalid because of convertingBlockInputStream.
|
|
|
|
/// So we need to throw exception.
|
|
|
|
if (!header_column.type->equals(*before_column.type.get()) && processed_stage > QueryProcessingStage::FetchColumns)
|
2018-09-19 10:16:30 +00:00
|
|
|
{
|
2020-06-16 15:51:29 +00:00
|
|
|
NamesAndTypesList source_columns = metadata_snapshot->getSampleBlock().getNamesAndTypesList();
|
2020-04-27 15:38:35 +00:00
|
|
|
auto virtual_column = *getVirtuals().tryGetByName("_table");
|
2020-04-24 10:20:03 +00:00
|
|
|
source_columns.emplace_back(NameAndTypePair{virtual_column.name, virtual_column.type});
|
2019-01-09 16:16:59 +00:00
|
|
|
auto syntax_result = SyntaxAnalyzer(context).analyze(where_expression, source_columns);
|
2018-11-08 17:28:52 +00:00
|
|
|
ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, context}.getActions(false, false);
|
2018-09-20 05:40:06 +00:00
|
|
|
Names required_columns = actions->getRequiredColumns();
|
2018-09-19 10:16:30 +00:00
|
|
|
|
2019-01-04 12:10:00 +00:00
|
|
|
for (const auto & required_column : required_columns)
|
2018-09-20 05:40:06 +00:00
|
|
|
{
|
|
|
|
if (required_column == header_column.name)
|
|
|
|
throw Exception("Block structure mismatch in Merge Storage: different types:\n" + before_block_header.dumpStructure()
|
|
|
|
+ "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);
|
|
|
|
}
|
2018-09-19 10:16:30 +00:00
|
|
|
}
|
2018-09-20 05:40:06 +00:00
|
|
|
|
2018-09-19 10:16:30 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-30 00:36:06 +00:00
|
|
|
|
|
|
|
/** Registers the "Merge" table engine in the storage factory.
  * The engine takes exactly two arguments: the name of the source database
  * and a regular expression for the names of the source tables.
  */
void registerStorageMerge(StorageFactory & factory)
{
    auto create_merge_storage = [](const StorageFactory::Arguments & args)
    {
        ASTs & engine_args = args.engine_args;

        const bool has_two_arguments = engine_args.size() == 2;
        if (!has_two_arguments)
            throw Exception("Storage Merge requires exactly 2 parameters"
                " - name of source database and regexp for table names.",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        /// Both arguments are evaluated in place so that they can be read as literals below.
        engine_args[0] = evaluateConstantExpressionForDatabaseName(engine_args[0], args.local_context);
        engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.local_context);

        String database_name = engine_args[0]->as<ASTLiteral &>().value.safeGet<String>();
        String name_regexp = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>();

        return StorageMerge::create(
            args.table_id, args.columns,
            database_name, name_regexp, args.context);
    };

    factory.registerStorage("Merge", create_merge_storage);
}
|
2020-04-28 10:38:57 +00:00
|
|
|
|
|
|
|
/** Virtual columns of the Merge table: `_table` (String) followed by the virtual columns
  * of the first non-null underlying table, if there is one.
  */
NamesAndTypesList StorageMerge::getVirtuals() const
{
    NamesAndTypesList result{{"_table", std::make_shared<DataTypeString>()}};

    /// The predicate accepts any non-null table, so this picks the first existing one.
    if (auto first_table = getFirstTable([](auto && table) { return table; }))
    {
        auto inherited_virtuals = first_table->getVirtuals();
        result.insert(result.end(), inherited_virtuals.begin(), inherited_virtuals.end());
    }

    return result;
}
|
2013-09-23 12:01:19 +00:00
|
|
|
}
|