QueryPlan for StorageBuffer and StorageMaterializedView read.

Nikolai Kochetov 2020-09-29 19:21:58 +03:00
parent 576ffadb17
commit 5ac6bc071d
9 changed files with 226 additions and 57 deletions
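The heart of the change is a second read() overload that fills in a QueryPlan instead of returning a Pipe; the old Pipe-returning overload becomes a thin wrapper that builds the plan and compiles it into a pipeline. A minimal sketch of the pattern (StorageExample is a hypothetical storage; the QueryPlan and QueryPipeline calls are the ones this commit uses):

    Pipe StorageExample::read(
        const Names & column_names,
        const StorageMetadataPtr & metadata_snapshot,
        const SelectQueryInfo & query_info,
        const Context & context,
        QueryProcessingStage::Enum processed_stage,
        const size_t max_block_size,
        const unsigned num_streams)
    {
        /// Build the plan with the QueryPlan-based overload, then compile it into a pipeline.
        QueryPlan plan;
        read(plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
        return QueryPipeline::getPipe(std::move(*plan.buildQueryPipeline()));
    }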

src/Processors/QueryPlan/AddingMissedStep.cpp

@ -0,0 +1,45 @@
#include <Processors/QueryPlan/AddingMissedStep.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Transforms/AddingMissedTransform.h>
#include <IO/Operators.h>

namespace DB
{

static ITransformingStep::Traits getTraits()
{
    return ITransformingStep::Traits
    {
        {
            .preserves_distinct_columns = true,
            .returns_single_stream = false,
            .preserves_number_of_streams = true,
            .preserves_sorting = true,
        },
        {
            .preserves_number_of_rows = true,
        }
    };
}

AddingMissedStep::AddingMissedStep(
    const DataStream & input_stream_,
    Block result_header_,
    const ColumnDefaults & column_defaults_,
    const Context & context_)
    : ITransformingStep(input_stream_, result_header_, getTraits())
    , column_defaults(column_defaults_)
    , context(context_)
{
    updateDistinctColumns(output_stream->header, output_stream->distinct_columns);
}

void AddingMissedStep::transformPipeline(QueryPipeline & pipeline)
{
    pipeline.addSimpleTransform([&](const Block & header)
    {
        return std::make_shared<AddingMissedTransform>(header, output_stream->header, column_defaults, context);
    });
}

}

src/Processors/QueryPlan/AddingMissedStep.h

@ -0,0 +1,28 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>

namespace DB
{

struct ColumnDefault;
using ColumnDefaults = std::unordered_map<std::string, ColumnDefault>;

/// Adds columns that are missing from the block structure, filling them in from
/// the table's column default expressions. See AddingMissedTransform.
class AddingMissedStep : public ITransformingStep
{
public:
    AddingMissedStep(const DataStream & input_stream_,
                     Block result_header_,
                     const ColumnDefaults & column_defaults_,
                     const Context & context_);

    String getName() const override { return "AddingMissed"; }

    void transformPipeline(QueryPipeline & pipeline) override;

private:
    const ColumnDefaults column_defaults;
    const Context & context;
};

}
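A usage sketch for the new step, mirroring the call site this commit adds to StorageBuffer.cpp below (query_plan, header_after_adding_defaults, metadata_snapshot and context are locals of StorageBuffer::read there):

    auto adding_missed = std::make_unique<AddingMissedStep>(
        query_plan.getCurrentDataStream(),              /// input stream produced by the plan so far
        header_after_adding_defaults,                   /// target header including the missing columns
        metadata_snapshot->getColumns().getDefaults(),  /// default expressions used to fill them
        context);
    adding_missed->setStepDescription("Add columns missing in destination table");
    query_plan.addStep(std::move(adding_missed));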

src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp

@ -59,7 +59,10 @@ void SettingQuotaAndLimitsStep::transformPipeline(QueryPipeline & pipeline)
     if (quota)
         pipeline.setQuota(quota);
 
     if (context)
         pipeline.addInterpreterContext(std::move(context));
 
+    if (storage)
+        pipeline.addStorageHolder(std::move(storage));
 }

src/Processors/ya.make

@ -89,6 +89,7 @@ SRCS(
     printPipeline.cpp
     QueryPipeline.cpp
     QueryPlan/AddingDelayedSourceStep.cpp
+    QueryPlan/AddingMissedStep.cpp
     QueryPlan/AggregatingStep.cpp
     QueryPlan/ArrayJoinStep.cpp
     QueryPlan/ConvertingStep.cpp

src/Storages/StorageBuffer.cpp

@ -4,7 +4,7 @@
 #include <Interpreters/InterpreterAlterQuery.h>
 #include <Interpreters/castColumn.h>
 #include <Interpreters/evaluateConstantExpression.h>
-#include <Processors/Transforms/AddingMissedTransform.h>
+#include <Processors/QueryPlan/AddingMissedStep.h>
 #include <DataStreams/IBlockInputStream.h>
 #include <Storages/StorageBuffer.h>
 #include <Storages/StorageFactory.h>
@ -22,10 +22,13 @@
 #include <common/logger_useful.h>
 #include <common/getThreadId.h>
 #include <ext/range.h>
-#include <Processors/Transforms/ConvertingTransform.h>
+#include <Processors/QueryPlan/ConvertingStep.h>
 #include <Processors/Transforms/FilterTransform.h>
 #include <Processors/Transforms/ExpressionTransform.h>
 #include <Processors/Sources/SourceFromInputStream.h>
+#include <Processors/QueryPlan/SettingQuotaAndLimitsStep.h>
+#include <Processors/QueryPlan/ReadFromPreparedSource.h>
+#include <Processors/QueryPlan/UnionStep.h>
namespace ProfileEvents
@ -147,6 +150,21 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context
 Pipe StorageBuffer::read(
+    const Names & column_names,
+    const StorageMetadataPtr & metadata_snapshot,
+    const SelectQueryInfo & query_info,
+    const Context & context,
+    QueryProcessingStage::Enum processed_stage,
+    const size_t max_block_size,
+    const unsigned num_streams)
+{
+    QueryPlan plan;
+    read(plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
+    return QueryPipeline::getPipe(std::move(*plan.buildQueryPipeline()));
+}
+
+void StorageBuffer::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageMetadataPtr & metadata_snapshot,
     const SelectQueryInfo & query_info,
@ -155,8 +173,6 @@ Pipe StorageBuffer::read(
     size_t max_block_size,
     unsigned num_streams)
 {
-    Pipe pipe_from_dst;
-
     if (destination_id)
     {
         auto destination = DatabaseCatalog::instance().getTable(destination_id, context);
@ -182,8 +198,8 @@ Pipe StorageBuffer::read(
             query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination, destination_metadata_snapshot);
 
             /// The destination table has the same structure of the requested columns and we can simply read blocks from there.
-            pipe_from_dst = destination->read(
-                column_names, destination_metadata_snapshot, query_info,
+            destination->read(
+                query_plan, column_names, destination_metadata_snapshot, query_info,
                 context, processed_stage, max_block_size, num_streams);
         }
         else
@ -217,25 +233,45 @@ Pipe StorageBuffer::read(
             }
             else
             {
-                pipe_from_dst = destination->read(
-                    columns_intersection, destination_metadata_snapshot, query_info,
+                destination->read(
+                    query_plan, columns_intersection, destination_metadata_snapshot, query_info,
                     context, processed_stage, max_block_size, num_streams);
 
-                pipe_from_dst.addSimpleTransform([&](const Block & stream_header)
-                {
-                    return std::make_shared<AddingMissedTransform>(stream_header, header_after_adding_defaults,
-                        metadata_snapshot->getColumns().getDefaults(), context);
-                });
+                auto adding_missed = std::make_unique<AddingMissedStep>(
+                    query_plan.getCurrentDataStream(),
+                    header_after_adding_defaults,
+                    metadata_snapshot->getColumns().getDefaults(), context);
 
-                pipe_from_dst.addSimpleTransform([&](const Block & stream_header)
-                {
-                    return std::make_shared<ConvertingTransform>(
-                        stream_header, header, ConvertingTransform::MatchColumnsMode::Name);
-                });
+                adding_missed->setStepDescription("Add columns missing in destination table");
+                query_plan.addStep(std::move(adding_missed));
+
+                auto converting = std::make_unique<ConvertingStep>(
+                    query_plan.getCurrentDataStream(),
+                    header);
+
+                converting->setStepDescription("Convert destination table columns to Buffer table structure");
+                query_plan.addStep(std::move(converting));
             }
         }
 
-        pipe_from_dst.addTableLock(destination_lock);
+        if (query_plan.isInitialized())
+        {
+            StreamLocalLimits limits;
+            SizeLimits leaf_limits;
+
+            /// Add table lock for destination table.
+            auto adding_limits_and_quota = std::make_unique<SettingQuotaAndLimitsStep>(
+                query_plan.getCurrentDataStream(),
+                destination,
+                std::move(destination_lock),
+                limits,
+                leaf_limits,
+                nullptr,
+                nullptr);
+
+            adding_limits_and_quota->setStepDescription("Lock destination table for Buffer");
+            query_plan.addStep(std::move(adding_limits_and_quota));
+        }
     }
 
     Pipe pipe_from_buffers;
@ -248,27 +284,23 @@ Pipe StorageBuffer::read(
         pipe_from_buffers = Pipe::unitePipes(std::move(pipes_from_buffers));
     }
 
-    /// Convert pipes from table to structure from buffer.
-    if (!pipe_from_buffers.empty() && !pipe_from_dst.empty()
-        && !blocksHaveEqualStructure(pipe_from_buffers.getHeader(), pipe_from_dst.getHeader()))
-    {
-        pipe_from_dst.addSimpleTransform([&](const Block & header)
-        {
-            return std::make_shared<ConvertingTransform>(
-                header,
-                pipe_from_buffers.getHeader(),
-                ConvertingTransform::MatchColumnsMode::Name);
-        });
-    }
+    if (pipe_from_buffers.empty())
+        return;
+
+    QueryPlan buffers_plan;
 
     /** If the sources from the table were processed before some non-initial stage of query execution,
       * then sources from the buffers must also be wrapped in the processing pipeline before the same stage.
       */
     if (processed_stage > QueryProcessingStage::FetchColumns)
-        pipe_from_buffers = QueryPipeline::getPipe(
-            InterpreterSelectQuery(query_info.query, context, std::move(pipe_from_buffers),
-                                   SelectQueryOptions(processed_stage)).execute().pipeline);
+    {
+        auto interpreter = InterpreterSelectQuery(
+            query_info.query, context, std::move(pipe_from_buffers),
+            SelectQueryOptions(processed_stage));
+        interpreter.buildQueryPlan(buffers_plan);
+    }
+    else
+    {
         if (query_info.prewhere_info)
         {
             pipe_from_buffers.addSimpleTransform([&](const Block & header)
@ -287,10 +319,38 @@ Pipe StorageBuffer::read(
             }
         }
 
-    Pipes pipes;
-    pipes.emplace_back(std::move(pipe_from_dst));
-    pipes.emplace_back(std::move(pipe_from_buffers));
-    return Pipe::unitePipes(std::move(pipes));
+        auto read_from_buffers = std::make_unique<ReadFromPreparedSource>(std::move(pipe_from_buffers));
+        read_from_buffers->setStepDescription("Read from buffers of Buffer table");
+        buffers_plan.addStep(std::move(read_from_buffers));
+    }
+
+    if (!query_plan.isInitialized())
+    {
+        query_plan = std::move(buffers_plan);
+        return;
+    }
+
+    auto result_header = buffers_plan.getCurrentDataStream().header;
+
+    /// Convert structure from table to structure from buffer.
+    if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header))
+    {
+        auto converting = std::make_unique<ConvertingStep>(query_plan.getCurrentDataStream(), result_header);
+        query_plan.addStep(std::move(converting));
+    }
+
+    DataStreams input_streams;
+    input_streams.emplace_back(query_plan.getCurrentDataStream());
+    input_streams.emplace_back(buffers_plan.getCurrentDataStream());
+
+    std::vector<std::unique_ptr<QueryPlan>> plans;
+    plans.emplace_back(std::make_unique<QueryPlan>(std::move(query_plan)));
+    plans.emplace_back(std::make_unique<QueryPlan>(std::move(buffers_plan)));
+    query_plan = QueryPlan();
+
+    auto union_step = std::make_unique<UnionStep>(std::move(input_streams), result_header);
+    union_step->setStepDescription("Unite sources from Buffer table");
+    query_plan.unitePlans(std::move(union_step), std::move(plans));
 }
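Net effect: StorageBuffer::read now always leaves its result in query_plan. When both sources contribute rows, the plan is, summarizing the steps above (not actual EXPLAIN output), a UnionStep over two sub-plans: the destination-table read extended with AddingMissedStep/ConvertingStep plus a SettingQuotaAndLimitsStep that holds the table lock and the storage itself, and a ReadFromPreparedSource over the in-memory buffers, optionally pre-processed by InterpreterSelectQuery up to processed_stage. A final ConvertingStep aligns the destination header with the buffers header before the union.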

src/Storages/StorageBuffer.h

@ -65,6 +65,16 @@ public:
         size_t max_block_size,
         unsigned num_streams) override;
 
+    void read(
+        QueryPlan & query_plan,
+        const Names & column_names,
+        const StorageMetadataPtr & metadata_snapshot,
+        const SelectQueryInfo & query_info,
+        const Context & context,
+        QueryProcessingStage::Enum processed_stage,
+        size_t max_block_size,
+        unsigned num_streams) override;
+
     bool supportsParallelInsert() const override { return true; }
 
     BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;

src/Storages/StorageMaterializedView.cpp

@ -107,6 +107,21 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(cons
 }
 
 Pipe StorageMaterializedView::read(
+    const Names & column_names,
+    const StorageMetadataPtr & metadata_snapshot,
+    const SelectQueryInfo & query_info,
+    const Context & context,
+    QueryProcessingStage::Enum processed_stage,
+    const size_t max_block_size,
+    const unsigned num_streams)
+{
+    QueryPlan plan;
+    read(plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
+    return QueryPipeline::getPipe(std::move(*plan.buildQueryPipeline()));
+}
+
+void StorageMaterializedView::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageMetadataPtr & /*metadata_snapshot*/,
     const SelectQueryInfo & query_info,
@ -122,10 +137,7 @@ Pipe StorageMaterializedView::read(
     if (query_info.order_optimizer)
         query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage, metadata_snapshot);
 
-    Pipe pipe = storage->read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
-
-    pipe.addTableLock(lock);
-
-    return pipe;
+    storage->read(query_plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
 }
 
 BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context)

src/Storages/StorageMaterializedView.h

@ -80,6 +80,16 @@ public:
         size_t max_block_size,
         unsigned num_streams) override;
 
+    void read(
+        QueryPlan & query_plan,
+        const Names & column_names,
+        const StorageMetadataPtr & metadata_snapshot,
+        const SelectQueryInfo & query_info,
+        const Context & context,
+        QueryProcessingStage::Enum processed_stage,
+        size_t max_block_size,
+        unsigned num_streams) override;
+
     Strings getDataPaths() const override;
 
 private:
private:

tests/queries/0_stateless/<name>.sql

@ -10,9 +10,9 @@ SET max_threads=1;
 SET optimize_move_functions_out_of_any=0;
 
 SELECT 'LIMIT';
-SELECT any(_shard_num) shard_num, count(), uniq(dummy) FROM remote('127.0.0.{2,3}', system.one) LIMIT 1 SETTINGS distributed_group_by_no_merge=2;
+SELECT * FROM (SELECT any(_shard_num) shard_num, count(), uniq(dummy) FROM remote('127.0.0.{2,3}', system.one) LIMIT 1 ) ORDER BY shard_num SETTINGS distributed_group_by_no_merge=2;
 SELECT 'OFFSET';
-SELECT any(_shard_num) shard_num, count(), uniq(dummy) FROM remote('127.0.0.{2,3}', system.one) LIMIT 1, 1 SETTINGS distributed_group_by_no_merge=2;
+SELECT * FROM (SELECT any(_shard_num) shard_num, count(), uniq(dummy) FROM remote('127.0.0.{2,3}', system.one) LIMIT 1, 1) ORDER BY shard_num SETTINGS distributed_group_by_no_merge=2;
 SELECT 'ALIAS';
 SELECT dummy AS d FROM remote('127.0.0.{2,3}', system.one) ORDER BY d SETTINGS distributed_group_by_no_merge=2;