mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Enable parallel distributed INSERT SELECT
This commit is contained in:
parent
11d4fc98f8
commit
28dd543973
@ -112,6 +112,7 @@ struct Settings : public SettingsCollection<Settings>
|
||||
M(SettingBool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
|
||||
\
|
||||
M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \
|
||||
M(SettingBool, distributed_insert_select, false, "If true, distributed insert select query in the same cluster will be processed on local tables on every shard", 0) \
|
||||
M(SettingBool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
|
||||
M(SettingUInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw.", 0) \
|
||||
\
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Interpreters/InterpreterInsertQuery.h>
|
||||
|
||||
#include <Access/AccessFlags.h>
|
||||
#include <DataStreams/AddingDefaultBlockOutputStream.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
|
||||
@ -7,18 +8,23 @@
|
||||
#include <DataStreams/CountingBlockOutputStream.h>
|
||||
#include <DataStreams/InputStreamFromASTInsertQuery.h>
|
||||
#include <DataStreams/NullAndDoCopyBlockInputStream.h>
|
||||
#include <DataStreams/NullBlockOutputStream.h>
|
||||
#include <DataStreams/OwningBlockInputStream.h>
|
||||
#include <DataStreams/PushingToViewsBlockOutputStream.h>
|
||||
#include <DataStreams/RemoteBlockInputStream.h>
|
||||
#include <DataStreams/SquashingBlockOutputStream.h>
|
||||
#include <DataStreams/copyData.h>
|
||||
#include <IO/ConcatReadBuffer.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
#include <Access/AccessFlags.h>
|
||||
#include <Interpreters/JoinedTables.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Storages/Kafka/StorageKafka.h>
|
||||
#include <Storages/StorageDistributed.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
|
||||
@ -109,61 +115,133 @@ BlockIO InterpreterInsertQuery::execute()
|
||||
context.checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
|
||||
|
||||
BlockInputStreams in_streams;
|
||||
size_t out_streams_size = 1;
|
||||
if (query.select)
|
||||
{
|
||||
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
|
||||
InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
|
||||
BlockOutputStreams out_streams;
|
||||
bool is_distributed_insert_select = false;
|
||||
|
||||
if (table->supportsParallelInsert() && settings.max_insert_threads > 1)
|
||||
if (query.select && table->isRemote() && settings.distributed_insert_select)
|
||||
{
|
||||
// Distributed INSERT SELECT
|
||||
std::shared_ptr<StorageDistributed> storage_src;
|
||||
auto & select_ = query.select->as<ASTSelectWithUnionQuery &>();
|
||||
auto new_query = dynamic_pointer_cast<ASTInsertQuery>(query.clone());
|
||||
if (select_.list_of_selects->children.size() == 1)
|
||||
{
|
||||
in_streams = interpreter_select.executeWithMultipleStreams(res.pipeline);
|
||||
out_streams_size = std::min(size_t(settings.max_insert_threads), in_streams.size());
|
||||
auto & select_query = select_.list_of_selects->children.at(0)->as<ASTSelectQuery &>();
|
||||
JoinedTables joined_tables(Context(context), select_query);
|
||||
|
||||
if (joined_tables.tablesCount() == 1)
|
||||
{
|
||||
storage_src = dynamic_pointer_cast<StorageDistributed>(joined_tables.getLeftTableStorage());
|
||||
if (storage_src)
|
||||
{
|
||||
const auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
|
||||
select_with_union_query->list_of_selects = std::make_shared<ASTExpressionList>();
|
||||
|
||||
auto new_select_query = dynamic_pointer_cast<ASTSelectQuery>(select_query.clone());
|
||||
select_with_union_query->list_of_selects->children.push_back(new_select_query);
|
||||
|
||||
new_select_query->replaceDatabaseAndTable(storage_src->getRemoteDatabaseName(), storage_src->getRemoteTableName());
|
||||
|
||||
new_query->select = select_with_union_query;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
auto storage_dst = dynamic_pointer_cast<StorageDistributed>(table);
|
||||
|
||||
if (storage_src && storage_dst && storage_src->cluster_name == storage_dst->cluster_name)
|
||||
{
|
||||
res = interpreter_select.execute();
|
||||
in_streams.emplace_back(res.in);
|
||||
res.in = nullptr;
|
||||
res.out = nullptr;
|
||||
is_distributed_insert_select = true;
|
||||
|
||||
const auto & cluster = storage_src->getCluster();
|
||||
const auto & shards_info = cluster->getShardsInfo();
|
||||
|
||||
String new_query_str = queryToString(new_query);
|
||||
for (size_t shard_index : ext::range(0, shards_info.size()))
|
||||
{
|
||||
const auto & shard_info = shards_info[shard_index];
|
||||
if (shard_info.isLocal())
|
||||
{
|
||||
InterpreterInsertQuery interpreter(new_query, context);
|
||||
auto block_io = interpreter.execute();
|
||||
in_streams.push_back(block_io.in);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings);
|
||||
auto connections = shard_info.pool->getMany(timeouts, &settings, PoolMode::GET_ONE);
|
||||
if (connections.empty() || connections.front().isNull())
|
||||
throw Exception(
|
||||
"Expected exactly one connection for shard " + toString(shard_info.shard_num), ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// INSERT SELECT queries return an empty block
|
||||
auto in_stream = std::make_shared<RemoteBlockInputStream>(*connections.front(), new_query_str, Block{}, context);
|
||||
in_streams.push_back(in_stream);
|
||||
}
|
||||
out_streams.push_back(std::make_shared<NullBlockOutputStream>(Block()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BlockOutputStreams out_streams;
|
||||
|
||||
for (size_t i = 0; i < out_streams_size; i++)
|
||||
if (!is_distributed_insert_select)
|
||||
{
|
||||
/// We create a pipeline of several streams, into which we will write data.
|
||||
BlockOutputStreamPtr out;
|
||||
|
||||
/// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
|
||||
/// Otherwise we'll get duplicates when MV reads same rows again from Kafka.
|
||||
if (table->noPushingToViews() && !no_destination)
|
||||
out = table->write(query_ptr, context);
|
||||
else
|
||||
out = std::make_shared<PushingToViewsBlockOutputStream>(table, context, query_ptr, no_destination);
|
||||
|
||||
/// Do not squash blocks if it is a sync INSERT into Distributed, since it leads to double bufferization on client and server side.
|
||||
/// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
|
||||
if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()) && !no_squash)
|
||||
size_t out_streams_size = 1;
|
||||
if (query.select)
|
||||
{
|
||||
out = std::make_shared<SquashingBlockOutputStream>(
|
||||
out, out->getHeader(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
|
||||
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
|
||||
InterpreterSelectWithUnionQuery interpreter_select{ query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
|
||||
|
||||
if (table->supportsParallelInsert() && settings.max_insert_threads > 1)
|
||||
{
|
||||
in_streams = interpreter_select.executeWithMultipleStreams(res.pipeline);
|
||||
out_streams_size = std::min(size_t(settings.max_insert_threads), in_streams.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
res = interpreter_select.execute();
|
||||
in_streams.emplace_back(res.in);
|
||||
res.in = nullptr;
|
||||
res.out = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/// Actually we don't know structure of input blocks from query/table,
|
||||
/// because some clients break insertion protocol (columns != header)
|
||||
out = std::make_shared<AddingDefaultBlockOutputStream>(
|
||||
out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context);
|
||||
for (size_t i = 0; i < out_streams_size; i++)
|
||||
{
|
||||
/// We create a pipeline of several streams, into which we will write data.
|
||||
BlockOutputStreamPtr out;
|
||||
|
||||
if (const auto & constraints = table->getConstraints(); !constraints.empty())
|
||||
out = std::make_shared<CheckConstraintsBlockOutputStream>(query.table_id,
|
||||
out, query_sample_block, table->getConstraints(), context);
|
||||
/// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
|
||||
/// Otherwise we'll get duplicates when MV reads same rows again from Kafka.
|
||||
if (table->noPushingToViews() && !no_destination)
|
||||
out = table->write(query_ptr, context);
|
||||
else
|
||||
out = std::make_shared<PushingToViewsBlockOutputStream>(table, context, query_ptr, no_destination);
|
||||
|
||||
auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
|
||||
out_wrapper->setProcessListElement(context.getProcessListElement());
|
||||
out = std::move(out_wrapper);
|
||||
out_streams.emplace_back(std::move(out));
|
||||
/// Do not squash blocks if it is a sync INSERT into Distributed, since it leads to double bufferization on client and server side.
|
||||
/// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
|
||||
if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()) && !no_squash)
|
||||
{
|
||||
out = std::make_shared<SquashingBlockOutputStream>(
|
||||
out,
|
||||
out->getHeader(),
|
||||
context.getSettingsRef().min_insert_block_size_rows,
|
||||
context.getSettingsRef().min_insert_block_size_bytes);
|
||||
}
|
||||
|
||||
/// Actually we don't know structure of input blocks from query/table,
|
||||
/// because some clients break insertion protocol (columns != header)
|
||||
out = std::make_shared<AddingDefaultBlockOutputStream>(
|
||||
out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context);
|
||||
|
||||
if (const auto & constraints = table->getConstraints(); !constraints.empty())
|
||||
out = std::make_shared<CheckConstraintsBlockOutputStream>(
|
||||
query.table_id, out, query_sample_block, table->getConstraints(), context);
|
||||
|
||||
auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
|
||||
out_wrapper->setProcessListElement(context.getProcessListElement());
|
||||
out = std::move(out_wrapper);
|
||||
out_streams.emplace_back(std::move(out));
|
||||
}
|
||||
}
|
||||
|
||||
/// What type of query: INSERT or INSERT SELECT?
|
||||
|
@ -0,0 +1,12 @@
|
||||
test_shard_localhost
|
||||
0
|
||||
1
|
||||
2
|
||||
test_cluster_two_shards_localhost
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
||||
test_cluster_two_shards
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
@ -0,0 +1,70 @@
|
||||
-- Start from a clean slate: remove any of the four test tables that may
-- already exist.
DROP TABLE IF EXISTS local_01099_a;
DROP TABLE IF EXISTS local_01099_b;
DROP TABLE IF EXISTS distributed_01099_a;
DROP TABLE IF EXISTS distributed_01099_b;

-- Enable the distributed_insert_select setting for every statement below.
SET distributed_insert_select = 1;
|
||||
|
||||
--
-- test_shard_localhost
--
-- Both Distributed tables are declared on the 'test_shard_localhost' cluster.
-- Three rows are written to local_01099_a, copied with a
-- Distributed-to-Distributed INSERT ... SELECT, and read back through
-- distributed_01099_b.

SELECT 'test_shard_localhost';

CREATE TABLE local_01099_a (number UInt64) ENGINE = Log;
CREATE TABLE local_01099_b (number UInt64) ENGINE = Log;
CREATE TABLE distributed_01099_a AS local_01099_a ENGINE = Distributed('test_shard_localhost', currentDatabase(), local_01099_a, rand());
CREATE TABLE distributed_01099_b AS local_01099_b ENGINE = Distributed('test_shard_localhost', currentDatabase(), local_01099_b, rand());

INSERT INTO local_01099_a SELECT number from system.numbers limit 3;
INSERT INTO distributed_01099_b SELECT * from distributed_01099_a;

-- ORDER BY pins the row order so the comparison with the reference output
-- (0, 1, 2) does not depend on the order in which rows happen to be read.
SELECT * FROM distributed_01099_b ORDER BY number;

DROP TABLE local_01099_a;
DROP TABLE local_01099_b;
DROP TABLE distributed_01099_a;
DROP TABLE distributed_01099_b;
|
||||
|
||||
--
-- test_cluster_two_shards_localhost
--
-- Source and destination Distributed tables both use the
-- 'test_cluster_two_shards_localhost' cluster. The check reads local_01099_b
-- directly and counts how often each number was stored; the reference output
-- expects every number to be counted twice.

SELECT 'test_cluster_two_shards_localhost';

CREATE TABLE local_01099_a (number UInt64) ENGINE = Log;
CREATE TABLE local_01099_b (number UInt64) ENGINE = Log;
CREATE TABLE distributed_01099_a AS local_01099_a
    ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), local_01099_a, rand());
CREATE TABLE distributed_01099_b AS local_01099_b
    ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), local_01099_b, rand());

INSERT INTO local_01099_a
    SELECT number FROM system.numbers LIMIT 3;
INSERT INTO distributed_01099_b
    SELECT * FROM distributed_01099_a;

SELECT number, count(number)
FROM local_01099_b
GROUP BY number
ORDER BY number;

DROP TABLE local_01099_a;
DROP TABLE local_01099_b;
DROP TABLE distributed_01099_a;
DROP TABLE distributed_01099_b;
|
||||
|
||||
--
-- test_cluster_two_shards
--
-- Source and destination Distributed tables both use the
-- 'test_cluster_two_shards' cluster. The check reads local_01099_b directly
-- and counts how often each number was stored; the reference output expects
-- every number to be counted twice.

SELECT 'test_cluster_two_shards';

CREATE TABLE local_01099_a (number UInt64) ENGINE = Log;
CREATE TABLE local_01099_b (number UInt64) ENGINE = Log;
CREATE TABLE distributed_01099_a AS local_01099_a
    ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), local_01099_a, rand());
CREATE TABLE distributed_01099_b AS local_01099_b
    ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), local_01099_b, rand());

INSERT INTO local_01099_a
    SELECT number FROM system.numbers LIMIT 3;
INSERT INTO distributed_01099_b
    SELECT * FROM distributed_01099_a;

-- Flush distributed_01099_b before inspecting the local table, so that no
-- data is still waiting to be sent when the counts are taken.
SYSTEM FLUSH DISTRIBUTED distributed_01099_b;

SELECT number, count(number)
FROM local_01099_b
GROUP BY number
ORDER BY number;

DROP TABLE local_01099_a;
DROP TABLE local_01099_b;
DROP TABLE distributed_01099_a;
DROP TABLE distributed_01099_b;
|
Loading…
Reference in New Issue
Block a user