mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 09:32:01 +00:00
Merge pull request #14005 from ClickHouse/ucasFL-new-branch
Merging #12195
This commit is contained in:
commit
00c697df06
@ -396,6 +396,7 @@ class IColumn;
|
|||||||
\
|
\
|
||||||
M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \
|
M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \
|
||||||
M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \
|
M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \
|
||||||
|
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
|
||||||
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0)
|
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0)
|
||||||
|
|
||||||
#define FORMAT_FACTORY_SETTINGS(M) \
|
#define FORMAT_FACTORY_SETTINGS(M) \
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
#include <Storages/StorageValues.h>
|
#include <Storages/StorageValues.h>
|
||||||
#include <Storages/LiveView/StorageLiveView.h>
|
#include <Storages/LiveView/StorageLiveView.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -57,9 +58,9 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
|
|||||||
insert_context->setSetting("insert_deduplicate", false);
|
insert_context->setSetting("insert_deduplicate", false);
|
||||||
|
|
||||||
// Separate min_insert_block_size_rows/min_insert_block_size_bytes for children
|
// Separate min_insert_block_size_rows/min_insert_block_size_bytes for children
|
||||||
if (insert_settings.min_insert_block_size_rows_for_materialized_views.changed)
|
if (insert_settings.min_insert_block_size_rows_for_materialized_views)
|
||||||
insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value);
|
insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value);
|
||||||
if (insert_settings.min_insert_block_size_bytes_for_materialized_views.changed)
|
if (insert_settings.min_insert_block_size_bytes_for_materialized_views)
|
||||||
insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value);
|
insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,10 +92,12 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
|
|||||||
/// Insert only columns returned by select.
|
/// Insert only columns returned by select.
|
||||||
auto list = std::make_shared<ASTExpressionList>();
|
auto list = std::make_shared<ASTExpressionList>();
|
||||||
const auto & inner_table_columns = inner_metadata_snapshot->getColumns();
|
const auto & inner_table_columns = inner_metadata_snapshot->getColumns();
|
||||||
for (auto & column : header)
|
for (const auto & column : header)
|
||||||
|
{
|
||||||
/// But skip columns which storage doesn't have.
|
/// But skip columns which storage doesn't have.
|
||||||
if (inner_table_columns.hasPhysical(column.name))
|
if (inner_table_columns.hasPhysical(column.name))
|
||||||
list->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
|
list->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
|
||||||
|
}
|
||||||
|
|
||||||
insert->columns = std::move(list);
|
insert->columns = std::move(list);
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <Interpreters/InterpreterInsertQuery.h>
|
#include <Interpreters/InterpreterInsertQuery.h>
|
||||||
|
|
||||||
|
#include <Access/AccessFlags.h>
|
||||||
#include <DataStreams/AddingDefaultBlockOutputStream.h>
|
#include <DataStreams/AddingDefaultBlockOutputStream.h>
|
||||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||||
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
|
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
|
||||||
@ -14,20 +15,20 @@
|
|||||||
#include <IO/ConcatReadBuffer.h>
|
#include <IO/ConcatReadBuffer.h>
|
||||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||||
#include <Interpreters/InterpreterWatchQuery.h>
|
#include <Interpreters/InterpreterWatchQuery.h>
|
||||||
#include <Access/AccessFlags.h>
|
|
||||||
#include <Interpreters/JoinedTables.h>
|
#include <Interpreters/JoinedTables.h>
|
||||||
#include <Parsers/ASTFunction.h>
|
#include <Parsers/ASTFunction.h>
|
||||||
#include <Parsers/ASTInsertQuery.h>
|
#include <Parsers/ASTInsertQuery.h>
|
||||||
#include <Parsers/ASTSelectQuery.h>
|
#include <Parsers/ASTSelectQuery.h>
|
||||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||||
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||||
#include <Parsers/queryToString.h>
|
#include <Parsers/queryToString.h>
|
||||||
|
#include <Processors/NullSink.h>
|
||||||
|
#include <Processors/Sources/SinkToOutputStream.h>
|
||||||
|
#include <Processors/Sources/SourceFromInputStream.h>
|
||||||
|
#include <Processors/Transforms/ConvertingTransform.h>
|
||||||
#include <Storages/StorageDistributed.h>
|
#include <Storages/StorageDistributed.h>
|
||||||
#include <TableFunctions/TableFunctionFactory.h>
|
#include <TableFunctions/TableFunctionFactory.h>
|
||||||
#include <Common/checkStackSize.h>
|
#include <Common/checkStackSize.h>
|
||||||
#include <Processors/Sources/SourceFromInputStream.h>
|
|
||||||
#include <Processors/NullSink.h>
|
|
||||||
#include <Processors/Transforms/ConvertingTransform.h>
|
|
||||||
#include <Processors/Sources/SinkToOutputStream.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -92,7 +93,8 @@ Block InterpreterInsertQuery::getSampleBlock(
|
|||||||
|
|
||||||
/// The table does not have a column with that name
|
/// The table does not have a column with that name
|
||||||
if (!table_sample.has(current_name))
|
if (!table_sample.has(current_name))
|
||||||
throw Exception("No such column " + current_name + " in table " + query.table_id.getNameForLogs(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
throw Exception("No such column " + current_name + " in table " + query.table_id.getNameForLogs(),
|
||||||
|
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
||||||
|
|
||||||
if (!allow_materialized && !table_sample_non_materialized.has(current_name))
|
if (!allow_materialized && !table_sample_non_materialized.has(current_name))
|
||||||
throw Exception("Cannot insert column " + current_name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
|
throw Exception("Cannot insert column " + current_name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
|
||||||
@ -105,6 +107,40 @@ Block InterpreterInsertQuery::getSampleBlock(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** A query that just reads all data without any complex computations or filetering.
|
||||||
|
* If we just pipe the result to INSERT, we don't have to use too many threads for read.
|
||||||
|
*/
|
||||||
|
static bool isTrivialSelect(const ASTSelectQuery & select_query)
|
||||||
|
{
|
||||||
|
const auto & tables = select_query.tables();
|
||||||
|
|
||||||
|
if (!tables)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const auto & tables_in_select_query = tables->as<ASTTablesInSelectQuery &>();
|
||||||
|
|
||||||
|
if (tables_in_select_query.children.size() != 1)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const auto & child = tables_in_select_query.children.front();
|
||||||
|
const auto & table_element = child->as<ASTTablesInSelectQueryElement &>();
|
||||||
|
const auto & table_expr = table_element.table_expression->as<ASTTableExpression &>();
|
||||||
|
|
||||||
|
if (table_expr.subquery)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/// Note: how to write it in more generic way?
|
||||||
|
return (!select_query.distinct
|
||||||
|
&& !select_query.limit_with_ties
|
||||||
|
&& !select_query.prewhere()
|
||||||
|
&& !select_query.where()
|
||||||
|
&& !select_query.groupBy()
|
||||||
|
&& !select_query.having()
|
||||||
|
&& !select_query.orderBy()
|
||||||
|
&& !select_query.limitBy());
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
BlockIO InterpreterInsertQuery::execute()
|
BlockIO InterpreterInsertQuery::execute()
|
||||||
{
|
{
|
||||||
const Settings & settings = context.getSettingsRef();
|
const Settings & settings = context.getSettingsRef();
|
||||||
@ -200,9 +236,47 @@ BlockIO InterpreterInsertQuery::execute()
|
|||||||
size_t out_streams_size = 1;
|
size_t out_streams_size = 1;
|
||||||
if (query.select)
|
if (query.select)
|
||||||
{
|
{
|
||||||
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
|
bool is_trivial_insert_select = false;
|
||||||
InterpreterSelectWithUnionQuery interpreter_select{ query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
|
|
||||||
|
if (settings.optimize_trivial_insert_select)
|
||||||
|
{
|
||||||
|
const auto & selects = query.select->as<ASTSelectWithUnionQuery &>().list_of_selects->children;
|
||||||
|
|
||||||
|
is_trivial_insert_select = std::all_of(selects.begin(), selects.end(), [](const ASTPtr & select)
|
||||||
|
{
|
||||||
|
return isTrivialSelect(select->as<ASTSelectQuery &>());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_trivial_insert_select)
|
||||||
|
{
|
||||||
|
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
|
||||||
|
* don't need to process SELECT with more than max_insert_threads
|
||||||
|
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
|
||||||
|
* to avoid unnecessary squashing.
|
||||||
|
*/
|
||||||
|
|
||||||
|
Settings new_settings = context.getSettings();
|
||||||
|
|
||||||
|
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
|
||||||
|
|
||||||
|
if (settings.min_insert_block_size_rows)
|
||||||
|
new_settings.max_block_size = settings.min_insert_block_size_rows;
|
||||||
|
|
||||||
|
Context new_context = context;
|
||||||
|
new_context.setSettings(new_settings);
|
||||||
|
|
||||||
|
InterpreterSelectWithUnionQuery interpreter_select{
|
||||||
|
query.select, new_context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
|
||||||
res = interpreter_select.execute();
|
res = interpreter_select.execute();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
|
||||||
|
InterpreterSelectWithUnionQuery interpreter_select{
|
||||||
|
query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
|
||||||
|
res = interpreter_select.execute();
|
||||||
|
}
|
||||||
|
|
||||||
if (table->supportsParallelInsert() && settings.max_insert_threads > 1)
|
if (table->supportsParallelInsert() && settings.max_insert_threads > 1)
|
||||||
out_streams_size = std::min(size_t(settings.max_insert_threads), res.pipeline.getNumStreams());
|
out_streams_size = std::min(size_t(settings.max_insert_threads), res.pipeline.getNumStreams());
|
||||||
|
@ -1,4 +1,8 @@
|
|||||||
<test>
|
<test>
|
||||||
|
<settings>
|
||||||
|
<optimize_trivial_insert_select>0</optimize_trivial_insert_select>
|
||||||
|
</settings>
|
||||||
|
|
||||||
<create_query>CREATE TABLE polygons (polygon Array(Array(Tuple(Float64, Float64)))) ENGINE = Memory</create_query>
|
<create_query>CREATE TABLE polygons (polygon Array(Array(Tuple(Float64, Float64)))) ENGINE = Memory</create_query>
|
||||||
<create_query>
|
<create_query>
|
||||||
INSERT INTO polygons
|
INSERT INTO polygons
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
DROP TABLE IF EXISTS numbers_squashed;
|
DROP TABLE IF EXISTS numbers_squashed;
|
||||||
CREATE TABLE numbers_squashed AS system.numbers ENGINE = StripeLog;
|
CREATE TABLE numbers_squashed AS system.numbers ENGINE = StripeLog;
|
||||||
|
|
||||||
|
SET optimize_trivial_insert_select = 'false';
|
||||||
SET max_block_size = 10000;
|
SET max_block_size = 10000;
|
||||||
|
|
||||||
SET min_insert_block_size_rows = 1000000;
|
SET min_insert_block_size_rows = 1000000;
|
||||||
|
@ -78,7 +78,7 @@ insert into data_01278 select
|
|||||||
from numbers(100000); -- { serverError 241; }
|
from numbers(100000); -- { serverError 241; }
|
||||||
EOL
|
EOL
|
||||||
} | {
|
} | {
|
||||||
execute --max_memory_usage=$TEST_01278_MEMORY "$@"
|
execute --max_memory_usage=$TEST_01278_MEMORY --optimize_trivial_insert_select='false' "$@"
|
||||||
}
|
}
|
||||||
echo 'select count() from out_01278' | execute
|
echo 'select count() from out_01278' | execute
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,2 @@
|
|||||||
|
1000000 0
|
||||||
|
1000000 1
|
@ -0,0 +1,7 @@
|
|||||||
|
SET max_insert_threads = 1, max_threads = 100, min_insert_block_size_rows = 1048576, max_block_size = 65536;
|
||||||
|
CREATE TEMPORARY TABLE t (x UInt64);
|
||||||
|
-- For trivial INSERT SELECT, max_threads is lowered to max_insert_threads and max_block_size is changed to min_insert_block_size_rows.
|
||||||
|
INSERT INTO t SELECT * FROM numbers_mt(1000000);
|
||||||
|
SET max_threads = 1;
|
||||||
|
-- If data was inserted by more threads, we will probably see data out of order.
|
||||||
|
SELECT DISTINCT blockSize(), runningDifference(x) FROM t;
|
Loading…
Reference in New Issue
Block a user