Merge pull request #50214 from azat/parallelize_output_from_storages-fix

Disable parallelize_output_from_storages for processing MATERIALIZED VIEWs and storages with one block only
This commit is contained in:
Igor Nikonov 2023-06-15 12:48:54 +02:00 committed by GitHub
commit 1113a7c524
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 23 additions and 10 deletions

View File

@ -245,6 +245,10 @@ Chain buildPushingToViewsChain(
if (disable_deduplication_for_children)
insert_context->setSetting("insert_deduplicate", Field{false});
// Blocks are pushed to MVs one at a time, so no parallel reading follows;
// parallelizing the storage output here would only add overhead (it is not free).
select_context->setSetting("parallelize_output_from_storages", Field{false});
// Separate min_insert_block_size_rows/min_insert_block_size_bytes for children
if (insert_settings.min_insert_block_size_rows_for_materialized_views)
insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value);

View File

@ -376,10 +376,11 @@ private:
/// even when the storage returned only one stream of data for reading?
/// It is beneficial, for example, when you read from a file quickly,
/// but then do heavy computations on returned blocks.
/// This is enabled by default, but in some cases shouldn't be done.
/// For example, when you read from system.numbers instead of system.numbers_mt,
/// you still expect the data to be processed sequentially.
virtual bool parallelizeOutputAfterReading(ContextPtr) const { return true; }
///
/// Enabled by default, but in some cases it should not be done: for example,
/// it is disabled for all system tables, where it is of little use.
/// Hence the default below is false exactly when isSystemStorage() is true;
/// other storages may still override it.
virtual bool parallelizeOutputAfterReading(ContextPtr) const { return !isSystemStorage(); }
public:
/// Other version of read which adds reading step to query plan.

View File

@ -36,6 +36,8 @@ public:
/// FIXME probably it should return false, but StorageValues is used in ExecutingInnerQueryFromViewTransform (whatever it is)
bool supportsTransactions() const override { return true; }
/// This storage holds one block only (res_block), so it opts out of
/// output parallelization after reading.
bool parallelizeOutputAfterReading(ContextPtr) const override { return false; }
private:
Block res_block;
NamesAndTypesList virtuals;

View File

@ -40,8 +40,6 @@ public:
size_t max_block_size,
size_t num_streams) override;
bool parallelizeOutputAfterReading(ContextPtr) const override { return false; }
bool hasEvenlyDistributedRead() const override { return true; }
/// NOTE(review): with a base default of `!isSystemStorage()` for
/// parallelizeOutputAfterReading(), returning true here presumably also keeps
/// output parallelization off for this storage -- confirm against the base class.
bool isSystemStorage() const override { return true; }
bool supportsTransactions() const override { return true; }

View File

@ -30,8 +30,6 @@ public:
size_t max_block_size,
size_t num_streams) override;
bool parallelizeOutputAfterReading(ContextPtr) const override { return false; }
/// NOTE(review): with a base default of `!isSystemStorage()` for
/// parallelizeOutputAfterReading(), returning true here presumably also keeps
/// output parallelization off for this storage -- confirm against the base class.
bool isSystemStorage() const override { return true; }
bool supportsTransactions() const override { return true; }

View File

@ -31,8 +31,6 @@ public:
size_t max_block_size,
size_t num_streams) override;
bool parallelizeOutputAfterReading(ContextPtr) const override { return false; }
bool hasEvenlyDistributedRead() const override { return true; }
/// NOTE(review): with a base default of `!isSystemStorage()` for
/// parallelizeOutputAfterReading(), returning true here presumably also keeps
/// output parallelization off for this storage -- confirm against the base class.
bool isSystemStorage() const override { return true; }
bool supportsTransactions() const override { return true; }

View File

@ -0,0 +1,12 @@
<test>
<!--
    Runs the same INSERT ... SELECT FROM numbers(10e6) into `input` twice:
    once with parallelize_output_from_storages=0 and once with =1
    (min_insert_block_size_rows=10000 in both), presumably so that the two
    timings can be compared.
    `input` has a MATERIALIZED VIEW attached that forwards rows with key > 10
    to `output`. Both tables use Engine=Null.
-->
<create_query>CREATE TABLE input (key Int) Engine=Null</create_query>
<create_query>CREATE TABLE output (key Int) Engine=Null</create_query>
<create_query>CREATE MATERIALIZED VIEW mv TO output AS SELECT * FROM input WHERE key > 10</create_query>
<query>INSERT INTO input SELECT * FROM numbers(10e6) SETTINGS parallelize_output_from_storages=0, min_insert_block_size_rows=10000</query>
<query>INSERT INTO input SELECT * FROM numbers(10e6) SETTINGS parallelize_output_from_storages=1, min_insert_block_size_rows=10000</query>
<!-- Clean-up: the view is dropped first, then the tables it references. -->
<drop_query>DROP TABLE IF EXISTS mv</drop_query>
<drop_query>DROP TABLE IF EXISTS input</drop_query>
<drop_query>DROP TABLE IF EXISTS output</drop_query>
</test>