Merge pull request #59899 from ClickHouse/refactor-squashing-for-inserts

Refactor Squashing for inserts.
2024-09-20 08:40:50 +00:00 · 2024-02-14 19:29:11 +01:00 · 2024-02-14 19:29:11 +01:00 · 9c626f9c36
commit 9c626f9c36
parent 395faaf980 3d86e87f7f
3 changed files with 81 additions and 34 deletions
--- a/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/src/Interpreters/InterpreterInsertQuery.cpp
@ -274,7 +274,7 @@ Chain InterpreterInsertQuery::buildChain(
    auto sample = getSampleBlock(columns, table, metadata_snapshot);

    Chain sink = buildSink(table, metadata_snapshot, thread_status_holder, running_group, elapsed_counter_ms);
-    Chain chain = buildPreSinkChain(sink.getInputHeader(), table, metadata_snapshot, sample, thread_status_holder);
+    Chain chain = buildPreSinkChain(sink.getInputHeader(), table, metadata_snapshot, sample);

    chain.appendChain(std::move(sink));
    return chain;
@ -317,25 +317,31 @@ Chain InterpreterInsertQuery::buildSink(
    return out;
 }

+bool InterpreterInsertQuery::shouldAddSquashingFroStorage(const StoragePtr & table) const
+{
+    auto context_ptr = getContext();
+    const Settings & settings = context_ptr->getSettingsRef();
+    const ASTInsertQuery * query = nullptr;
+    if (query_ptr)
+        query = query_ptr->as<ASTInsertQuery>();
+
+    /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side.
+    /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
+    return !(settings.distributed_foreground_insert && table->isRemote()) && !async_insert && !no_squash && !(query && query->watch);
+}
+
 Chain InterpreterInsertQuery::buildPreSinkChain(
    const Block & subsequent_header,
    const StoragePtr & table,
    const StorageMetadataPtr & metadata_snapshot,
-    const Block & query_sample_block,
-    ThreadStatusesHolderPtr thread_status_holder)
+    const Block & query_sample_block)
 {
-    ThreadStatus * thread_status = current_thread;
-
-    if (!thread_status_holder)
-        thread_status = nullptr;
-
    auto context_ptr = getContext();

    const ASTInsertQuery * query = nullptr;
    if (query_ptr)
        query = query_ptr->as<ASTInsertQuery>();

-    const Settings & settings = context_ptr->getSettingsRef();
    bool null_as_default = query && query->select && context_ptr->getSettingsRef().insert_null_as_default;

    /// We create a pipeline of several streams, into which we will write data.
@ -366,26 +372,6 @@ Chain InterpreterInsertQuery::buildPreSinkChain(
    /// because some clients break insertion protocol (columns != header)
    out.addSource(std::make_shared<ConvertingTransform>(query_sample_block, adding_missing_defaults_actions));

-    /// It's important to squash blocks as early as possible (before other transforms),
-    ///  because other transforms may work inefficient if block size is small.
-
-    /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side.
-    /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
-    if (!(settings.distributed_foreground_insert && table->isRemote()) && !async_insert && !no_squash && !(query && query->watch))
-    {
-        bool table_prefers_large_blocks = table->prefersLargeBlocks();
-
-        out.addSource(std::make_shared<SquashingChunksTransform>(
-            input_header(),
-            table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
-            table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
-    }
-
-    auto counting = std::make_shared<CountingTransform>(input_header(), thread_status, getContext()->getQuota());
-    counting->setProcessListElement(context_ptr->getProcessListElement());
-    counting->setProgressCallback(context_ptr->getProgressCallback());
-    out.addSource(std::move(counting));
-
    return out;
 }

@ -558,8 +544,7 @@ BlockIO InterpreterInsertQuery::execute()
        }
        for (size_t i = 0; i < pre_streams_size; ++i)
        {
-            auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot,
-                query_sample_block, /* thread_status_holder= */ nullptr);
+            auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block);
            presink_chains.emplace_back(std::move(out));
        }
    }
@ -592,6 +577,29 @@ BlockIO InterpreterInsertQuery::execute()
            return std::make_shared<MaterializingTransform>(in_header);
        });

+        pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
+        {
+            auto context_ptr = getContext();
+            auto counting = std::make_shared<CountingTransform>(in_header, nullptr, context_ptr->getQuota());
+            counting->setProcessListElement(context_ptr->getProcessListElement());
+            counting->setProgressCallback(context_ptr->getProgressCallback());
+
+            return counting;
+        });
+
+        if (shouldAddSquashingFroStorage(table))
+        {
+            bool table_prefers_large_blocks = table->prefersLargeBlocks();
+
+            pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
+            {
+                return std::make_shared<SimpleSquashingChunksTransform>(
+                    in_header,
+                    table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
+                    table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
+            });
+        }
+
        size_t num_select_threads = pipeline.getNumThreads();

        for (auto & chain : presink_chains)
@ -634,7 +642,27 @@ BlockIO InterpreterInsertQuery::execute()
    }
    else
    {
-        presink_chains.at(0).appendChain(std::move(sink_chains.at(0)));
+        auto & chain = presink_chains.at(0);
+        chain.appendChain(std::move(sink_chains.at(0)));
+
+        if (shouldAddSquashingFroStorage(table))
+        {
+            bool table_prefers_large_blocks = table->prefersLargeBlocks();
+
+            auto squashing = std::make_shared<SimpleSquashingChunksTransform>(
+                chain.getInputHeader(),
+                table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
+                table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
+
+            chain.addSource(std::move(squashing));
+        }
+
+        auto context_ptr = getContext();
+        auto counting = std::make_shared<CountingTransform>(chain.getInputHeader(), nullptr, context_ptr->getQuota());
+        counting->setProcessListElement(context_ptr->getProcessListElement());
+        counting->setProgressCallback(context_ptr->getProgressCallback());
+        chain.addSource(std::move(counting));
+
        res.pipeline = QueryPipeline(std::move(presink_chains[0]));
        res.pipeline.setNumThreads(std::min<size_t>(res.pipeline.getNumThreads(), settings.max_threads));
        res.pipeline.setConcurrencyControl(settings.use_concurrency_control);
--- a/src/Interpreters/InterpreterInsertQuery.h
+++ b/src/Interpreters/InterpreterInsertQuery.h
@ -59,6 +59,8 @@ public:

    void addBuffer(std::unique_ptr<ReadBuffer> buffer) { owned_buffers.push_back(std::move(buffer)); }

+    bool shouldAddSquashingFroStorage(const StoragePtr & table) const;
+
 private:
    Block getSampleBlock(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const;

@ -81,8 +83,7 @@ private:
        const Block & subsequent_header,
        const StoragePtr & table,
        const StorageMetadataPtr & metadata_snapshot,
-        const Block & query_sample_block,
-        ThreadStatusesHolderPtr thread_status_holder);
+        const Block & query_sample_block);
 };


--- a/src/Processors/Transforms/buildPushingToViewsChain.cpp
+++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp
@ -5,6 +5,7 @@
 #include <Interpreters/InterpreterSelectQuery.h>
 #include <Interpreters/InterpreterSelectQueryAnalyzer.h>
 #include <Parsers/ASTInsertQuery.h>
+#include <Processors/Transforms/CountingTransform.h>
 #include <Processors/Transforms/SquashingChunksTransform.h>
 #include <Processors/Transforms/ExpressionTransform.h>
 #include <Processors/Executors/PullingPipelineExecutor.h>
@ -412,6 +413,23 @@ Chain buildPushingToViewsChain(

            InterpreterInsertQuery interpreter(nullptr, view_insert_context, false, false, false);
            out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms);
+
+            if (interpreter.shouldAddSquashingFroStorage(inner_table))
+            {
+                bool table_prefers_large_blocks = inner_table->prefersLargeBlocks();
+                const auto & settings = view_insert_context->getSettingsRef();
+
+                out.addSource(std::make_shared<SquashingChunksTransform>(
+                    out.getInputHeader(),
+                    table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
+                    table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
+            }
+
+            auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), current_thread, view_insert_context->getQuota());
+            counting->setProcessListElement(view_insert_context->getProcessListElement());
+            counting->setProgressCallback(view_insert_context->getProgressCallback());
+            out.addSource(std::move(counting));
+
            out.addStorageHolder(view);
            out.addStorageHolder(inner_table);
        }