Merge branch 'master' of github.com:ClickHouse/ClickHouse into format-settings-parsing

commit 278f2bb14f
Author: Alexey Milovidov
Date: 2024-07-05 22:57:11 +02:00
159 changed files with 1242 additions and 8743 deletions

contrib/s2geometry (vendored): 2 lines changed

@ -1 +1 @@
Subproject commit 0146e2d1355828f8f633cb050948250ad7406c57
Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43

View File

@ -1,7 +1,6 @@
option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES})
# ARCH_S390X broke upstream, it can be re-enabled once https://github.com/google/s2geometry/pull/372 is merged
if (NOT ENABLE_S2_GEOMETRY OR ARCH_S390X)
if (NOT ENABLE_S2_GEOMETRY)
message(STATUS "Not using S2 Geometry")
return()
endif()

View File

@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and `compression_level`
- `password` for the file on disk
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
- `use_same_s3_credentials_for_base_backup`: whether base backup to S3 should inherit credentials from the query. Only works with `S3`.
- `structure_only`: if enabled, only the CREATE statements are backed up or restored, without the data of tables
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
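Taken together, a hedged sketch of these settings in use (bucket URL, credentials, and table names are placeholders):

```sql
BACKUP TABLE db.events TO S3('https://bucket.s3.amazonaws.com/backups/2.zip', 'key', 'secret')
SETTINGS base_backup = S3('https://bucket.s3.amazonaws.com/backups/1.zip'),
         use_same_s3_credentials_for_base_backup = 1,
         s3_storage_class = 'STANDARD';

-- Restore only the CREATE statements, assigning a storage policy to the restored tables.
RESTORE TABLE db.events FROM Disk('backups', '1.zip')
SETTINGS storage_policy = 'default', structure_only = 1;
```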

View File

@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server.
fqdn();
```
This function is case-insensitive.
Aliases: `fullHostName`, `FQDN`.
**Returned value**
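The fully qualified domain name, as a `String`. For example (the hostname shown is a placeholder):

```sql
SELECT FQDN();

-- clickhouse-node-1.example.com
```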

View File

@ -23,6 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
| `GROUPS` frame | ❌ |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
| `rank()`, `dense_rank()`, `row_number()` | ✅ |
| `percent_rank()` | ✅ Efficiently computes the relative rank of a row within its partition. Replaces the more verbose and computationally intensive manual calculation `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`|
| `lag/lead(value, offset)` | ❌ <br/> You can use one of the following workarounds:<br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| `ntile(buckets)` | ✅ <br/> Specify the window as `(partition by x order by y rows between unbounded preceding and unbounded following)`. |
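As an illustration of the `lag` workarounds listed above (table and column names are hypothetical), both expressions below return the previous row's value:

```sql
SELECT
    time,
    value,
    any(value) OVER (ORDER BY time ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS lag_via_any,
    lagInFrame(value, 1) OVER (ORDER BY time ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lag_via_frame
FROM events
ORDER BY time;
```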

View File

@ -626,6 +626,28 @@ static void initializeAzureSDKLogger(
#endif
}
#if defined(SANITIZER)
static std::vector<String> getSanitizerNames()
{
std::vector<String> names;
#if defined(ADDRESS_SANITIZER)
names.push_back("address");
#endif
#if defined(THREAD_SANITIZER)
names.push_back("thread");
#endif
#if defined(MEMORY_SANITIZER)
names.push_back("memory");
#endif
#if defined(UNDEFINED_BEHAVIOR_SANITIZER)
names.push_back("undefined behavior");
#endif
return names;
}
#endif
int Server::main(const std::vector<std::string> & /*args*/)
try
{
@ -716,7 +738,17 @@ try
global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
#if defined(SANITIZER)
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
auto sanitizers = getSanitizerNames();
String log_message;
if (sanitizers.empty())
log_message = "sanitizer";
else if (sanitizers.size() == 1)
log_message = fmt::format("{} sanitizer", sanitizers.front());
else
log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", "));
global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message));
#endif
#if defined(SANITIZE_COVERAGE) || WITH_COVERAGE
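The message composition added above can be sketched in isolation; this is a standalone approximation using {fmt}, not the server code itself:

```cpp
#include <fmt/format.h>
#include <fmt/ranges.h>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> sanitizers = {"address", "undefined behavior"};

    std::string log_message;
    if (sanitizers.empty())
        log_message = "sanitizer";
    else if (sanitizers.size() == 1)
        log_message = fmt::format("{} sanitizer", sanitizers.front());
    else
        log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", "));

    // Prints: Server was built with sanitizers (address, undefined behavior). It will work slowly.
    fmt::print("Server was built with {}. It will work slowly.\n", log_message);
}
```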

View File

@ -1093,10 +1093,4 @@ void ColumnObject::finalize()
checkObjectHasNoAmbiguosPaths(getKeys());
}
void ColumnObject::updateHashFast(SipHash & hash) const
{
for (const auto & entry : subcolumns)
for (auto & part : entry->data.data)
part->updateHashFast(hash);
}
}

View File

@ -242,7 +242,7 @@ public:
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
void updateHashFast(SipHash & hash) const override;
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
bool hasEqualValues() const override { throwMustBeConcrete(); }
size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); }

View File

@ -5,7 +5,7 @@
namespace DB
{
static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2)
static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
{
if ((end - pos) & 1)
{
@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o
++out;
}
static void inline binStringDecode(const char * pos, const char * end, char *& out)
static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
{
if (pos == end)
{
@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o
++out;
}
assert((end - pos) % 8 == 0);
chassert((end - pos) % word_size == 0);
while (end - pos != 0)
{

View File

@ -1,184 +0,0 @@
#pragma once
#include <base/defines.h>
#include <Common/Exception.h>
#include <algorithm>
#include <memory>
#include <typeindex>
#include <vector>
#include <string>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/* This is a collection of objects derived from ItemBase.
* The collection contains at most one instance of each derived type.
* The derived type is used to access the instance.
*/
template<class ItemBase>
class CollectionOfDerivedItems
{
public:
using Self = CollectionOfDerivedItems<ItemBase>;
using ItemPtr = std::shared_ptr<ItemBase>;
private:
struct Rec
{
std::type_index type_idx;
ItemPtr ptr;
bool operator<(const Rec & other) const
{
return type_idx < other.type_idx;
}
bool operator<(const std::type_index & value) const
{
return type_idx < value;
}
bool operator==(const Rec & other) const
{
return type_idx == other.type_idx;
}
};
using Records = std::vector<Rec>;
public:
void swap(Self & other) noexcept
{
records.swap(other.records);
}
void clear()
{
records.clear();
}
bool empty() const
{
return records.empty();
}
size_t size() const
{
return records.size();
}
Self clone() const
{
Self result;
result.records.reserve(records.size());
for (const auto & rec : records)
result.records.emplace_back(rec.type_idx, rec.ptr->clone());
return result;
}
void append(Self && other)
{
auto middle_idx = records.size();
std::move(other.records.begin(), other.records.end(), std::back_inserter(records));
std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end());
chassert(isUniqTypes());
}
template <class T>
void add(std::shared_ptr<T> info)
{
static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
return addImpl(std::type_index(typeid(T)), std::move(info));
}
template <class T>
std::shared_ptr<T> get() const
{
static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
auto it = getImpl(std::type_index(typeid(T)));
if (it == records.cend())
return nullptr;
auto cast = std::dynamic_pointer_cast<T>(it->ptr);
chassert(cast);
return cast;
}
template <class T>
std::shared_ptr<T> extract()
{
static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
auto it = getImpl(std::type_index(typeid(T)));
if (it == records.cend())
return nullptr;
auto cast = std::dynamic_pointer_cast<T>(it->ptr);
chassert(cast);
records.erase(it);
return cast;
}
std::string debug() const
{
std::string result;
for (auto & rec : records)
{
result.append(rec.type_idx.name());
result.append(" ");
}
return result;
}
private:
bool isUniqTypes() const
{
auto uniq_it = std::adjacent_find(records.begin(), records.end());
return uniq_it == records.end();
}
void addImpl(std::type_index type_idx, ItemPtr item)
{
auto it = std::lower_bound(records.begin(), records.end(), type_idx);
if (it == records.end())
{
records.emplace_back(type_idx, item);
return;
}
if (it->type_idx == type_idx)
throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name());
records.emplace(it, type_idx, item);
chassert(isUniqTypes());
}
Records::const_iterator getImpl(std::type_index type_idx) const
{
auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx);
if (it == records.cend())
return records.cend();
if (it->type_idx != type_idx)
return records.cend();
return it;
}
Records records;
};
}
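For context on the header removed here, a minimal usage sketch of its public API; `InfoBase` and `TokenInfo` are hypothetical stand-ins for the chunk-info types that used it:

```cpp
struct InfoBase
{
    virtual ~InfoBase() = default;
    virtual std::shared_ptr<InfoBase> clone() const = 0;
};

struct TokenInfo : public InfoBase
{
    std::string token;
    std::shared_ptr<InfoBase> clone() const override { return std::make_shared<TokenInfo>(*this); }
};

void example()
{
    DB::CollectionOfDerivedItems<InfoBase> infos;

    infos.add(std::make_shared<TokenInfo>());    // at most one entry per concrete type
    if (auto token = infos.get<TokenInfo>())     // lookup is keyed by the static type
        token->token = "user-token";

    auto extracted = infos.extract<TokenInfo>(); // removes the entry and returns it
    // infos.get<TokenInfo>() would now return nullptr.
}
```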

View File

@ -36,7 +36,7 @@ class IColumn;
M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\
M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress: if the uncompressed data is less than max_compress_block_size, the block size is no less than this value and no less than the volume of data for one mark.", 0) \
M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \
M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \
M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \
M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \
M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
@ -634,8 +634,9 @@ class IColumn;
M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \
M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \
M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
@ -952,7 +953,6 @@ class IColumn;
#define OBSOLETE_SETTINGS(M, ALIAS) \
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \
MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \
MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \
MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \
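A hedged illustration of the interaction guarded by the new `throw_if_...` setting above (the INSERT target is hypothetical):

```sql
SET deduplicate_blocks_in_dependent_materialized_views = 1;
SET async_insert = 1;

-- With throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert = 1
-- (the default), this INSERT throws instead of silently losing deduplication guarantees:
INSERT INTO events VALUES (1, 'a');
```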

View File

@ -3,14 +3,14 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <Common/BitHelpers.h>
#include <Common/BinStringDecodeHelper.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/castColumn.h>
#include <Common/BinStringDecodeHelper.h>
#include <Common/BitHelpers.h>
namespace DB
{
@ -218,10 +218,7 @@ struct UnbinImpl
static constexpr auto name = "unbin";
static constexpr size_t word_size = 8;
static void decode(const char * pos, const char * end, char *& out)
{
binStringDecode(pos, end, out);
}
static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out, word_size); }
};
/// Encode number or string to string with binary or hexadecimal representation
@ -651,7 +648,15 @@ public:
size_t size = in_offsets.size();
out_offsets.resize(size);
out_vec.resize(in_vec.size() / word_size + size);
size_t max_out_len = 0;
for (size_t i = 0; i < in_offsets.size(); ++i)
{
const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1])
- /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1;
max_out_len += (len + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1;
}
out_vec.resize(max_out_len);
char * begin = reinterpret_cast<char *>(out_vec.data());
char * pos = begin;
@ -661,6 +666,7 @@ public:
{
size_t new_offset = in_offsets[i];
/// `new_offset - 1` because in ColumnString each string is stored with trailing zero byte
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset - 1]), pos);
out_offsets[i] = pos - begin;
@ -668,6 +674,9 @@ public:
prev_offset = new_offset;
}
chassert(
static_cast<size_t>(pos - begin) <= out_vec.size(),
fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size()));
out_vec.resize(pos - begin);
return col_res;
@ -680,11 +689,11 @@ public:
ColumnString::Offsets & out_offsets = col_res->getOffsets();
const ColumnString::Chars & in_vec = col_fix_string->getChars();
size_t n = col_fix_string->getN();
const size_t n = col_fix_string->getN();
size_t size = col_fix_string->size();
out_offsets.resize(size);
out_vec.resize(in_vec.size() / word_size + size);
out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size);
char * begin = reinterpret_cast<char *>(out_vec.data());
char * pos = begin;
@ -694,6 +703,7 @@ public:
{
size_t new_offset = prev_offset + n;
/// here we don't subtract 1 from `new_offset` because in ColumnFixedString strings are stored without trailing zero byte
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset]), pos);
out_offsets[i] = pos - begin;
@ -701,6 +711,9 @@ public:
prev_offset = new_offset;
}
chassert(
static_cast<size_t>(pos - begin) <= out_vec.size(),
fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size()));
out_vec.resize(pos - begin);
return col_res;
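A hedged sketch of calling the decode helper with the explicit `word_size` introduced above (buffer size computed by hand here; the real callers preallocate as shown in this hunk):

```cpp
// Decode the binary string representation of "AB" (word_size = 8 bits per output byte).
const char * input = "0100000101000010";
const char * end = input + 16;

char buf[3];  // 2 decoded bytes + the trailing zero the decoder always appends
char * out = buf;
binStringDecode(input, end, out, /*word_size=*/8);
// buf now contains 'A', 'B', '\0'.
```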

View File

@ -44,7 +44,7 @@ struct Memory : boost::noncopyable, Allocator
char * m_data = nullptr;
size_t alignment = 0;
[[maybe_unused]] bool allow_gwp_asan_force_sample;
[[maybe_unused]] bool allow_gwp_asan_force_sample{false};
Memory() = default;

View File

@ -301,13 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const
auto & insert_query = query->as<ASTInsertQuery &>();
insert_query.async_insert_flush = true;
InterpreterInsertQuery interpreter(
query,
query_context,
query_context->getSettingsRef().insert_allow_materialized_columns,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns);
auto table = interpreter.getTable(insert_query);
auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context);
@ -787,12 +781,7 @@ try
try
{
interpreter = std::make_unique<InterpreterInsertQuery>(
key.query,
insert_context,
key.settings.insert_allow_materialized_columns,
false,
false,
true);
key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true);
pipeline = interpreter->execute().pipeline;
chassert(pipeline.pushing());
@ -1011,7 +1000,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
}
Chunk chunk(executor.getResultColumns(), total_rows);
chunk.getChunkInfos().add(std::move(chunk_info));
chunk.setChunkInfo(std::move(chunk_info));
return chunk;
}
@ -1063,7 +1052,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries(
}
Chunk chunk(std::move(result_columns), total_rows);
chunk.getChunkInfos().add(std::move(chunk_info));
chunk.setChunkInfo(std::move(chunk_info));
return chunk;
}

View File

@ -2,7 +2,6 @@
#include <Interpreters/InterpreterFactory.h>
#include <algorithm>
#include <memory>
#include <Access/Common/AccessFlags.h>
@ -23,7 +22,6 @@
#include <Parsers/ASTCheckQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Processors/Chunk.h>
#include <Processors/IAccumulatingTransform.h>
#include <Processors/IInflatingTransform.h>
#include <Processors/ISimpleTransform.h>
@ -93,7 +91,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con
return Chunk(std::move(columns), 1);
}
class TableCheckTask : public ChunkInfoCloneable<TableCheckTask>
class TableCheckTask : public ChunkInfo
{
public:
TableCheckTask(StorageID table_id, const std::variant<std::monostate, ASTPtr, String> & partition_or_part, ContextPtr context)
@ -112,12 +110,6 @@ public:
context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID());
}
TableCheckTask(const TableCheckTask & other)
: table(other.table)
, check_data_tasks(other.check_data_tasks)
, is_finished(other.is_finished.load())
{}
std::optional<CheckResult> checkNext() const
{
if (isFinished())
@ -129,8 +121,8 @@ public:
std::this_thread::sleep_for(sleep_time);
});
IStorage::DataValidationTasksPtr tmp = check_data_tasks;
auto result = table->checkDataNext(tmp);
IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks;
auto result = table->checkDataNext(check_data_tasks_);
is_finished = !result.has_value();
return result;
}
@ -188,7 +180,7 @@ protected:
/// source should return at least one row to start pipeline
result.addColumn(ColumnUInt8::create(1, 1));
/// actual data stored in chunk info
result.getChunkInfos().add(std::move(current_check_task));
result.setChunkInfo(std::move(current_check_task));
return result;
}
@ -288,7 +280,7 @@ public:
protected:
void transform(Chunk & chunk) override
{
auto table_check_task = chunk.getChunkInfos().get<TableCheckTask>();
auto table_check_task = std::dynamic_pointer_cast<const TableCheckTask>(chunk.getChunkInfo());
auto check_result = table_check_task->checkNext();
if (!check_result)
{

View File

@ -1776,13 +1776,8 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create)
else
insert->select = create.select->clone();
return InterpreterInsertQuery(
insert,
getContext(),
getContext()->getSettingsRef().insert_allow_materialized_columns,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false).execute();
return InterpreterInsertQuery(insert, getContext(),
getContext()->getSettingsRef().insert_allow_materialized_columns).execute();
}
return {};

View File

@ -534,13 +534,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
}
else if (dynamic_cast<const ASTInsertQuery *>(ast.getExplainedQuery().get()))
{
InterpreterInsertQuery insert(
ast.getExplainedQuery(),
getContext(),
/* allow_materialized */ false,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext());
auto io = insert.execute();
printPipeline(io.pipeline.getProcessors(), buf);
}

View File

@ -16,7 +16,6 @@
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/processColumnTransformers.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTSelectQuery.h>
@ -27,7 +26,6 @@
#include <Processors/Transforms/CountingTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/MaterializingTransform.h>
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
#include <Processors/Transforms/SquashingTransform.h>
#include <Processors/Transforms/PlanSquashingTransform.h>
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
@ -40,7 +38,6 @@
#include <Common/ThreadStatus.h>
#include <Common/checkStackSize.h>
#include <Common/ProfileEvents.h>
#include "base/defines.h"
namespace ProfileEvents
@ -397,358 +394,28 @@ Chain InterpreterInsertQuery::buildPreSinkChain(
return out;
}
std::pair<std::vector<Chain>, std::vector<Chain>> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block)
{
chassert(presink_streams > 0);
chassert(sink_streams > 0);
ThreadGroupPtr running_group;
if (current_thread)
running_group = current_thread->getThreadGroup();
if (!running_group)
running_group = std::make_shared<ThreadGroup>(getContext());
std::vector<Chain> sink_chains;
std::vector<Chain> presink_chains;
for (size_t i = 0; i < sink_streams; ++i)
{
auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr,
running_group, /* elapsed_counter_ms= */ nullptr);
sink_chains.emplace_back(std::move(out));
}
for (size_t i = 0; i < presink_streams; ++i)
{
auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block);
presink_chains.emplace_back(std::move(out));
}
return {std::move(presink_chains), std::move(sink_chains)};
}
QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table)
{
const Settings & settings = getContext()->getSettingsRef();
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
bool is_trivial_insert_select = false;
if (settings.optimize_trivial_insert_select)
{
const auto & select_query = query.select->as<ASTSelectWithUnionQuery &>();
const auto & selects = select_query.list_of_selects->children;
const auto & union_modes = select_query.list_of_modes;
/// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries that are in fact trivial SELECT queries
const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; };
is_trivial_insert_select =
std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all))
&& std::all_of(selects.begin(), selects.end(), isTrivialSelect);
}
ContextPtr select_context = getContext();
if (is_trivial_insert_select)
{
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
* there is no need to process the SELECT with more than max_insert_threads
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
* to avoid unnecessary squashing.
*/
Settings new_settings = select_context->getSettings();
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
if (table->prefersLargeBlocks())
{
if (settings.min_insert_block_size_rows)
new_settings.max_block_size = settings.min_insert_block_size_rows;
if (settings.min_insert_block_size_bytes)
new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes;
}
auto context_for_trivial_select = Context::createCopy(context);
context_for_trivial_select->setSettings(new_settings);
context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames());
select_context = context_for_trivial_select;
}
QueryPipelineBuilder pipeline;
{
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
if (settings.allow_experimental_analyzer)
{
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options);
pipeline = interpreter_select_analyzer.buildQueryPipeline();
}
else
{
InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options);
pipeline = interpreter_select.buildQueryPipeline();
}
}
pipeline.dropTotalsAndExtremes();
/// Allow inserting Nullable into non-Nullable columns: NULL values will be added as default values.
if (getContext()->getSettingsRef().insert_null_as_default)
{
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
const auto & output_columns = metadata_snapshot->getColumns();
if (input_columns.size() == query_columns.size())
{
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
{
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type)
&& !isNullableOrLowCardinalityNullable(query_columns[col_idx].type)
&& !isVariant(query_columns[col_idx].type)
&& !isDynamic(query_columns[col_idx].type)
&& output_columns.has(query_columns[col_idx].name))
{
query_sample_block.setColumn(
col_idx,
ColumnWithTypeAndName(
makeNullableOrLowCardinalityNullable(query_columns[col_idx].column),
makeNullableOrLowCardinalityNullable(query_columns[col_idx].type),
query_columns[col_idx].name));
}
}
}
}
auto actions_dag = ActionsDAG::makeConvertingActions(
pipeline.getHeader().getColumnsWithTypeAndName(),
query_sample_block.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<ExpressionTransform>(in_header, actions);
});
/// We need to convert Sparse columns to full, because the destination storage
/// may not support it or may have different settings for applying Sparse serialization.
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<MaterializingTransform>(in_header);
});
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
auto context_ptr = getContext();
auto counting = std::make_shared<CountingTransform>(in_header, nullptr, context_ptr->getQuota());
counting->setProcessListElement(context_ptr->getProcessListElement());
counting->setProgressCallback(context_ptr->getProgressCallback());
return counting;
});
size_t num_select_threads = pipeline.getNumThreads();
pipeline.resize(1);
if (shouldAddSquashingFroStorage(table))
{
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<PlanSquashingTransform>(
in_header,
table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size,
table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL);
});
}
pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr
{
return std::make_shared<DeduplicationToken::AddTokenInfoTransform>(in_header);
});
if (!settings.insert_deduplication_token.value.empty())
{
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<DeduplicationToken::SetUserTokenTransform>(settings.insert_deduplication_token.value, in_header);
});
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<DeduplicationToken::SetSourceBlockNumberTransform>(in_header);
});
}
/// Number of streams works like this:
/// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever
/// InterpreterSelectQuery ends up with.
/// * Use `max_insert_threads` streams for various insert-preparation steps, e.g.
/// materializing and squashing (too slow to do in one thread). That's `presink_chains`.
/// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage.
/// Otherwise ResizeProcessor them down to 1 stream.
size_t presink_streams_size = std::max<size_t>(settings.max_insert_threads, pipeline.getNumStreams());
size_t sink_streams_size = table->supportsParallelInsert() ? std::max<size_t>(1, settings.max_insert_threads) : 1;
if (!settings.parallel_view_processing)
{
auto table_id = table->getStorageID();
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
if (table->isView() || !views.empty())
sink_streams_size = 1;
}
auto [presink_chains, sink_chains] = buildPreAndSinkChains(
presink_streams_size, sink_streams_size,
table, metadata_snapshot, query_sample_block);
pipeline.resize(presink_chains.size());
if (shouldAddSquashingFroStorage(table))
{
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<ApplySquashingTransform>(
in_header,
table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size,
table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL);
});
}
for (auto & chain : presink_chains)
pipeline.addResources(chain.detachResources());
pipeline.addChains(std::move(presink_chains));
pipeline.resize(sink_streams_size);
for (auto & chain : sink_chains)
pipeline.addResources(chain.detachResources());
pipeline.addChains(std::move(sink_chains));
if (!settings.parallel_view_processing)
{
/// Don't use more threads for INSERT than for SELECT to reduce memory consumption.
if (pipeline.getNumThreads() > num_select_threads)
pipeline.setMaxThreads(num_select_threads);
}
else if (pipeline.getNumThreads() < settings.max_threads)
{
/// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select,
/// however in case of parallel_view_processing and multiple views, views can still be processed in parallel.
///
/// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads.
pipeline.setMaxThreads(settings.max_threads);
}
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
{
return std::make_shared<EmptySink>(cur_header);
});
return QueryPipelineBuilder::getPipeline(std::move(pipeline));
}
QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table)
{
const Settings & settings = getContext()->getSettingsRef();
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
Chain chain;
{
auto [presink_chains, sink_chains] = buildPreAndSinkChains(
/* presink_streams */1, /* sink_streams */1,
table, metadata_snapshot, query_sample_block);
chain = std::move(presink_chains.front());
chain.appendChain(std::move(sink_chains.front()));
}
if (!settings.insert_deduplication_token.value.empty())
{
chain.addSource(std::make_shared<DeduplicationToken::SetSourceBlockNumberTransform>(chain.getInputHeader()));
chain.addSource(std::make_shared<DeduplicationToken::SetUserTokenTransform>(settings.insert_deduplication_token.value, chain.getInputHeader()));
}
chain.addSource(std::make_shared<DeduplicationToken::AddTokenInfoTransform>(chain.getInputHeader()));
if (shouldAddSquashingFroStorage(table))
{
bool table_prefers_large_blocks = table->prefersLargeBlocks();
auto squashing = std::make_shared<ApplySquashingTransform>(
chain.getInputHeader(),
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
chain.addSource(std::move(squashing));
auto balancing = std::make_shared<PlanSquashingTransform>(
chain.getInputHeader(),
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
chain.addSource(std::move(balancing));
}
auto context_ptr = getContext();
auto counting = std::make_shared<CountingTransform>(chain.getInputHeader(), nullptr, context_ptr->getQuota());
counting->setProcessListElement(context_ptr->getProcessListElement());
counting->setProgressCallback(context_ptr->getProgressCallback());
chain.addSource(std::move(counting));
QueryPipeline pipeline = QueryPipeline(std::move(chain));
pipeline.setNumThreads(std::min<size_t>(pipeline.getNumThreads(), settings.max_threads));
pipeline.setConcurrencyControl(settings.use_concurrency_control);
if (query.hasInlinedData() && !async_insert)
{
/// can execute without additional data
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
for (auto && buffer : owned_buffers)
format->addBuffer(std::move(buffer));
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
pipeline.complete(std::move(pipe));
}
return pipeline;
}
BlockIO InterpreterInsertQuery::execute()
{
const Settings & settings = getContext()->getSettingsRef();
auto & query = query_ptr->as<ASTInsertQuery &>();
QueryPipelineBuilder pipeline;
std::optional<QueryPipeline> distributed_pipeline;
QueryPlanResourceHolder resources;
StoragePtr table = getTable(query);
checkStorageSupportsTransactionsIfNeeded(table, getContext());
StoragePtr inner_table;
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
inner_table = mv->getTargetTable();
if (query.partition_by && !table->supportsPartitionBy())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
/// For table functions we check access while executing
@ -756,43 +423,320 @@ BlockIO InterpreterInsertQuery::execute()
if (!query.table_function)
getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
if (!allow_materialized)
if (query.select && settings.parallel_distributed_insert_select)
// Distributed INSERT SELECT
distributed_pipeline = table->distributedWrite(query, getContext());
std::vector<Chain> presink_chains;
std::vector<Chain> sink_chains;
if (!distributed_pipeline)
{
for (const auto & column : metadata_snapshot->getColumns())
if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name);
/// Number of streams works like this:
/// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever
/// InterpreterSelectQuery ends up with.
/// * Use `max_insert_threads` streams for various insert-preparation steps, e.g.
/// materializing and squashing (too slow to do in one thread). That's `presink_chains`.
/// * If the table supports parallel inserts, use the same streams for writing to IStorage.
/// Otherwise ResizeProcessor them down to 1 stream.
/// * If it's not an INSERT SELECT, forget all that and use one stream.
size_t pre_streams_size = 1;
size_t sink_streams_size = 1;
if (query.select)
{
bool is_trivial_insert_select = false;
if (settings.optimize_trivial_insert_select)
{
const auto & select_query = query.select->as<ASTSelectWithUnionQuery &>();
const auto & selects = select_query.list_of_selects->children;
const auto & union_modes = select_query.list_of_modes;
/// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries that are in fact trivial SELECT queries
const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; };
is_trivial_insert_select =
std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all))
&& std::all_of(selects.begin(), selects.end(), isTrivialSelect);
}
if (is_trivial_insert_select)
{
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
* there is no need to process the SELECT with more than max_insert_threads
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
* to avoid unnecessary squashing.
*/
Settings new_settings = getContext()->getSettings();
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
if (table->prefersLargeBlocks())
{
if (settings.min_insert_block_size_rows)
new_settings.max_block_size = settings.min_insert_block_size_rows;
if (settings.min_insert_block_size_bytes)
new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes;
}
auto new_context = Context::createCopy(context);
new_context->setSettings(new_settings);
new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames());
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
if (settings.allow_experimental_analyzer)
{
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options);
pipeline = interpreter_select_analyzer.buildQueryPipeline();
}
else
{
InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options);
pipeline = interpreter_select.buildQueryPipeline();
}
}
else
{
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
if (settings.allow_experimental_analyzer)
{
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options);
pipeline = interpreter_select_analyzer.buildQueryPipeline();
}
else
{
InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options);
pipeline = interpreter_select.buildQueryPipeline();
}
}
pipeline.dropTotalsAndExtremes();
if (settings.max_insert_threads > 1)
{
auto table_id = table->getStorageID();
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
/// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them.
/// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts.
const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert();
pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads
: std::min<size_t>(settings.max_insert_threads, pipeline.getNumStreams());
/// Deduplication when passing insert_deduplication_token breaks if using more than one thread
if (!settings.insert_deduplication_token.toString().empty())
{
LOG_DEBUG(
getLogger("InsertQuery"),
"Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues");
pre_streams_size = 1;
}
if (table->supportsParallelInsert())
sink_streams_size = pre_streams_size;
}
pipeline.resize(pre_streams_size);
/// Allow inserting Nullable into non-Nullable columns: NULL values will be added as default values.
if (getContext()->getSettingsRef().insert_null_as_default)
{
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
const auto & output_columns = metadata_snapshot->getColumns();
if (input_columns.size() == query_columns.size())
{
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
{
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type)
&& !isNullableOrLowCardinalityNullable(query_columns[col_idx].type)
&& !isVariant(query_columns[col_idx].type)
&& !isDynamic(query_columns[col_idx].type)
&& output_columns.has(query_columns[col_idx].name))
query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name));
}
}
}
}
ThreadGroupPtr running_group;
if (current_thread)
running_group = current_thread->getThreadGroup();
if (!running_group)
running_group = std::make_shared<ThreadGroup>(getContext());
for (size_t i = 0; i < sink_streams_size; ++i)
{
auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr,
running_group, /* elapsed_counter_ms= */ nullptr);
sink_chains.emplace_back(std::move(out));
}
for (size_t i = 0; i < pre_streams_size; ++i)
{
auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block);
presink_chains.emplace_back(std::move(out));
}
}
BlockIO res;
if (query.select)
/// What type of query: INSERT or INSERT SELECT or INSERT WATCH?
if (distributed_pipeline)
{
if (settings.parallel_distributed_insert_select)
res.pipeline = std::move(*distributed_pipeline);
}
else if (query.select)
{
const auto & header = presink_chains.at(0).getInputHeader();
auto actions_dag = ActionsDAG::makeConvertingActions(
pipeline.getHeader().getColumnsWithTypeAndName(),
header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
auto distributed = table->distributedWrite(query, getContext());
if (distributed)
{
res.pipeline = std::move(*distributed);
}
else
{
res.pipeline = buildInsertSelectPipeline(query, table);
}
}
else
return std::make_shared<ExpressionTransform>(in_header, actions);
});
/// We need to convert Sparse columns to full, because the destination storage
/// may not support it or may have different settings for applying Sparse serialization.
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
res.pipeline = buildInsertSelectPipeline(query, table);
return std::make_shared<MaterializingTransform>(in_header);
});
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
auto context_ptr = getContext();
auto counting = std::make_shared<CountingTransform>(in_header, nullptr, context_ptr->getQuota());
counting->setProcessListElement(context_ptr->getProcessListElement());
counting->setProgressCallback(context_ptr->getProgressCallback());
return counting;
});
if (shouldAddSquashingFroStorage(table))
{
bool table_prefers_large_blocks = table->prefersLargeBlocks();
size_t threads = presink_chains.size();
pipeline.resize(1);
pipeline.addTransform(std::make_shared<PlanSquashingTransform>(
header,
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
pipeline.resize(threads);
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<ApplySquashingTransform>(
in_header,
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
});
}
size_t num_select_threads = pipeline.getNumThreads();
for (auto & chain : presink_chains)
resources = chain.detachResources();
for (auto & chain : sink_chains)
resources = chain.detachResources();
pipeline.addChains(std::move(presink_chains));
pipeline.resize(sink_chains.size());
pipeline.addChains(std::move(sink_chains));
if (!settings.parallel_view_processing)
{
/// Don't use more threads for INSERT than for SELECT to reduce memory consumption.
if (pipeline.getNumThreads() > num_select_threads)
pipeline.setMaxThreads(num_select_threads);
}
else if (pipeline.getNumThreads() < settings.max_threads)
{
/// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select,
/// however in case of parallel_view_processing and multiple views, views can still be processed in parallel.
///
/// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads.
pipeline.setMaxThreads(settings.max_threads);
}
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
{
return std::make_shared<EmptySink>(cur_header);
});
if (!allow_materialized)
{
for (const auto & column : metadata_snapshot->getColumns())
if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name);
}
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline));
}
else
{
res.pipeline = buildInsertPipeline(query, table);
auto & chain = presink_chains.at(0);
chain.appendChain(std::move(sink_chains.at(0)));
if (shouldAddSquashingFroStorage(table))
{
bool table_prefers_large_blocks = table->prefersLargeBlocks();
auto squashing = std::make_shared<ApplySquashingTransform>(
chain.getInputHeader(),
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
chain.addSource(std::move(squashing));
auto balancing = std::make_shared<PlanSquashingTransform>(
chain.getInputHeader(),
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
chain.addSource(std::move(balancing));
}
auto context_ptr = getContext();
auto counting = std::make_shared<CountingTransform>(chain.getInputHeader(), nullptr, context_ptr->getQuota());
counting->setProcessListElement(context_ptr->getProcessListElement());
counting->setProgressCallback(context_ptr->getProgressCallback());
chain.addSource(std::move(counting));
res.pipeline = QueryPipeline(std::move(presink_chains[0]));
res.pipeline.setNumThreads(std::min<size_t>(res.pipeline.getNumThreads(), settings.max_threads));
res.pipeline.setConcurrencyControl(settings.use_concurrency_control);
if (query.hasInlinedData() && !async_insert)
{
/// can execute without additional data
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
for (auto && buffer : owned_buffers)
format->addBuffer(std::move(buffer));
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
res.pipeline.complete(std::move(pipe));
}
}
res.pipeline.addStorageHolder(table);
res.pipeline.addResources(std::move(resources));
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
res.pipeline.addStorageHolder(mv->getTargetTable());
res.pipeline.addStorageHolder(table);
if (inner_table)
res.pipeline.addStorageHolder(inner_table);
return res;
}
@ -813,27 +757,17 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont
}
}
void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const
{
extendQueryLogElemImpl(elem, context_);
}
void registerInterpreterInsertQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterInsertQuery>(
args.query,
args.context,
args.allow_materialized,
/* no_squash */false,
/* no_destination */false,
/* async_insert */false);
return std::make_unique<InterpreterInsertQuery>(args.query, args.context, args.allow_materialized);
};
factory.registerInterpreter("InterpreterInsertQuery", create_fn);
}
}

View File

@ -23,10 +23,10 @@ public:
InterpreterInsertQuery(
const ASTPtr & query_ptr_,
ContextPtr context_,
bool allow_materialized_,
bool no_squash_,
bool no_destination,
bool async_insert_);
bool allow_materialized_ = false,
bool no_squash_ = false,
bool no_destination_ = false,
bool async_insert_ = false);
/** Prepare a request for execution. Return block streams
* - the stream into which you can write data to execute the query, if INSERT;
@ -73,17 +73,12 @@ private:
ASTPtr query_ptr;
const bool allow_materialized;
bool no_squash = false;
bool no_destination = false;
const bool no_squash;
const bool no_destination;
const bool async_insert;
std::vector<std::unique_ptr<ReadBuffer>> owned_buffers;
std::pair<std::vector<Chain>, std::vector<Chain>> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block);
QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table);
QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table);
Chain buildSink(
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot,

View File

@ -1,7 +1,6 @@
#include <vector>
#include <Interpreters/Squashing.h>
#include <Common/CurrentThread.h>
#include <base/defines.h>
namespace DB
@ -12,33 +11,24 @@ namespace ErrorCodes
}
Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_)
: min_block_size_rows(min_block_size_rows_)
: header(header_)
, min_block_size_rows(min_block_size_rows_)
, min_block_size_bytes(min_block_size_bytes_)
, header(header_)
{
}
Chunk Squashing::flush()
{
if (!accumulated)
return {};
auto result = convertToChunk(accumulated.extract());
chassert(result);
return result;
return convertToChunk(std::move(chunks_to_merge_vec));
}
Chunk Squashing::squash(Chunk && input_chunk)
{
if (!input_chunk)
if (!input_chunk.hasChunkInfo())
return Chunk();
auto squash_info = input_chunk.getChunkInfos().extract<ChunksToSquash>();
if (!squash_info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos()));
const auto *info = getInfoFromChunk(input_chunk);
return squash(info->chunks);
}
Chunk Squashing::add(Chunk && input_chunk)
@ -47,37 +37,48 @@ Chunk Squashing::add(Chunk && input_chunk)
return {};
/// Just read block is already enough.
if (isEnoughSize(input_chunk))
if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes()))
{
/// If no accumulated data, return just read block.
if (!accumulated)
if (chunks_to_merge_vec.empty())
{
accumulated.add(std::move(input_chunk));
return convertToChunk(accumulated.extract());
chunks_to_merge_vec.push_back(std::move(input_chunk));
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
chunks_to_merge_vec.clear();
return res_chunk;
}
/// Return accumulated data (maybe it has small size) and place new block to accumulated data.
Chunk res_chunk = convertToChunk(accumulated.extract());
accumulated.add(std::move(input_chunk));
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
chunks_to_merge_vec.clear();
changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
chunks_to_merge_vec.push_back(std::move(input_chunk));
return res_chunk;
}
/// Accumulated block is already enough.
if (isEnoughSize())
if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes))
{
/// Return accumulated data and place new block to accumulated data.
Chunk res_chunk = convertToChunk(accumulated.extract());
accumulated.add(std::move(input_chunk));
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
chunks_to_merge_vec.clear();
changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
chunks_to_merge_vec.push_back(std::move(input_chunk));
return res_chunk;
}
/// Pushing data into accumulating vector
accumulated.add(std::move(input_chunk));
expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
chunks_to_merge_vec.push_back(std::move(input_chunk));
/// If accumulated data is big enough, we send it
if (isEnoughSize())
return convertToChunk(accumulated.extract());
if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes))
{
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
changeCurrentSize(0, 0);
chunks_to_merge_vec.clear();
return res_chunk;
}
return {};
}
@ -89,15 +90,14 @@ Chunk Squashing::convertToChunk(std::vector<Chunk> && chunks) const
auto info = std::make_shared<ChunksToSquash>();
info->chunks = std::move(chunks);
// It is important that the chunk is not empty: it has to have columns even if they are empty
auto aggr_chunk = Chunk(header.getColumns(), 0);
aggr_chunk.getChunkInfos().add(std::move(info));
chassert(aggr_chunk);
return aggr_chunk;
chunks.clear();
return Chunk(header.cloneEmptyColumns(), 0, info);
}
Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoCollection && infos)
Chunk Squashing::squash(std::vector<Chunk> & input_chunks)
{
Chunk accumulated_chunk;
std::vector<IColumn::MutablePtr> mutable_columns = {};
size_t rows = 0;
for (const Chunk & chunk : input_chunks)
@ -119,17 +119,35 @@ Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoColl
for (size_t j = 0, size = mutable_columns.size(); j < size; ++j)
{
const auto source_column = columns[j];
mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size());
}
}
accumulated_chunk.setColumns(std::move(mutable_columns), rows);
return accumulated_chunk;
}
Chunk result;
result.setColumns(std::move(mutable_columns), rows);
result.setChunkInfos(infos);
result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos()));
const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk)
{
const auto& info = chunk.getChunkInfo();
const auto * agg_info = typeid_cast<const ChunksToSquash *>(info.get());
chassert(result);
return result;
if (!agg_info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
return agg_info;
}
void Squashing::expandCurrentSize(size_t rows, size_t bytes)
{
accumulated_size.rows += rows;
accumulated_size.bytes += bytes;
}
void Squashing::changeCurrentSize(size_t rows, size_t bytes)
{
accumulated_size.rows = rows;
accumulated_size.bytes = bytes;
}
bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
@ -138,28 +156,4 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
|| (min_block_size_rows && rows >= min_block_size_rows)
|| (min_block_size_bytes && bytes >= min_block_size_bytes);
}
bool Squashing::isEnoughSize() const
{
return isEnoughSize(accumulated.getRows(), accumulated.getBytes());
};
bool Squashing::isEnoughSize(const Chunk & chunk) const
{
return isEnoughSize(chunk.getNumRows(), chunk.bytes());
}
void Squashing::CurrentSize::add(Chunk && chunk)
{
rows += chunk.getNumRows();
bytes += chunk.bytes();
chunks.push_back(std::move(chunk));
}
std::vector<Chunk> Squashing::CurrentSize::extract()
{
auto result = std::move(chunks);
*this = {};
return result;
}
}

View File

@ -8,18 +8,9 @@
namespace DB
{
class ChunksToSquash : public ChunkInfoCloneable<ChunksToSquash>
struct ChunksToSquash : public ChunkInfo
{
public:
ChunksToSquash() = default;
ChunksToSquash(const ChunksToSquash & other)
{
chunks.reserve(other.chunks.size());
for (const auto & chunk: other.chunks)
chunks.push_back(chunk.clone());
}
std::vector<Chunk> chunks = {};
mutable std::vector<Chunk> chunks = {};
};
/** Merging consecutive passed blocks to specified minimum size.
@ -45,35 +36,32 @@ public:
static Chunk squash(Chunk && input_chunk);
Chunk flush();
void setHeader(Block header_) { header = std::move(header_); }
const Block & getHeader() const { return header; }
private:
class CurrentSize
bool isDataLeft()
{
return !chunks_to_merge_vec.empty();
}
Block header;
private:
struct CurrentSize
{
std::vector<Chunk> chunks = {};
size_t rows = 0;
size_t bytes = 0;
public:
explicit operator bool () const { return !chunks.empty(); }
size_t getRows() const { return rows; }
size_t getBytes() const { return bytes; }
void add(Chunk && chunk);
std::vector<Chunk> extract();
};
const size_t min_block_size_rows;
const size_t min_block_size_bytes;
Block header;
std::vector<Chunk> chunks_to_merge_vec = {};
size_t min_block_size_rows;
size_t min_block_size_bytes;
CurrentSize accumulated;
CurrentSize accumulated_size;
static Chunk squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoCollection && infos);
static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk);
bool isEnoughSize() const;
static Chunk squash(std::vector<Chunk> & input_chunks);
void expandCurrentSize(size_t rows, size_t bytes);
void changeCurrentSize(size_t rows, size_t bytes);
bool isEnoughSize(size_t rows, size_t bytes) const;
bool isEnoughSize(const Chunk & chunk) const;
Chunk convertToChunk(std::vector<Chunk> && chunks) const;
};

View File

@ -538,13 +538,7 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
insert_context->makeQueryContext();
addSettingsForQuery(insert_context, IAST::QueryKind::Insert);
InterpreterInsertQuery interpreter(
query_ptr,
insert_context,
/* allow_materialized */ false,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
InterpreterInsertQuery interpreter(query_ptr, insert_context);
BlockIO io = interpreter.execute();
PushingPipelineExecutor executor(io.pipeline);

View File

@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
}
}
/// Check for dynamic subcolumns in unknown required columns.
/// Check for dynamic subcolums in unknown required columns.
if (!unknown_required_source_columns.empty())
{
for (const NameAndTypePair & pair : source_columns_ordinary)

View File

@ -1129,11 +1129,11 @@ inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node,
if (hex)
{
hexStringDecode(str_begin, str_end, res_pos);
hexStringDecode(str_begin, str_end, res_pos, word_size);
}
else
{
binStringDecode(str_begin, str_end, res_pos);
binStringDecode(str_begin, str_end, res_pos, word_size);
}
return makeStringLiteral(pos, node, String(reinterpret_cast<char *>(res.data()), (res_pos - res_begin - 1)));

View File

@ -19,6 +19,14 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns
checkNumRowsIsConsistent();
}
Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
: columns(std::move(columns_))
, num_rows(num_rows_)
, chunk_info(std::move(chunk_info_))
{
checkNumRowsIsConsistent();
}
static Columns unmuteColumns(MutableColumns && mutable_columns)
{
Columns columns;
@ -35,11 +43,17 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_)
checkNumRowsIsConsistent();
}
Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
: columns(unmuteColumns(std::move(columns_)))
, num_rows(num_rows_)
, chunk_info(std::move(chunk_info_))
{
checkNumRowsIsConsistent();
}
Chunk Chunk::clone() const
{
auto tmp = Chunk(getColumns(), getNumRows());
tmp.setChunkInfos(chunk_infos.clone());
return tmp;
return Chunk(getColumns(), getNumRows(), chunk_info);
}
void Chunk::setColumns(Columns columns_, UInt64 num_rows_)

View File

@ -1,9 +1,7 @@
#pragma once
#include <Common/CollectionOfDerived.h>
#include <Columns/IColumn.h>
#include <memory>
#include <unordered_map>
namespace DB
{
@ -11,29 +9,11 @@ namespace DB
class ChunkInfo
{
public:
using Ptr = std::shared_ptr<ChunkInfo>;
ChunkInfo() = default;
ChunkInfo(const ChunkInfo&) = default;
ChunkInfo(ChunkInfo&&) = default;
virtual Ptr clone() const = 0;
virtual ~ChunkInfo() = default;
ChunkInfo() = default;
};
template<class Derived>
class ChunkInfoCloneable : public ChunkInfo
{
public:
ChunkInfoCloneable() = default;
ChunkInfoCloneable(const ChunkInfoCloneable & other) = default;
Ptr clone() const override
{
return std::static_pointer_cast<ChunkInfo>(std::make_shared<Derived>(*static_cast<const Derived*>(this)));
}
};
using ChunkInfoPtr = std::shared_ptr<const ChunkInfo>;
/**
* Chunk is a list of columns with the same length.
@ -52,26 +32,26 @@ public:
class Chunk
{
public:
using ChunkInfoCollection = CollectionOfDerivedItems<ChunkInfo>;
Chunk() = default;
Chunk(const Chunk & other) = delete;
Chunk(Chunk && other) noexcept
: columns(std::move(other.columns))
, num_rows(other.num_rows)
, chunk_infos(std::move(other.chunk_infos))
, chunk_info(std::move(other.chunk_info))
{
other.num_rows = 0;
}
Chunk(Columns columns_, UInt64 num_rows_);
Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
Chunk(MutableColumns columns_, UInt64 num_rows_);
Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
Chunk & operator=(const Chunk & other) = delete;
Chunk & operator=(Chunk && other) noexcept
{
columns = std::move(other.columns);
chunk_infos = std::move(other.chunk_infos);
chunk_info = std::move(other.chunk_info);
num_rows = other.num_rows;
other.num_rows = 0;
return *this;
@ -82,15 +62,15 @@ public:
void swap(Chunk & other) noexcept
{
columns.swap(other.columns);
chunk_info.swap(other.chunk_info);
std::swap(num_rows, other.num_rows);
chunk_infos.swap(other.chunk_infos);
}
void clear()
{
num_rows = 0;
columns.clear();
chunk_infos.clear();
chunk_info.reset();
}
const Columns & getColumns() const { return columns; }
@ -101,9 +81,9 @@ public:
/** Get empty columns with the same types as in block. */
MutableColumns cloneEmptyColumns() const;
ChunkInfoCollection & getChunkInfos() { return chunk_infos; }
const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; }
void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); }
const ChunkInfoPtr & getChunkInfo() const { return chunk_info; }
bool hasChunkInfo() const { return chunk_info != nullptr; }
void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); }
UInt64 getNumRows() const { return num_rows; }
UInt64 getNumColumns() const { return columns.size(); }
@ -127,7 +107,7 @@ public:
private:
Columns columns;
UInt64 num_rows = 0;
ChunkInfoCollection chunk_infos;
ChunkInfoPtr chunk_info;
void checkNumRowsIsConsistent();
};
@ -137,15 +117,11 @@ using Chunks = std::vector<Chunk>;
/// AsyncInsert needs two kinds of information:
/// - offsets of different sub-chunks
/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`.
class AsyncInsertInfo : public ChunkInfoCloneable<AsyncInsertInfo>
class AsyncInsertInfo : public ChunkInfo
{
public:
AsyncInsertInfo() = default;
AsyncInsertInfo(const AsyncInsertInfo & other) = default;
AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_)
: offsets(offsets_)
, tokens(tokens_)
{}
explicit AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_) : offsets(offsets_), tokens(tokens_) {}
std::vector<size_t> offsets;
std::vector<String> tokens;
@ -154,11 +130,9 @@ public:
using AsyncInsertInfoPtr = std::shared_ptr<AsyncInsertInfo>;
/// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
class ChunkMissingValues : public ChunkInfoCloneable<ChunkMissingValues>
class ChunkMissingValues : public ChunkInfo
{
public:
ChunkMissingValues(const ChunkMissingValues & other) = default;
using RowsBitMask = std::vector<bool>; /// a bit per row for a column
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
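The ChunkInfoCloneable template removed in the hunk above is an instance of the CRTP clone idiom: the base class provides a virtual clone() that copy-constructs the concrete derived type. A minimal sketch of the idiom under illustrative names (Base, Cloneable, MyInfo), using only the standard library:

#include <memory>

class Base
{
public:
    virtual ~Base() = default;
    virtual std::shared_ptr<Base> clone() const = 0;
};

template <class Derived>
class Cloneable : public Base
{
public:
    std::shared_ptr<Base> clone() const override
    {
        /// The static_cast is safe because Derived inherits Cloneable<Derived>.
        return std::make_shared<Derived>(*static_cast<const Derived *>(this));
    }
};

struct MyInfo : Cloneable<MyInfo>
{
    int payload = 0;
};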

View File

@ -147,10 +147,13 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds)
block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
if (auto chunk_info = chunk.getChunkInfo())
{
block.info.bucket_num = agg_info->bucket_num;
block.info.is_overflows = agg_info->is_overflows;
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(chunk_info.get()))
{
block.info.bucket_num = agg_info->bucket_num;
block.info.is_overflows = agg_info->is_overflows;
}
}
return true;

View File

@ -73,10 +73,13 @@ bool PullingPipelineExecutor::pull(Block & block)
}
block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
if (auto chunk_info = chunk.getChunkInfo())
{
block.info.bucket_num = agg_info->bucket_num;
block.info.is_overflows = agg_info->is_overflows;
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(chunk_info.get()))
{
block.info.bucket_num = agg_info->bucket_num;
block.info.is_overflows = agg_info->is_overflows;
}
}
return true;
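Both executor hunks above recover a concrete info type from a type-erased pointer before reading bucket_num/is_overflows. A stand-alone model of that downcast pattern, where dynamic_cast stands in for ClickHouse's typeid_cast and all names are illustrative:

#include <iostream>
#include <memory>

struct ChunkInfoModel { virtual ~ChunkInfoModel() = default; };

struct AggregatedInfoModel : ChunkInfoModel
{
    int bucket_num = -1;
    bool is_overflows = false;
};

void consume(const std::shared_ptr<const ChunkInfoModel> & info)
{
    /// Only act when the info is of the expected concrete type; otherwise ignore it,
    /// as the executors above do.
    if (const auto * agg = dynamic_cast<const AggregatedInfoModel *>(info.get()))
        std::cout << "bucket " << agg->bucket_num << ", overflows " << agg->is_overflows << "\n";
}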

View File

@ -179,9 +179,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count);
Chunks piece;
piece.emplace_back(std::move(columns), count);
piece.back().setChunkInfos(concatenated.getChunkInfos());
piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo());
writeRowGroup(std::move(piece));
}
}

View File

@ -8,9 +8,8 @@ namespace ErrorCodes
}
IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header)
: IProcessor({std::move(input_header)}, {std::move(output_header)})
, input(inputs.front())
, output(outputs.front())
: IProcessor({std::move(input_header)}, {std::move(output_header)}),
input(inputs.front()), output(outputs.front())
{
}

View File

@ -53,11 +53,13 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num
if (!input.chunk.hasRows())
return;
if (input.chunk.getChunkInfos().empty())
const auto & info = input.chunk.getChunkInfo();
if (!info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm");
Int64 allocated_bytes = 0;
if (auto arenas_info = input.chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
/// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator.
if (const auto * arenas_info = typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get()))
allocated_bytes = arenas_info->allocated_bytes;
states[source_num] = State{input.chunk, description, allocated_bytes};
@ -134,7 +136,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge()
info->chunk_num = chunk_num++;
Chunk chunk;
chunk.getChunkInfos().add(std::move(info));
chunk.setChunkInfo(std::move(info));
return chunk;
}
@ -161,7 +163,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation()
chunks.emplace_back(std::move(new_columns), current_rows);
}
chunks.back().getChunkInfos().add(std::make_shared<AggregatedChunkInfo>());
chunks.back().setChunkInfo(std::make_shared<AggregatedChunkInfo>());
states[i].current_row = states[i].to_row;
/// We assume that sizes in bytes of rows are almost the same.

View File

@ -6,22 +6,18 @@ namespace DB
{
/// To carry part level if chunk is produced by a merge tree source
class MergeTreePartLevelInfo : public ChunkInfoCloneable<MergeTreePartLevelInfo>
class MergeTreePartLevelInfo : public ChunkInfo
{
public:
MergeTreePartLevelInfo() = delete;
explicit MergeTreePartLevelInfo(ssize_t part_level)
: origin_merge_tree_part_level(part_level)
{ }
MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default;
explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { }
size_t origin_merge_tree_part_level = 0;
};
inline size_t getPartLevelFromChunk(const Chunk & chunk)
{
const auto part_level_info = chunk.getChunkInfos().get<MergeTreePartLevelInfo>();
if (part_level_info)
const auto & info = chunk.getChunkInfo();
if (const auto * part_level_info = typeid_cast<const MergeTreePartLevelInfo *>(info.get()))
return part_level_info->origin_merge_tree_part_level;
return 0;
}

View File

@ -17,7 +17,7 @@ namespace ErrorCodes
static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false)
{
chunk->getChunkInfos().add(std::make_shared<ChunkSelectFinalIndices>(std::move(chunk->replace_final_selection)));
chunk->setChunkInfo(std::make_shared<ChunkSelectFinalIndices>(std::move(chunk->replace_final_selection)));
return IMergingAlgorithm::Status(std::move(*chunk), finished);
}

View File

@ -3,7 +3,6 @@
#include <Processors/Merges/Algorithms/MergedData.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Processors/Merges/Algorithms/RowRef.h>
#include <Processors/Chunk.h>
namespace Poco
{
@ -15,13 +14,11 @@ namespace DB
/** Use in skipping final to keep list of indices of selected row after merging final
*/
struct ChunkSelectFinalIndices : public ChunkInfoCloneable<ChunkSelectFinalIndices>
struct ChunkSelectFinalIndices : public ChunkInfo
{
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_);
ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default;
const ColumnPtr column_holder;
const ColumnUInt64 * select_final_indices = nullptr;
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_);
};
/** Merges several sorted inputs into one.

View File

@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare()
bool is_port_full = !output.canPush();
/// Push if has data.
if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full)
if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full)
output.push(std::move(state.output_chunk));
if (!is_initialized)

View File

@ -129,7 +129,7 @@ public:
IMergingAlgorithm::Status status = algorithm.merge();
if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty())
if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo())
{
// std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl;
state.output_chunk = std::move(status.chunk);

View File

@ -20,7 +20,7 @@ public:
}
String getName() const override { return "RemoteSink"; }
void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); }
void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); }
void onFinish() override { RemoteInserter::onFinish(); }
};

View File

@ -15,8 +15,9 @@ void SinkToStorage::onConsume(Chunk chunk)
*/
Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns()));
consume(chunk);
cur_chunk = std::move(chunk);
consume(chunk.clone());
if (!lastBlockIsDuplicate())
cur_chunk = std::move(chunk);
}
SinkToStorage::GenerateResult SinkToStorage::onGenerate()

View File

@ -18,7 +18,8 @@ public:
void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }
protected:
virtual void consume(Chunk & chunk) = 0;
virtual void consume(Chunk chunk) = 0;
virtual bool lastBlockIsDuplicate() const { return false; }
private:
std::vector<TableLockHolder> table_locks;
@ -37,7 +38,7 @@ class NullSinkToStorage : public SinkToStorage
public:
using SinkToStorage::SinkToStorage;
std::string getName() const override { return "NullSinkToStorage"; }
void consume(Chunk &) override {}
void consume(Chunk) override {}
};
using SinkPtr = std::shared_ptr<SinkToStorage>;

View File

@ -43,10 +43,7 @@ protected:
info->bucket_num = res.info.bucket_num;
info->is_overflows = res.info.is_overflows;
auto chunk = Chunk(res.getColumns(), res.rows());
chunk.getChunkInfos().add(std::move(info));
return chunk;
return Chunk(res.getColumns(), res.rows(), std::move(info));
}
private:

View File

@ -176,7 +176,7 @@ std::optional<Chunk> RemoteSource::tryGenerate()
auto info = std::make_shared<AggregatedChunkInfo>();
info->bucket_num = block.info.bucket_num;
info->is_overflows = block.info.is_overflows;
chunk.getChunkInfos().add(std::move(info));
chunk.setChunkInfo(std::move(info));
}
return chunk;

View File

@ -5,9 +5,7 @@
namespace DB
{
SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_))
{
}
SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {}
SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows())
{
@ -22,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp
auto info = std::make_shared<AggregatedChunkInfo>();
info->bucket_num = data.info.bucket_num;
info->is_overflows = data.info.is_overflows;
chunk.getChunkInfos().add(std::move(info));
chunk.setChunkInfo(std::move(info));
}
}

View File

@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate()
variants.aggregates_pool = variants.aggregates_pools.at(0).get();
/// Pass info about used memory by aggregate functions further.
to_push_chunk.getChunkInfos().add(std::make_shared<ChunkInfoWithAllocatedBytes>(cur_block_bytes));
to_push_chunk.setChunkInfo(std::make_shared<ChunkInfoWithAllocatedBytes>(cur_block_bytes));
cur_block_bytes = 0;
cur_block_size = 0;
@ -351,12 +351,11 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati
void FinalizeAggregatedTransform::transform(Chunk & chunk)
{
if (params->final)
{
finalizeChunk(chunk, aggregates_mask);
}
else if (!chunk.getChunkInfos().get<AggregatedChunkInfo>())
else if (!chunk.getChunkInfo())
{
chunk.getChunkInfos().add(std::make_shared<AggregatedChunkInfo>());
auto info = std::make_shared<AggregatedChunkInfo>();
chunk.setChunkInfo(std::move(info));
}
}

View File

@ -5,7 +5,6 @@
#include <Processors/ISimpleTransform.h>
#include <Processors/Transforms/AggregatingTransform.h>
#include <Processors/Transforms/finalizeChunk.h>
#include <Processors/Chunk.h>
namespace DB
{
@ -13,12 +12,10 @@ namespace DB
struct InputOrderInfo;
using InputOrderInfoPtr = std::shared_ptr<const InputOrderInfo>;
struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable<ChunkInfoWithAllocatedBytes>
struct ChunkInfoWithAllocatedBytes : public ChunkInfo
{
ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default;
explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_)
: allocated_bytes(allocated_bytes_) {}
Int64 allocated_bytes;
};

View File

@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block)
UInt64 num_rows = block.rows();
Chunk chunk(block.getColumns(), num_rows);
chunk.getChunkInfos().add(std::move(info));
chunk.setChunkInfo(std::move(info));
return chunk;
}
@ -44,11 +44,15 @@ namespace
{
const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk)
{
auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
const auto & info = chunk.getChunkInfo();
if (!info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk.");
const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
if (!agg_info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo.");
return agg_info.get();
return agg_info;
}
/// Reads chunks from file in native format. Provide chunks with aggregation info.
@ -206,7 +210,11 @@ private:
void process(Chunk && chunk)
{
auto chunks_to_merge = chunk.getChunkInfos().get<ChunksToMerge>();
if (!chunk.hasChunkInfo())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName());
const auto & info = chunk.getChunkInfo();
const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get());
if (!chunks_to_merge)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName());

View File

@ -2,7 +2,6 @@
#include <Compression/CompressedReadBuffer.h>
#include <IO/ReadBufferFromFile.h>
#include <Interpreters/Aggregator.h>
#include <Processors/Chunk.h>
#include <Processors/IAccumulatingTransform.h>
#include <Common/Stopwatch.h>
#include <Common/setThreadName.h>
@ -20,7 +19,7 @@ namespace CurrentMetrics
namespace DB
{
class AggregatedChunkInfo : public ChunkInfoCloneable<AggregatedChunkInfo>
class AggregatedChunkInfo : public ChunkInfo
{
public:
bool is_overflows = false;

View File

@ -27,12 +27,18 @@ public:
}
ExceptionKeepingTransform::work();
if (finish_chunk)
{
data.chunk = std::move(finish_chunk);
ready_output = true;
}
}
protected:
void onConsume(Chunk chunk) override
{
cur_chunk = Squashing::squash(std::move(chunk));
if (auto res_chunk = DB::Squashing::squash(std::move(chunk)))
cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows());
}
GenerateResult onGenerate() override
@ -42,10 +48,16 @@ protected:
res.is_done = true;
return res;
}
void onFinish() override
{
auto chunk = DB::Squashing::squash({});
finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows());
}
private:
Squashing squashing;
Chunk cur_chunk;
Chunk finish_chunk;
};
}

View File

@ -1,7 +1,6 @@
#include <Processors/Transforms/CountingTransform.h>
#include <IO/Progress.h>
#include <Interpreters/ProcessList.h>
#include <Processors/Transforms/CountingTransform.h>
#include <Common/ProfileEvents.h>
#include <Common/ThreadStatus.h>

View File

@ -1,236 +0,0 @@
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
#include <IO/WriteHelpers.h>
#include <Common/logger_useful.h>
#include <Common/Exception.h>
#include <Common/SipHash.h>
#include <fmt/core.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
void RestoreChunkInfosTransform::transform(Chunk & chunk)
{
chunk.getChunkInfos().append(chunk_infos.clone());
}
namespace DeduplicationToken
{
String TokenInfo::getToken() const
{
if (!isDefined())
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken());
return getTokenImpl();
}
String TokenInfo::getTokenImpl() const
{
String result;
result.reserve(getTotalSize());
for (const auto & part : parts)
{
if (!result.empty())
result.append(":");
result.append(part);
}
return result;
}
String TokenInfo::debugToken() const
{
return getTokenImpl();
}
void TokenInfo::addChunkHash(String part)
{
if (stage == UNDEFINED && empty())
stage = DEFINE_SOURCE_WITH_HASHES;
if (stage != DEFINE_SOURCE_WITH_HASHES)
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
addTokenPart(std::move(part));
}
void TokenInfo::finishChunkHashes()
{
if (stage == UNDEFINED && empty())
stage = DEFINE_SOURCE_WITH_HASHES;
if (stage != DEFINE_SOURCE_WITH_HASHES)
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
stage = DEFINED;
}
void TokenInfo::setUserToken(const String & token)
{
if (stage == UNDEFINED && empty())
stage = DEFINE_SOURCE_USER_TOKEN;
if (stage != DEFINE_SOURCE_USER_TOKEN)
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
addTokenPart(fmt::format("user-token-{}", token));
}
void TokenInfo::setSourceWithUserToken(size_t block_number)
{
if (stage != DEFINE_SOURCE_USER_TOKEN)
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
addTokenPart(fmt::format("source-number-{}", block_number));
stage = DEFINED;
}
void TokenInfo::setViewID(const String & id)
{
if (stage == DEFINED)
stage = DEFINE_VIEW;
if (stage != DEFINE_VIEW)
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
addTokenPart(fmt::format("view-id-{}", id));
}
void TokenInfo::setViewBlockNumber(size_t block_number)
{
if (stage != DEFINE_VIEW)
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
addTokenPart(fmt::format("view-block-{}", block_number));
stage = DEFINED;
}
void TokenInfo::reset()
{
stage = UNDEFINED;
parts.clear();
}
void TokenInfo::addTokenPart(String part)
{
parts.push_back(std::move(part));
}
size_t TokenInfo::getTotalSize() const
{
if (parts.empty())
return 0;
size_t size = 0;
for (const auto & part : parts)
size += part.size();
// we reserve more size here to be able to add a delimiter between parts.
return size + parts.size() - 1;
}
#ifdef ABORT_ON_LOGICAL_ERROR
void CheckTokenTransform::transform(Chunk & chunk)
{
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
if (!token_info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug);
LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken());
}
#endif
String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk)
{
SipHash hash;
for (const auto & column : chunk.getColumns())
column->updateHashFast(hash);
const auto hash_value = hash.get128();
return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]);
}
void DefineSourceWithChunkHashTransform::transform(Chunk & chunk)
{
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
if (!token_info)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform");
if (token_info->isDefined())
return;
token_info->addChunkHash(getChunkHash(chunk));
token_info->finishChunkHashes();
}
void SetUserTokenTransform::transform(Chunk & chunk)
{
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
if (!token_info)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"TokenInfo is expected for consumed chunk in SetUserTokenTransform");
token_info->setUserToken(user_token);
}
void SetSourceBlockNumberTransform::transform(Chunk & chunk)
{
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
if (!token_info)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform");
token_info->setSourceWithUserToken(block_number++);
}
void SetViewIDTransform::transform(Chunk & chunk)
{
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
if (!token_info)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"TokenInfo is expected for consumed chunk in SetViewIDTransform");
token_info->setViewID(view_id);
}
void SetViewBlockNumberTransform::transform(Chunk & chunk)
{
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
if (!token_info)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform");
token_info->setViewBlockNumber(block_number++);
}
void ResetTokenTransform::transform(Chunk & chunk)
{
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
if (!token_info)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"TokenInfo is expected for consumed chunk in ResetTokenTransform");
token_info->reset();
}
}
}
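The TokenInfo methods above enforce a strict build order on the deduplication token. A simplified stand-alone model of that state machine (the chunk-hash path and error details are omitted; TokenModel and its stage names are illustrative):

#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

class TokenModel
{
public:
    void setUserToken(const std::string & token)
    {
        require(stage == Undefined, "user token must come first");
        parts.push_back("user-token-" + token);
        stage = SourceUserToken;
    }

    void setSourceBlockNumber(size_t n)
    {
        require(stage == SourceUserToken, "block number follows the user token");
        parts.push_back("source-number-" + std::to_string(n));
        stage = Defined;
    }

    void setViewID(const std::string & id)
    {
        require(stage == Defined, "views extend an already defined token");
        parts.push_back("view-id-" + id);
        stage = View;
    }

    void setViewBlockNumber(size_t n)
    {
        require(stage == View, "view block number follows the view id");
        parts.push_back("view-block-" + std::to_string(n));
        stage = Defined;
    }

    /// Join the accumulated parts with ':' as TokenInfo::getTokenImpl does.
    std::string token() const
    {
        std::string result;
        for (const auto & part : parts)
        {
            if (!result.empty())
                result += ":";
            result += part;
        }
        return result;
    }

private:
    enum Stage { Undefined, SourceUserToken, Defined, View };

    static void require(bool ok, const char * message)
    {
        if (!ok)
            throw std::logic_error(message);
    }

    Stage stage = Undefined;
    std::vector<std::string> parts;
};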

View File

@ -1,237 +0,0 @@
#pragma once
#include <Processors/Chunk.h>
#include <Processors/ISimpleTransform.h>
#include <base/defines.h>
#include "Common/Logger.h"
namespace DB
{
class RestoreChunkInfosTransform : public ISimpleTransform
{
public:
RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_)
: ISimpleTransform(header_, header_, true)
, chunk_infos(std::move(chunk_infos_))
{}
String getName() const override { return "RestoreChunkInfosTransform"; }
void transform(Chunk & chunk) override;
private:
Chunk::ChunkInfoCollection chunk_infos;
};
namespace DeduplicationToken
{
class TokenInfo : public ChunkInfoCloneable<TokenInfo>
{
public:
TokenInfo() = default;
TokenInfo(const TokenInfo & other) = default;
String getToken() const;
String debugToken() const;
bool empty() const { return parts.empty(); }
bool isDefined() const { return stage == DEFINED; }
void addChunkHash(String part);
void finishChunkHashes();
void setUserToken(const String & token);
void setSourceWithUserToken(size_t block_number);
void setViewID(const String & id);
void setViewBlockNumber(size_t block_number);
void reset();
private:
String getTokenImpl() const;
void addTokenPart(String part);
size_t getTotalSize() const;
/* The token has to be prepared in a particular order.
* BuildingStage ensures that the token is expanded in the following order.
* First the token is expanded with information about the source.
* That can be done in two ways: add several hash sums from the source chunks, or provide a user-defined deduplication token and its sequential block number.
*
* transition // method
* UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash
* DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash
* DEFINE_SOURCE_WITH_HASHES -> DEFINED // finishChunkHashes
*
* transition // method
* UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken
* DEFINE_SOURCE_USER_TOKEN -> DEFINED // setSourceWithUserToken
*
* After the token is defined, it can be extended with a view id and a view block number. In fact it has to be expanded with view details if there are one or more views.
*
* transition // method
* DEFINED -> DEFINE_VIEW // setViewID
* DEFINE_VIEW -> DEFINED // setViewBlockNumber
*/
enum BuildingStage
{
UNDEFINED,
DEFINE_SOURCE_WITH_HASHES,
DEFINE_SOURCE_USER_TOKEN,
DEFINE_VIEW,
DEFINED,
};
BuildingStage stage = UNDEFINED;
std::vector<String> parts;
};
#ifdef ABORT_ON_LOGICAL_ERROR
/// Use this class only with debug builds in CI for introspection.
class CheckTokenTransform : public ISimpleTransform
{
public:
CheckTokenTransform(String debug_, const Block & header_)
: ISimpleTransform(header_, header_, true)
, debug(std::move(debug_))
{
}
String getName() const override { return "DeduplicationToken::CheckTokenTransform"; }
void transform(Chunk & chunk) override;
private:
String debug;
LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform");
};
#endif
class AddTokenInfoTransform : public ISimpleTransform
{
public:
explicit AddTokenInfoTransform(const Block & header_)
: ISimpleTransform(header_, header_, true)
{
}
String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; }
void transform(Chunk & chunk) override
{
chunk.getChunkInfos().add(std::make_shared<TokenInfo>());
}
};
class DefineSourceWithChunkHashTransform : public ISimpleTransform
{
public:
explicit DefineSourceWithChunkHashTransform(const Block & header_)
: ISimpleTransform(header_, header_, true)
{
}
String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; }
// Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts.
// But if a table has a different engine, we still need to define the source of the data in the deduplication token.
// This transform defines the source as a hash of the entire block in the deduplication token.
void transform(Chunk & chunk) override;
static String getChunkHash(const Chunk & chunk);
};
class ResetTokenTransform : public ISimpleTransform
{
public:
explicit ResetTokenTransform(const Block & header_)
: ISimpleTransform(header_, header_, true)
{
}
String getName() const override { return "DeduplicationToken::ResetTokenTransform"; }
void transform(Chunk & chunk) override;
};
class SetUserTokenTransform : public ISimpleTransform
{
public:
SetUserTokenTransform(String user_token_, const Block & header_)
: ISimpleTransform(header_, header_, true)
, user_token(std::move(user_token_))
{
}
String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; }
void transform(Chunk & chunk) override;
private:
String user_token;
};
class SetSourceBlockNumberTransform : public ISimpleTransform
{
public:
explicit SetSourceBlockNumberTransform(const Block & header_)
: ISimpleTransform(header_, header_, true)
{
}
String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; }
void transform(Chunk & chunk) override;
private:
size_t block_number = 0;
};
class SetViewIDTransform : public ISimpleTransform
{
public:
SetViewIDTransform(String view_id_, const Block & header_)
: ISimpleTransform(header_, header_, true)
, view_id(std::move(view_id_))
{
}
String getName() const override { return "DeduplicationToken::SetViewIDTransform"; }
void transform(Chunk & chunk) override;
private:
String view_id;
};
class SetViewBlockNumberTransform : public ISimpleTransform
{
public:
explicit SetViewBlockNumberTransform(const Block & header_)
: ISimpleTransform(header_, header_, true)
{
}
String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; }
void transform(Chunk & chunk) override;
private:
size_t block_number = 0;
};
}
}

View File

@ -1,7 +1,5 @@
#include <Processors/Transforms/ExpressionTransform.h>
#include <Interpreters/ExpressionActions.h>
namespace DB
{

View File

@ -365,9 +365,10 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare()
return Status::Finished;
}
task = data.chunk.getChunkInfos().get<DelayedBlocksTask>();
if (!task)
if (!data.chunk.hasChunkInfo())
throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info");
task = std::dynamic_pointer_cast<const DelayedBlocksTask>(data.chunk.getChunkInfo());
}
else
{
@ -478,7 +479,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare()
if (output.isFinished())
continue;
Chunk chunk;
chunk.getChunkInfos().add(std::make_shared<DelayedBlocksTask>());
chunk.setChunkInfo(std::make_shared<DelayedBlocksTask>());
output.push(std::move(chunk));
output.finish();
}
@ -495,7 +496,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare()
{
Chunk chunk;
auto task = std::make_shared<DelayedBlocksTask>(delayed_blocks, left_delayed_stream_finished_counter);
chunk.getChunkInfos().add(std::move(task));
chunk.setChunkInfo(task);
output.push(std::move(chunk));
}
delayed_blocks = nullptr;

View File

@ -1,7 +1,6 @@
#pragma once
#include <Processors/IProcessor.h>
#include <Processors/Chunk.h>
#include <memory>
namespace DB
{
@ -112,12 +111,11 @@ private:
};
class DelayedBlocksTask : public ChunkInfoCloneable<DelayedBlocksTask>
class DelayedBlocksTask : public ChunkInfo
{
public:
DelayedBlocksTask() = default;
DelayedBlocksTask(const DelayedBlocksTask & other) = default;
explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_)
: delayed_blocks(std::move(delayed_blocks_))
, left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_)

View File

@ -1,7 +1,6 @@
#include <Processors/Transforms/MaterializingTransform.h>
#include <Columns/ColumnSparse.h>
namespace DB
{

View File

@ -150,7 +150,11 @@ private:
if (!chunk.hasRows())
return;
const auto & agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
const auto & info = chunk.getChunkInfo();
if (!info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform.");
const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
if (!agg_info)
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform.");

View File

@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_
auto info = std::make_shared<ChunksToMerge>();
info->bucket_num = bucket;
info->is_overflows = is_overflows;
info->chunks = std::make_shared<Chunks>(std::move(chunks));
info->chunks = std::make_unique<Chunks>(std::move(chunks));
Chunk chunk;
chunk.getChunkInfos().add(std::move(info));
chunk.setChunkInfo(std::move(info));
output.push(std::move(chunk));
}
@ -255,10 +255,11 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input)
if (!chunk.hasRows())
return;
if (chunk.getChunkInfos().empty())
const auto & info = chunk.getChunkInfo();
if (!info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform.");
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get()))
{
Int32 bucket = agg_info->bucket_num;
bool is_overflows = agg_info->is_overflows;
@ -274,7 +275,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input)
last_bucket_number[input] = bucket;
}
}
else if (chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
else if (typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get()))
{
single_level_chunks.emplace_back(std::move(chunk));
}
@ -303,11 +304,7 @@ void GroupingAggregatedTransform::work()
Int32 bucket = cur_block.info.bucket_num;
auto chunk_info = std::make_shared<AggregatedChunkInfo>();
chunk_info->bucket_num = bucket;
auto chunk = Chunk(cur_block.getColumns(), cur_block.rows());
chunk.getChunkInfos().add(std::move(chunk_info));
chunks_map[bucket].emplace_back(std::move(chunk));
chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info)));
}
}
}
@ -322,7 +319,9 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform(
void MergingAggregatedBucketTransform::transform(Chunk & chunk)
{
auto chunks_to_merge = chunk.getChunkInfos().get<ChunksToMerge>();
const auto & info = chunk.getChunkInfo();
const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get());
if (!chunks_to_merge)
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.");
@ -331,10 +330,11 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk)
BlocksList blocks_list;
for (auto & cur_chunk : *chunks_to_merge->chunks)
{
if (cur_chunk.getChunkInfos().empty())
const auto & cur_info = cur_chunk.getChunkInfo();
if (!cur_info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform.");
if (auto agg_info = cur_chunk.getChunkInfos().get<AggregatedChunkInfo>())
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(cur_info.get()))
{
Block block = header.cloneWithColumns(cur_chunk.detachColumns());
block.info.is_overflows = agg_info->is_overflows;
@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk)
blocks_list.emplace_back(std::move(block));
}
else if (cur_chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
else if (typeid_cast<const ChunkInfoWithAllocatedBytes *>(cur_info.get()))
{
Block block = header.cloneWithColumns(cur_chunk.detachColumns());
block.info.is_overflows = false;
@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk)
res_info->is_overflows = chunks_to_merge->is_overflows;
res_info->bucket_num = chunks_to_merge->bucket_num;
res_info->chunk_num = chunks_to_merge->chunk_num;
chunk.getChunkInfos().add(std::move(res_info));
chunk.setChunkInfo(std::move(res_info));
auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled);
@ -405,7 +405,11 @@ bool SortingAggregatedTransform::tryPushChunk()
void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input)
{
auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
const auto & info = chunk.getChunkInfo();
if (!info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform.");
const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
if (!agg_info)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Chunk should have AggregatedChunkInfo in SortingAggregatedTransform.");

View File

@ -3,7 +3,6 @@
#include <Core/SortDescription.h>
#include <Common/HashTable/HashSet.h>
#include <Interpreters/Aggregator.h>
#include <Processors/Chunk.h>
#include <Processors/IProcessor.h>
#include <Processors/ISimpleTransform.h>
#include <Processors/ResizeProcessor.h>
@ -143,9 +142,9 @@ private:
void addChunk(Chunk chunk, size_t from_input);
};
struct ChunksToMerge : public ChunkInfoCloneable<ChunksToMerge>
struct ChunksToMerge : public ChunkInfo
{
std::shared_ptr<Chunks> chunks;
std::unique_ptr<Chunks> chunks;
Int32 bucket_num = -1;
bool is_overflows = false;
UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order

View File

@ -32,10 +32,11 @@ void MergingAggregatedTransform::consume(Chunk chunk)
total_input_rows += input_rows;
++total_input_blocks;
if (chunk.getChunkInfos().empty())
const auto & info = chunk.getChunkInfo();
if (!info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform.");
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get()))
{
/** If the remote servers used a two-level aggregation method,
* then blocks will contain information about the number of the bucket.
@ -48,7 +49,7 @@ void MergingAggregatedTransform::consume(Chunk chunk)
bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block));
}
else if (chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
else if (typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get()))
{
auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns());
block.info.is_overflows = false;
@ -88,8 +89,7 @@ Chunk MergingAggregatedTransform::generate()
UInt64 num_rows = block.rows();
Chunk chunk(block.getColumns(), num_rows);
chunk.getChunkInfos().add(std::move(info));
chunk.setChunkInfo(std::move(info));
return chunk;
}

View File

@ -10,20 +10,20 @@ namespace ErrorCodes
}
PlanSquashingTransform::PlanSquashingTransform(
Block header_, size_t min_block_size_rows, size_t min_block_size_bytes)
: IInflatingTransform(header_, header_)
, squashing(header_, min_block_size_rows, min_block_size_bytes)
const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes)
: IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes)
{
}
void PlanSquashingTransform::consume(Chunk chunk)
{
squashed_chunk = squashing.add(std::move(chunk));
if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo())
squashed_chunk.swap(current_chunk);
}
Chunk PlanSquashingTransform::generate()
{
if (!squashed_chunk)
if (!squashed_chunk.hasChunkInfo())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform");
Chunk result_chunk;
@ -33,11 +33,12 @@ Chunk PlanSquashingTransform::generate()
bool PlanSquashingTransform::canGenerate()
{
return bool(squashed_chunk);
return squashed_chunk.hasChunkInfo();
}
Chunk PlanSquashingTransform::getRemaining()
{
return squashing.flush();
Chunk current_chunk = squashing.flush();
return current_chunk;
}
}
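PlanSquashingTransform above follows the IInflatingTransform contract: consume() buffers input, canGenerate()/generate() emit an output once one is ready, and getRemaining() flushes whatever is left at the end. A toy model of that contract, where a fixed "three items" rule stands in for the real size thresholds; all names here are illustrative:

#include <utility>
#include <vector>

template <typename T>
class InflatingModel
{
public:
    void consume(T value)
    {
        buffered.push_back(std::move(value));
        ready = buffered.size() >= 3;  /// stand-in for the "enough size" test
    }

    bool canGenerate() const { return ready; }

    std::vector<T> generate()
    {
        ready = false;
        return std::exchange(buffered, {});
    }

    /// Called once the input is exhausted, mirroring squashing.flush() above.
    std::vector<T> getRemaining() { return std::exchange(buffered, {}); }

private:
    std::vector<T> buffered;
    bool ready = false;
};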

View File

@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform
{
public:
PlanSquashingTransform(
Block header_, size_t min_block_size_rows, size_t min_block_size_bytes);
const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes);
String getName() const override { return "PlanSquashingTransform"; }
@ -23,6 +23,7 @@ protected:
private:
Squashing squashing;
Chunk squashed_chunk;
Chunk finish_chunk;
};
}

View File

@ -26,7 +26,7 @@ public:
void transform(Chunk & chunk) override
{
size_t num_rows = chunk.getNumRows();
auto select_final_indices_info = chunk.getChunkInfos().extract<ChunkSelectFinalIndices>();
const auto * select_final_indices_info = typeid_cast<const ChunkSelectFinalIndices *>(chunk.getChunkInfo().get());
if (!select_final_indices_info || !select_final_indices_info->select_final_indices)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column");
@ -41,6 +41,7 @@ public:
chunk.setColumns(std::move(columns), index_column->size());
}
chunk.setChunkInfo(nullptr);
}
};

View File

@ -18,7 +18,9 @@ SquashingTransform::SquashingTransform(
void SquashingTransform::onConsume(Chunk chunk)
{
cur_chunk = Squashing::squash(squashing.add(std::move(chunk)));
Chunk planned_chunk = squashing.add(std::move(chunk));
if (planned_chunk.hasChunkInfo())
cur_chunk = DB::Squashing::squash(std::move(planned_chunk));
}
SquashingTransform::GenerateResult SquashingTransform::onGenerate()
@ -31,7 +33,10 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate()
void SquashingTransform::onFinish()
{
finish_chunk = Squashing::squash(squashing.flush());
Chunk chunk = squashing.flush();
if (chunk.hasChunkInfo())
chunk = DB::Squashing::squash(std::move(chunk));
finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows());
}
void SquashingTransform::work()
@ -44,7 +49,6 @@ void SquashingTransform::work()
}
ExceptionKeepingTransform::work();
if (finish_chunk)
{
data.chunk = std::move(finish_chunk);
@ -63,14 +67,18 @@ void SimpleSquashingTransform::transform(Chunk & chunk)
{
if (!finished)
{
chunk = Squashing::squash(squashing.add(std::move(chunk)));
Chunk planned_chunk = squashing.add(std::move(chunk));
if (planned_chunk.hasChunkInfo())
chunk = DB::Squashing::squash(std::move(planned_chunk));
}
else
{
if (chunk.hasRows())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost");
chunk = Squashing::squash(squashing.flush());
chunk = squashing.flush();
if (chunk.hasChunkInfo())
chunk = DB::Squashing::squash(std::move(chunk));
}
}

View File

@ -150,7 +150,11 @@ void TotalsHavingTransform::transform(Chunk & chunk)
/// Block with values not included in `max_rows_to_group_by`. We'll postpone it.
if (overflow_row)
{
const auto & agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
const auto & info = chunk.getChunkInfo();
if (!info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform.");
const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
if (!agg_info)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform.");

View File

@ -17,6 +17,9 @@
#include <Common/FieldVisitorConvertToNumber.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <Poco/Logger.h>
#include <Common/logger_useful.h>
#include <limits>
@ -71,6 +74,9 @@ public:
size_t function_index) const = 0;
virtual std::optional<WindowFrame> getDefaultFrame() const { return {}; }
/// Is the frame type supported by this function.
virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; }
};
// Compares ORDER BY column values at given rows to find the boundaries of frame:
@ -402,6 +408,19 @@ WindowTransform::WindowTransform(const Block & input_header_,
}
}
}
for (const auto & workspace : workspaces)
{
if (workspace.window_function_impl)
{
if (!workspace.window_function_impl->checkWindowFrameType(this))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported window frame type for function '{}'",
workspace.aggregate_function->getName());
}
}
}
}
WindowTransform::~WindowTransform()
@ -1609,6 +1628,34 @@ struct WindowFunctionHelpers
{
recurrent_detail::setValueToOutputColumn<T>(transform, function_index, value);
}
ALWAYS_INLINE static bool checkPartitionEnterFirstRow(const WindowTransform * transform) { return transform->current_row_number == 1; }
ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform)
{
/// This is a fast check.
if (!transform->partition_ended)
return false;
auto current_row = transform->current_row;
/// checkPartitionEnterLastRow is called on each row, so advance current_row.row here as well.
current_row.row++;
const auto & partition_end_row = transform->partition_end;
/// The partition end is reached when one of the following holds:
/// - the current row is the partition end row,
/// - or the current row is the last row of all input.
if (current_row != partition_end_row)
{
/// When the current row is not the partition end row, we need to check whether it is the
/// last input row.
if (current_row.row < transform->blockRowsNumber(current_row))
return false;
if (partition_end_row.block != current_row.block + 1 || partition_end_row.row)
return false;
}
return true;
}
};
template<typename State>
@ -2058,8 +2105,6 @@ namespace
const WindowTransform * transform,
size_t function_index,
const DataTypes & argument_types);
static void checkWindowFrameType(const WindowTransform * transform);
};
}
@ -2080,6 +2125,29 @@ struct WindowFunctionNtile final : public StatefulWindowFunction<NtileState>
bool allocatesMemoryInArena() const override { return false; }
bool checkWindowFrameType(const WindowTransform * transform) const override
{
if (transform->order_by_indices.empty())
{
LOG_ERROR(getLogger("WindowFunctionNtile"), "Window frame for 'ntile' function must have ORDER BY clause");
return false;
}
// We must wait for the partition end to get the total number of rows in this
// partition, so no block can be dropped before the end of this partition.
bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded
&& transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded;
if (!is_frame_supported)
{
LOG_ERROR(
getLogger("WindowFunctionNtile"),
"Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'");
return false;
}
return true;
}
std::optional<WindowFrame> getDefaultFrame() const override
{
WindowFrame frame;
@ -2106,7 +2174,6 @@ namespace
{
if (!buckets) [[unlikely]]
{
checkWindowFrameType(transform);
const auto & current_block = transform->blockAt(transform->current_row);
const auto & workspace = transform->workspaces[function_index];
const auto & arg_col = *current_block.original_input_columns[workspace.argument_column_indices[0]];
@ -2128,7 +2195,7 @@ namespace
}
}
// new partition
if (transform->current_row_number == 1) [[unlikely]]
if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) [[unlikely]]
{
current_partition_rows = 0;
current_partition_inserted_row = 0;
@ -2137,25 +2204,9 @@ namespace
current_partition_rows++;
// Only do the action when we meet the last row in this partition.
if (!transform->partition_ended)
if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform))
return;
else
{
auto current_row = transform->current_row;
current_row.row++;
const auto & end_row = transform->partition_end;
if (current_row != end_row)
{
if (current_row.row < transform->blockRowsNumber(current_row))
return;
if (end_row.block != current_row.block + 1 || end_row.row)
{
return;
}
// else, current_row is the last input row.
}
}
auto bucket_capacity = current_partition_rows / buckets;
auto capacity_diff = current_partition_rows - bucket_capacity * buckets;
@ -2193,23 +2244,115 @@ namespace
bucket_num += 1;
}
}
}
void NtileState::checkWindowFrameType(const WindowTransform * transform)
namespace
{
struct PercentRankState
{
RowNumber start_row;
UInt64 current_partition_rows = 0;
};
}
struct WindowFunctionPercentRank final : public StatefulWindowFunction<PercentRankState>
{
public:
WindowFunctionPercentRank(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
{}
bool allocatesMemoryInArena() const override { return false; }
bool checkWindowFrameType(const WindowTransform * transform) const override
{
if (transform->order_by_indices.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause");
if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE
|| transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded
|| transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current)
{
LOG_ERROR(
getLogger("WindowFunctionPercentRank"),
"Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT'");
return false;
}
return true;
}
// We must wait for the partition end to get the total number of rows in this
// partition, so no block can be dropped before the end of this partition.
bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded
&& transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded;
if (!is_frame_supported)
std::optional<WindowFrame> getDefaultFrame() const override
{
WindowFrame frame;
frame.type = WindowFrame::FrameType::RANGE;
frame.begin_type = WindowFrame::BoundaryType::Unbounded;
frame.end_type = WindowFrame::BoundaryType::Current;
return frame;
}
void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override
{
auto & state = getWorkspaceState(transform, function_index);
if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'");
state.current_partition_rows = 0;
state.start_row = transform->current_row;
}
insertRankIntoColumn(transform, function_index);
state.current_partition_rows++;
if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform))
{
return;
}
UInt64 remaining_rows = state.current_partition_rows;
Float64 percent_rank_denominator = remaining_rows == 1 ? 1 : remaining_rows - 1;
while (remaining_rows > 0)
{
auto block_rows_number = transform->blockRowsNumber(state.start_row);
auto available_block_rows = block_rows_number - state.start_row.row;
if (available_block_rows <= remaining_rows)
{
/// This partition involves multiple blocks. Finish current block and move on to the
/// next block.
auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index];
auto & data = assert_cast<ColumnFloat64 &>(to_column).getData();
for (size_t i = state.start_row.row; i < block_rows_number; ++i)
data[i] = (data[i] - 1) / percent_rank_denominator;
state.start_row.block++;
state.start_row.row = 0;
remaining_rows -= available_block_rows;
}
else
{
/// The partition ends in the current block.
auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index];
auto & data = assert_cast<ColumnFloat64 &>(to_column).getData();
for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i)
{
data[i] = (data[i] - 1) / percent_rank_denominator;
}
state.start_row.row += remaining_rows;
remaining_rows = 0;
}
}
}
}
inline PercentRankState & getWorkspaceState(const WindowTransform * transform, size_t function_index) const
{
const auto & workspace = transform->workspaces[function_index];
return getState(workspace);
}
inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const
{
auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index];
assert_cast<ColumnFloat64 &>(to_column).getData().push_back(static_cast<Float64>(transform->peer_group_start_row_number));
}
};
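For reference, the arithmetic that windowInsertResultInto applies above: rank values are written out as rows arrive, then rewritten in place as (rank - 1) / (partition_rows - 1) once the partition size is known, with a denominator of 1 for single-row partitions. A stand-alone sketch of just that arithmetic over made-up rank values:

#include <cstdio>
#include <vector>

int main()
{
    std::vector<double> ranks = {1, 2, 2, 4};  /// rank() values within one partition
    const double denominator = ranks.size() == 1 ? 1.0 : double(ranks.size()) - 1.0;
    for (double rank : ranks)
        std::printf("%g\n", (rank - 1) / denominator);  /// prints 0, 0.333333, 0.333333, 1
    return 0;
}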
// ClickHouse-specific variant of lag/lead that respects the window frame.
template <bool is_lead>
@ -2582,6 +2725,13 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
parameters);
}, properties}, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("percent_rank", {[](const std::string & name,
const DataTypes & argument_types, const Array & parameters, const Settings *)
{
return std::make_shared<WindowFunctionPercentRank>(name, argument_types,
parameters);
}, properties}, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("row_number", {[](const std::string & name,
const DataTypes & argument_types, const Array & parameters, const Settings *)
{

View File

@ -5,9 +5,7 @@
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Parsers/ASTInsertQuery.h>
#include <Processors/Chunk.h>
#include <Processors/Transforms/CountingTransform.h>
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
#include <Processors/Transforms/PlanSquashingTransform.h>
#include <Processors/Transforms/SquashingTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
@ -18,7 +16,6 @@
#include <Storages/StorageMaterializedView.h>
#include <Storages/StorageValues.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Common/Logger.h>
#include <Common/Exception.h>
#include <Common/CurrentThread.h>
#include <Common/MemoryTracker.h>
@ -27,12 +24,9 @@
#include <Common/ThreadStatus.h>
#include <Common/checkStackSize.h>
#include <Common/logger_useful.h>
#include "base/defines.h"
#include <Core/Field.h>
#include <atomic>
#include <chrono>
#include <memory>
namespace ProfileEvents
@ -111,7 +105,7 @@ private:
class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform
{
public:
ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_);
ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_);
String getName() const override { return "ExecutingInnerQueryFromView"; }
@ -122,7 +116,6 @@ protected:
private:
ViewsDataPtr views_data;
ViewRuntimeData & view;
bool disable_deduplication_for_children;
struct State
{
@ -145,7 +138,7 @@ class PushingToLiveViewSink final : public SinkToStorage
public:
PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_);
String getName() const override { return "PushingToLiveViewSink"; }
void consume(Chunk & chunk) override;
void consume(Chunk chunk) override;
private:
StorageLiveView & live_view;
@ -159,7 +152,7 @@ class PushingToWindowViewSink final : public SinkToStorage
public:
PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_);
String getName() const override { return "PushingToWindowViewSink"; }
void consume(Chunk & chunk) override;
void consume(Chunk chunk) override;
private:
StorageWindowView & window_view;
@ -223,10 +216,45 @@ std::optional<Chain> generateViewChain(
const auto & insert_settings = insert_context->getSettingsRef();
// Do not deduplicate insertions into MV if the main insertion is Ok
if (disable_deduplication_for_children)
{
insert_context->setSetting("insert_deduplicate", Field{false});
}
else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views &&
!insert_settings.insert_deduplication_token.value.empty())
{
/** Update the deduplication token passed to a dependent MV with the current view id, so that
 * deduplication can be handled properly in complex INSERT flows.
*
* Example:
*
* landing ---> mv_1_1 ---> ds_1_1 ---> mv_2_1 ---> ds_2_1 ---> mv_3_1 ---> ds_3_1
* | |
* --> mv_1_2 ---> ds_1_2 ---> mv_2_2 --
*
* Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will
* be inserted into `ds_2_1`.
*
* We are forced to use view id instead of table id because there are some possible INSERT flows where no tables
* are involved.
*
* Example:
*
* landing ---> mv_1_1 ---> ds_1_1
* | |
* --> mv_1_2 --
*
*/
auto insert_deduplication_token = insert_settings.insert_deduplication_token.value;
if (view_id.hasUUID())
insert_deduplication_token += "_" + toString(view_id.uuid);
else
insert_deduplication_token += "_" + view_id.getFullNameNotQuoted();
insert_context->setSetting("insert_deduplication_token", insert_deduplication_token);
}
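A sketch of the token chaining this comment describes: each hop through a materialized view appends its view id (the UUID if available, otherwise the fully qualified name) to the user-supplied insert_deduplication_token, so blocks arriving at one destination via different view paths carry distinct deduplication ids. The helper below is hypothetical, not the actual ClickHouse API:

#include <iostream>
#include <string>

/// Hypothetical illustration of per-view token chaining: extending the token
/// at every MV hop keeps ids distinct for different paths to the same table.
static std::string chainDeduplicationToken(std::string token, const std::string & view_id)
{
    return token + "_" + view_id;
}

int main()
{
    std::string token = "user_token";
    token = chainDeduplicationToken(token, "mv_1_1");
    token = chainDeduplicationToken(token, "mv_2_1");
    std::cout << token << '\n'; /// user_token_mv_1_1_mv_2_1
}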
// Processing of blocks for MVs is done block by block, and there will
// be no parallel reading afterwards (plus it is not a costless operation).
@ -333,13 +361,7 @@ std::optional<Chain> generateViewChain(
insert_columns.emplace_back(column.name);
}
InterpreterInsertQuery interpreter(
nullptr,
insert_context,
/* allow_materialized */ false,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false);
/// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false`
bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type;
@ -356,10 +378,6 @@ std::optional<Chain> generateViewChain(
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
}
#ifdef ABORT_ON_LOGICAL_ERROR
out.addSource(std::make_shared<DeduplicationToken::CheckTokenTransform>("Before squashing", out.getInputHeader()));
#endif
auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), current_thread, insert_context->getQuota());
counting->setProcessListElement(insert_context->getProcessListElement());
counting->setProgressCallback(insert_context->getProgressCallback());
@ -402,19 +420,11 @@ std::optional<Chain> generateViewChain(
if (type == QueryViewsLogElement::ViewType::MATERIALIZED)
{
#ifdef ABORT_ON_LOGICAL_ERROR
out.addSource(std::make_shared<DeduplicationToken::CheckTokenTransform>("Right after Inner query", out.getInputHeader()));
#endif
auto executing_inner_query = std::make_shared<ExecutingInnerQueryFromViewTransform>(
storage_header, views_data->views.back(), views_data, disable_deduplication_for_children);
storage_header, views_data->views.back(), views_data);
executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms);
out.addSource(std::move(executing_inner_query));
#ifdef ABORT_ON_LOGICAL_ERROR
out.addSource(std::make_shared<DeduplicationToken::CheckTokenTransform>("Right before Inner query", out.getInputHeader()));
#endif
}
return out;
@ -455,7 +465,11 @@ Chain buildPushingToViewsChain(
*/
result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout));
bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views;
/// If the "root" table deduplicates blocks, there are no need to make deduplication for children
/// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
bool disable_deduplication_for_children = false;
if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
disable_deduplication_for_children = !no_destination && storage->supportsDeduplication();
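A condensed form of that decision as a pure function (a sketch, not the actual ClickHouse code): children skip deduplication only when the user did not force MV deduplication, there is a real destination, and the root storage deduplicates on its own.

/// Sketch of the disable-dedup-for-children condition above.
static bool shouldDisableDedupForChildren(bool dedup_in_dependent_mv, bool no_destination, bool storage_supports_dedup)
{
    if (dedup_in_dependent_mv)
        return false; /// the user explicitly wants MV-level deduplication
    return !no_destination && storage_supports_dedup;
}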
auto table_id = storage->getStorageID();
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
@ -546,25 +560,12 @@ Chain buildPushingToViewsChain(
auto sink = std::make_shared<PushingToLiveViewSink>(live_view_header, *live_view, storage, context);
sink->setRuntimeData(thread_status, elapsed_counter_ms);
result_chain.addSource(std::move(sink));
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(result_chain.getInputHeader()));
}
else if (auto * window_view = dynamic_cast<StorageWindowView *>(storage.get()))
{
auto sink = std::make_shared<PushingToWindowViewSink>(window_view->getInputHeader(), *window_view, storage, context);
sink->setRuntimeData(thread_status, elapsed_counter_ms);
result_chain.addSource(std::move(sink));
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(result_chain.getInputHeader()));
}
else if (dynamic_cast<StorageMaterializedView *>(storage.get()))
{
auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert);
metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName());
sink->setRuntimeData(thread_status, elapsed_counter_ms);
result_chain.addSource(std::move(sink));
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(result_chain.getInputHeader()));
}
/// Do not push to destination table if the flag is set
else if (!no_destination)
@ -572,15 +573,8 @@ Chain buildPushingToViewsChain(
auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert);
metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName());
sink->setRuntimeData(thread_status, elapsed_counter_ms);
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(sink->getHeader()));
result_chain.addSource(std::move(sink));
}
else
{
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(storage_header));
}
if (result_chain.empty())
result_chain.addSink(std::make_shared<NullSinkToStorage>(storage_header));
@ -596,7 +590,7 @@ Chain buildPushingToViewsChain(
return result_chain;
}
static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children)
static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data)
{
const auto & context = view.context;
@ -643,19 +637,6 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat
pipeline.getHeader(),
std::make_shared<ExpressionActions>(std::move(converting))));
pipeline.addTransform(std::make_shared<RestoreChunkInfosTransform>(std::move(chunk_infos), pipeline.getHeader()));
if (!disable_deduplication_for_children)
{
String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted();
pipeline.addTransform(std::make_shared<DeduplicationToken::SetViewIDTransform>(std::move(materialize_view_id), pipeline.getHeader()));
pipeline.addTransform(std::make_shared<DeduplicationToken::SetViewBlockNumberTransform>(pipeline.getHeader()));
}
else
{
pipeline.addTransform(std::make_shared<DeduplicationToken::ResetTokenTransform>(pipeline.getHeader()));
}
return QueryPipelineBuilder::getPipeline(std::move(pipeline));
}
@ -747,19 +728,17 @@ IProcessor::Status CopyingDataToViewsTransform::prepare()
ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform(
const Block & header,
ViewRuntimeData & view_,
std::shared_ptr<ViewsData> views_data_,
bool disable_deduplication_for_children_)
std::shared_ptr<ViewsData> views_data_)
: ExceptionKeepingTransform(header, view_.sample_block)
, views_data(std::move(views_data_))
, view(view_)
, disable_deduplication_for_children(disable_deduplication_for_children_)
{
}
void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk)
{
auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children));
auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns());
state.emplace(process(block, view, *views_data));
}
@ -791,10 +770,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi
{
}
void PushingToLiveViewSink::consume(Chunk & chunk)
void PushingToLiveViewSink::consume(Chunk chunk)
{
Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0);
live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context);
live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context);
if (auto process = context->getProcessListElement())
process->updateProgressIn(local_progress);
@ -814,11 +793,11 @@ PushingToWindowViewSink::PushingToWindowViewSink(
{
}
void PushingToWindowViewSink::consume(Chunk & chunk)
void PushingToWindowViewSink::consume(Chunk chunk)
{
Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0);
StorageWindowView::writeIntoWindowView(
window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context);
window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context);
if (auto process = context->getProcessListElement())
process->updateProgressIn(local_progress);

View File

@ -193,7 +193,7 @@ public:
return concurrency_control;
}
void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); }
void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); }
void setQueryIdHolder(std::shared_ptr<QueryIdHolder> query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); }
void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); }

View File

@ -5,7 +5,7 @@
namespace DB
{
QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept
QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept
{
table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end());
storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end());
@ -16,12 +16,6 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolde
return *this;
}
QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept
{
append(std::move(rhs));
return *this;
}
QueryPlanResourceHolder::QueryPlanResourceHolder() = default;
QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default;
QueryPlanResourceHolder::~QueryPlanResourceHolder() = default;

View File

@ -20,11 +20,8 @@ struct QueryPlanResourceHolder
QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept;
~QueryPlanResourceHolder();
QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete;
/// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs.
QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept;
QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept;
/// Some processors may implicitly use Context or temporary Storage created by Interpreter.
/// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here,

View File

@ -888,11 +888,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro
while (readDataNext())
{
squashing.setHeader(state.block_for_insert.cloneEmpty());
auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}));
if (result_chunk)
squashing.header = state.block_for_insert;
auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()});
if (planned_chunk.hasChunkInfo())
{
auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns());
Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk));
auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns());
return PushResult
{
.status = PushResult::TOO_MUCH_DATA,
@ -901,13 +902,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro
}
}
Chunk result_chunk = Squashing::squash(squashing.flush());
if (!result_chunk)
{
return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context);
}
auto planned_chunk = squashing.flush();
Chunk result_chunk;
if (planned_chunk.hasChunkInfo())
result_chunk = DB::Squashing::squash(std::move(planned_chunk));
auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns());
auto result = squashing.header.cloneWithColumns(result_chunk.getColumns());
return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context);
}
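Both versions of this hunk follow the same two-phase squashing contract: add() buffers incoming chunks and only yields a combined chunk once a size threshold is crossed, while flush() drains the remainder. A reduced sketch of that contract over bare row counts (the real Squashing class carries columns and headers; this toy is an assumption-laden simplification):

#include <cstddef>
#include <cstdio>

/// Accumulate-then-flush squashing over row counts only. add() returns 0 while
/// buffering and the combined size once the threshold is crossed; flush()
/// drains whatever remains.
class TinySquashing
{
public:
    explicit TinySquashing(size_t min_rows) : min_rows_(min_rows) {}

    size_t add(size_t rows)
    {
        buffered_ += rows;
        if (buffered_ < min_rows_)
            return 0;
        size_t result = buffered_;
        buffered_ = 0;
        return result;
    }

    size_t flush()
    {
        size_t result = buffered_;
        buffered_ = 0;
        return result;
    }

private:
    size_t min_rows_;
    size_t buffered_ = 0;
};

int main()
{
    TinySquashing squashing(100);
    printf("%zu\n", squashing.add(60));  /// 0: still buffering
    printf("%zu\n", squashing.add(60));  /// 120: threshold crossed
    printf("%zu\n", squashing.flush()); /// 0: nothing left
}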

View File

@ -134,7 +134,7 @@ DistributedSink::DistributedSink(
}
void DistributedSink::consume(Chunk & chunk)
void DistributedSink::consume(Chunk chunk)
{
if (is_first_chunk)
{
@ -142,7 +142,7 @@ void DistributedSink::consume(Chunk & chunk)
is_first_chunk = false;
}
auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns());
auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns());
if (insert_sync)
writeSync(ordinary_block);
@ -420,13 +420,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si
/// to resolve tables (in InterpreterInsertQuery::getTable())
auto copy_query_ast = query_ast->clone();
InterpreterInsertQuery interp(
copy_query_ast,
job.local_context,
allow_materialized,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized);
auto block_io = interp.execute();
job.pipeline = std::move(block_io.pipeline);
@ -721,13 +715,7 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const
try
{
InterpreterInsertQuery interp(
query_ast,
context,
allow_materialized,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
InterpreterInsertQuery interp(query_ast, context, allow_materialized);
auto block_io = interp.execute();
PushingPipelineExecutor executor(block_io.pipeline);

View File

@ -49,7 +49,7 @@ public:
const Names & columns_to_send_);
String getName() const override { return "DistributedSink"; }
void consume(Chunk & chunk) override;
void consume(Chunk chunk) override;
void onFinish() override;
private:

View File

@ -740,14 +740,7 @@ bool StorageFileLog::streamToViews()
auto new_context = Context::createCopy(getContext());
InterpreterInsertQuery interpreter(
insert,
new_context,
/* allow_materialized */ false,
/* no_squash */ true,
/* no_destination */ true,
/* async_insert */ false);
InterpreterInsertQuery interpreter(insert, new_context, false, true, true);
auto block_io = interpreter.execute();
/// Each stream is responsible for closing its files and storing meta

View File

@ -1099,13 +1099,7 @@ bool StorageKafka::streamToViews()
// Create a stream for each consumer and join them in a union stream
// Only insert into dependent views and expect that input blocks contain virtual columns
InterpreterInsertQuery interpreter(
insert,
kafka_context,
/* allow_materialized */ false,
/* no_squash */ true,
/* no_destination */ true,
/* async_insert */ false);
InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true);
auto block_io = interpreter.execute();
// Create a stream for each consumer and join them in a union stream

View File

@ -71,9 +71,9 @@ public:
new_hash.reset();
}
void consume(Chunk & chunk) override
void consume(Chunk chunk) override
{
auto block = getHeader().cloneWithColumns(chunk.getColumns());
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
block.updateHash(*new_hash);
new_blocks->push_back(std::move(block));
}

View File

@ -21,7 +21,6 @@ limitations under the License. */
#include <Processors/Transforms/MaterializingTransform.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
#include <Processors/Transforms/SquashingTransform.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <QueryPipeline/QueryPlanResourceHolder.h>
@ -331,7 +330,7 @@ Pipe StorageLiveView::watch(
return reader;
}
void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context)
void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context)
{
auto output = std::make_shared<LiveViewSink>(*this);
@ -408,21 +407,6 @@ void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Ch
builder = interpreter.buildQueryPipeline();
}
builder.addSimpleTransform([&](const Block & cur_header)
{
return std::make_shared<RestoreChunkInfosTransform>(chunk_infos.clone(), cur_header);
});
String live_view_id = live_view.getStorageID().hasUUID() ? toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted();
builder.addSimpleTransform([&](const Block & stream_header)
{
return std::make_shared<DeduplicationToken::SetViewIDTransform>(live_view_id, stream_header);
});
builder.addSimpleTransform([&](const Block & stream_header)
{
return std::make_shared<DeduplicationToken::SetViewBlockNumberTransform>(stream_header);
});
builder.addSimpleTransform([&](const Block & cur_header)
{
return std::make_shared<MaterializingTransform>(cur_header);

View File

@ -118,7 +118,7 @@ public:
return 0;
}
void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context);
void writeBlock(const Block & block, ContextPtr context);
void refresh();

View File

@ -377,13 +377,7 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr<StorageMaterializedView
{
CurrentThread::QueryScope query_scope(refresh_context); // create a thread group for the query
BlockIO block_io = InterpreterInsertQuery(
refresh_query,
refresh_context,
/* allow_materialized */ false,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false).execute();
BlockIO block_io = InterpreterInsertQuery(refresh_query, refresh_context).execute();
QueryPipeline & pipeline = block_io.pipeline;
pipeline.setProgressCallback([this](const Progress & prog)

View File

@ -2339,26 +2339,21 @@ String IMergeTreeDataPart::getUniqueId() const
return getDataPartStorage().getUniqueId();
}
UInt128 IMergeTreeDataPart::getPartBlockIDHash() const
{
SipHash hash;
checksums.computeTotalChecksumDataOnly(hash);
return hash.get128();
}
String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const
{
if (info.level != 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get block id for non zero level part {}", name);
SipHash hash;
if (token.empty())
{
const auto hash_value = getPartBlockIDHash();
return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]);
checksums.computeTotalChecksumDataOnly(hash);
}
else
{
hash.update(token.data(), token.size());
}
SipHash hash;
hash.update(token.data(), token.size());
const auto hash_value = hash.get128();
return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]);
}
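For reference, the block id assembled here is the partition id plus the two 64-bit halves of a 128-bit hash, computed either over the part checksums or over the explicit deduplication token. A hedged sketch of just the string layout (the hash values below are made up):

#include <cstdint>
#include <iostream>
#include <string>

/// Illustrative only: how a zero-level part block id string is laid out from a
/// partition id and a 128-bit hash split into two 64-bit items.
static std::string makeBlockID(const std::string & partition_id, uint64_t lo, uint64_t hi)
{
    return partition_id + "_" + std::to_string(lo) + "_" + std::to_string(hi);
}

int main()
{
    std::cout << makeBlockID("202407", 123, 456) << '\n'; /// 202407_123_456
}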

View File

@ -210,7 +210,6 @@ public:
/// Compute the part block id for a zero-level part; throws an exception for other levels.
/// If the token is not empty, the block id is calculated from it instead of the block data.
UInt128 getPartBlockIDHash() const;
String getZeroLevelPartBlockID(std::string_view token) const;
void setName(const String & new_name);

View File

@ -1,4 +1,5 @@
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
#include <Common/MemoryTrackerBlockerInThread.h>
namespace DB
{
@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter(
Columns IMergeTreeDataPartWriter::releaseIndexColumns()
{
return Columns(
std::make_move_iterator(index_columns.begin()),
std::make_move_iterator(index_columns.end()));
/// The memory for the index was allocated without the thread memory tracker.
/// We need to deallocate it in shrinkToFit without the memory tracker as well.
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
Columns result;
result.reserve(index_columns.size());
for (auto & column : index_columns)
{
column->shrinkToFit();
result.push_back(std::move(column));
}
index_columns.clear();
return result;
}
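The point of the blocker above is symmetry: memory allocated while tracking was off must also be shrunk and freed while tracking is off, or the tracker's counters drift. A sketch of the RAII idea with a hypothetical stand-in type (MemoryTrackerBlockerInThread is the real ClickHouse class; this toy only mimics its scoping):

#include <cstdio>

/// Hypothetical stand-in: while an instance is alive, allocations on this
/// thread would bypass the memory tracker; the destructor restores tracking.
struct ScopedTrackerBlocker
{
    ScopedTrackerBlocker() { std::printf("tracking disabled\n"); }
    ~ScopedTrackerBlocker() { std::printf("tracking restored\n"); }
};

static void releaseBuffers()
{
    ScopedTrackerBlocker blocker; /// covers both shrinkToFit and the moves below
    /// ... shrink and move out buffers while tracking is off ...
}

int main()
{
    releaseBuffers();
}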
SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const

View File

@ -254,6 +254,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex()
index_compressor_stream = std::make_unique<CompressedWriteBuffer>(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size);
index_source_hashing_stream = std::make_unique<HashingWriteBuffer>(*index_compressor_stream);
}
const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types;
index_serializations.reserve(primary_key_types.size());
for (const auto & type : primary_key_types)
index_serializations.push_back(type->getDefaultSerialization());
}
}
@ -299,22 +305,33 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
store = std::make_shared<GinIndexStore>(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment);
gin_index_stores[stream_name] = store;
}
skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings));
skip_index_accumulated_marks.push_back(0);
}
}
void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row)
{
chassert(index_block.columns() == index_serializations.size());
auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream;
for (size_t i = 0; i < index_block.columns(); ++i)
{
const auto & column = index_block.getByPosition(i).column;
index_columns[i]->insertFrom(*column, row);
index_serializations[i]->serializeBinary(*column, row, index_stream, {});
}
}
void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write)
{
size_t primary_columns_num = primary_index_block.columns();
if (!metadata_snapshot->hasPrimaryKey())
return;
if (index_columns.empty())
{
index_types = primary_index_block.getDataTypes();
index_columns.resize(primary_columns_num);
last_block_index_columns.resize(primary_columns_num);
for (size_t i = 0; i < primary_columns_num; ++i)
index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty();
}
index_columns = primary_index_block.cloneEmptyColumns();
{
/** While filling index (index_columns), disable memory tracker.
@ -328,22 +345,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc
/// Write the index. The index contains one Primary Key value for each `index_granularity` rows.
for (const auto & granule : granules_to_write)
{
if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start)
{
for (size_t j = 0; j < primary_columns_num; ++j)
{
const auto & primary_column = primary_index_block.getByPosition(j);
index_columns[j]->insertFrom(*primary_column.column, granule.start_row);
primary_column.type->getDefaultSerialization()->serializeBinary(
*primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {});
}
}
if (granule.mark_on_start)
calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row);
}
}
/// store last index row to write final mark at the end of column
for (size_t j = 0; j < primary_columns_num; ++j)
last_block_index_columns[j] = primary_index_block.getByPosition(j).column;
/// Store the block with the last index row to write the final mark at the end of the column.
if (with_final_mark)
last_index_block = primary_index_block;
}
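The refactor above funnels per-row work through calculateAndSerializePrimaryIndexRow, invoked once per granule that starts at a mark. A simplified sketch of that sparse-index shape with hypothetical types (the real writer also serializes each captured row into the optionally compressed index stream):

#include <cstdio>
#include <cstddef>
#include <vector>

/// One primary-key row is captured per granule that starts at a mark;
/// everything in between is skipped, which is what makes the index sparse.
struct Granule
{
    size_t start_row = 0;
    bool mark_on_start = false;
};

static std::vector<size_t> collectIndexRows(const std::vector<Granule> & granules)
{
    std::vector<size_t> index_rows;
    for (const auto & granule : granules)
        if (granule.mark_on_start)
            index_rows.push_back(granule.start_row);
    return index_rows;
}

int main()
{
    std::vector<Granule> granules{{0, true}, {8192, true}, {12000, false}};
    printf("%zu index rows\n", collectIndexRows(granules).size()); /// 2 index rows
}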
void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block)
@ -420,17 +429,11 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat
if (index_file_hashing_stream)
{
if (write_final_mark)
if (write_final_mark && last_index_block)
{
for (size_t j = 0; j < index_columns.size(); ++j)
{
const auto & column = *last_block_index_columns[j];
size_t last_row_number = column.size() - 1;
index_columns[j]->insertFrom(column, last_row_number);
index_types[j]->getDefaultSerialization()->serializeBinary(
column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {});
}
last_block_index_columns.clear();
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1);
last_index_block.clear();
}
if (compress_primary_key)

View File

@ -173,10 +173,10 @@ protected:
std::unique_ptr<HashingWriteBuffer> index_source_hashing_stream;
bool compress_primary_key;
DataTypes index_types;
/// Index columns from the last block
/// It's written to index file in the `writeSuffixAndFinalizePart` method
Columns last_block_index_columns;
/// Last block with index columns.
/// It's written to index file in the `writeSuffixAndFinalizePart` method.
Block last_index_block;
Serializations index_serializations;
bool data_written = false;
@ -193,6 +193,7 @@ private:
void initStatistics();
virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0;
void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row);
struct ExecutionStatistics
{

View File

@ -145,12 +145,8 @@ ChunkAndProgress MergeTreeSelectProcessor::read()
ordered_columns.push_back(res.block.getByName(name).column);
}
auto chunk = Chunk(ordered_columns, res.row_count);
if (add_part_level)
chunk.getChunkInfos().add(std::make_shared<MergeTreePartLevelInfo>(task->getInfo().data_part->info.level));
return ChunkAndProgress{
.chunk = std::move(chunk),
.chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared<MergeTreePartLevelInfo>(task->getInfo().data_part->info.level) : nullptr),
.num_read_rows = res.num_read_rows,
.num_read_bytes = res.num_read_bytes,
.is_finished = false};

View File

@ -264,10 +264,7 @@ try
++it;
}
auto result = Chunk(std::move(res_columns), rows_read);
if (add_part_level)
result.getChunkInfos().add(std::make_shared<MergeTreePartLevelInfo>(data_part->info.level));
return result;
return Chunk(std::move(res_columns), rows_read, add_part_level ? std::make_shared<MergeTreePartLevelInfo>(data_part->info.level) : nullptr);
}
}
else

View File

@ -1,27 +1,14 @@
#include <Common/Logger.h>
#include <Common/logger_useful.h>
#include <Common/Exception.h>
#include <Common/ProfileEventsScope.h>
#include <Core/Settings.h>
#include <DataTypes/ObjectUtils.h>
#include <Interpreters/StorageID.h>
#include <Interpreters/PartLog.h>
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
#include <Storages/MergeTree/MergeTreeSink.h>
#include <Storages/StorageMergeTree.h>
#include <memory>
#include <Interpreters/PartLog.h>
#include <DataTypes/ObjectUtils.h>
#include <Common/ProfileEventsScope.h>
namespace ProfileEvents
{
extern const Event DuplicatedInsertedBlocks;
}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace DB
{
@ -71,12 +58,12 @@ void MergeTreeSink::onCancel()
{
}
void MergeTreeSink::consume(Chunk & chunk)
void MergeTreeSink::consume(Chunk chunk)
{
if (num_blocks_processed > 0)
storage.delayInsertOrThrowIfNeeded(nullptr, context, false);
auto block = getHeader().cloneWithColumns(chunk.getColumns());
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
if (!storage_snapshot->object_columns.empty())
convertDynamicColumnsToTuples(block, storage_snapshot);
@ -89,18 +76,6 @@ void MergeTreeSink::consume(Chunk & chunk)
size_t streams = 0;
bool support_parallel_write = false;
auto token_info = chunk.getChunkInfos().get<DeduplicationToken::TokenInfo>();
if (!token_info)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}",
storage.getStorageID().getNameForLogs());
const bool need_to_define_dedup_token = !token_info->isDefined();
String block_dedup_token;
if (token_info->isDefined())
block_dedup_token = token_info->getToken();
for (auto & current_block : part_blocks)
{
ProfileEvents::Counters part_counters;
@ -125,16 +100,22 @@ void MergeTreeSink::consume(Chunk & chunk)
if (!temp_part.part)
continue;
if (need_to_define_dedup_token)
{
chassert(temp_part.part);
const auto hash_value = temp_part.part->getPartBlockIDHash();
token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]));
}
if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite())
support_parallel_write = true;
String block_dedup_token;
if (storage.getDeduplicationLog())
{
const String & dedup_token = settings.insert_deduplication_token;
if (!dedup_token.empty())
{
/// Multiple blocks can be inserted within the same insert query;
/// an ordinal number is added to the dedup token to generate a distinct block id for each block.
block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum);
++chunk_dedup_seqnum;
}
}
size_t max_insert_delayed_streams_for_parallel_write;
if (settings.max_insert_delayed_streams_for_parallel_write.changed)
@ -146,7 +127,6 @@ void MergeTreeSink::consume(Chunk & chunk)
/// In case of too much columns/parts in block, flush explicitly.
streams += temp_part.streams.size();
if (streams > max_insert_delayed_streams_for_parallel_write)
{
finishDelayedChunk();
@ -163,16 +143,11 @@ void MergeTreeSink::consume(Chunk & chunk)
{
.temp_part = std::move(temp_part),
.elapsed_ns = elapsed_ns,
.block_dedup_token = block_dedup_token,
.block_dedup_token = std::move(block_dedup_token),
.part_counters = std::move(part_counters),
});
}
if (need_to_define_dedup_token)
{
token_info->finishChunkHashes();
}
finishDelayedChunk();
delayed_chunk = std::make_unique<MergeTreeSink::DelayedChunk>();
delayed_chunk->partitions = std::move(partitions);
@ -185,8 +160,6 @@ void MergeTreeSink::finishDelayedChunk()
if (!delayed_chunk)
return;
const Settings & settings = context->getSettingsRef();
for (auto & partition : delayed_chunk->partitions)
{
ProfileEventsScope scoped_attach(&partition.part_counters);
@ -205,8 +178,7 @@ void MergeTreeSink::finishDelayedChunk()
storage.fillNewPartName(part, lock);
auto * deduplication_log = storage.getDeduplicationLog();
if (settings.insert_deduplicate && deduplication_log)
if (deduplication_log)
{
const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token);
auto res = deduplication_log->addPart(block_id, part->info);
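The lookup above is effectively an insert-if-absent on block ids: if the deduplication log reports the id was already present, the new part is dropped as a duplicate. A minimal sketch of that check, with a plain in-memory set standing in for the persisted log:

#include <cstdio>
#include <set>
#include <string>

/// Returns true if the block id is new, false if an identical block was
/// already inserted (in which case the new part should be skipped).
static bool addPartIfNew(std::set<std::string> & log, const std::string & block_id)
{
    return log.insert(block_id).second;
}

int main()
{
    std::set<std::string> log;
    printf("%d\n", addPartIfNew(log, "202407_123_456")); /// 1: first insert
    printf("%d\n", addPartIfNew(log, "202407_123_456")); /// 0: duplicate
}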

View File

@ -25,7 +25,7 @@ public:
~MergeTreeSink() override;
String getName() const override { return "MergeTreeSink"; }
void consume(Chunk & chunk) override;
void consume(Chunk chunk) override;
void onStart() override;
void onFinish() override;
void onCancel() override;
@ -36,6 +36,7 @@ private:
size_t max_parts_per_block;
ContextPtr context;
StorageSnapshotPtr storage_snapshot;
UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token
UInt64 num_blocks_processed = 0;
/// We can delay processing for previous chunk and start writing a new one.

View File

@ -1297,7 +1297,6 @@ void PartMergerWriter::prepare()
bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
{
Block cur_block;
Block projection_header;
if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block))
{
if (ctx->minmax_idx)
@ -1315,12 +1314,14 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds);
Block block_to_squash = projection.calculate(cur_block, ctx->context);
projection_squashes[i].setHeader(block_to_squash.cloneEmpty());
projection_squashes[i].header = block_to_squash;
Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()});
Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}));
if (squashed_chunk)
if (planned_chunk.hasChunkInfo())
{
auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns());
Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk));
auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns());
auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart(
*ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num);
tmp_part.finalize();
@ -1341,10 +1342,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
{
const auto & projection = *ctx->projections_to_build[i];
auto & projection_squash_plan = projection_squashes[i];
auto squashed_chunk = Squashing::squash(projection_squash_plan.flush());
if (squashed_chunk)
auto planned_chunk = projection_squash_plan.flush();
if (planned_chunk.hasChunkInfo())
{
auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns());
Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk));
auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns());
auto temp_part = MergeTreeDataWriter::writeTempProjectionPart(
*ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num);
temp_part.finalize();

View File

@ -1,25 +1,21 @@
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
#include <Storages/MergeTree/ReplicatedMergeTreeSink.h>
#include <Storages/MergeTree/InsertBlockInfo.h>
#include <Interpreters/PartLog.h>
#include "Common/Exception.h"
#include <Common/FailPoint.h>
#include <Common/ProfileEventsScope.h>
#include <Common/SipHash.h>
#include <Common/ThreadFuzzer.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Core/Block.h>
#include <DataTypes/ObjectUtils.h>
#include <IO/Operators.h>
#include <Interpreters/PartLog.h>
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
#include <Storages/MergeTree/AsyncBlockIDsCache.h>
#include <Storages/MergeTree/InsertBlockInfo.h>
#include <Common/ThreadFuzzer.h>
#include <Storages/MergeTree/MergeAlgorithm.h>
#include <Storages/MergeTree/MergeTreeDataWriter.h>
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
#include <Storages/MergeTree/ReplicatedMergeTreeSink.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/AsyncBlockIDsCache.h>
#include <DataTypes/ObjectUtils.h>
#include <Core/Block.h>
#include <IO/Operators.h>
#include <fmt/core.h>
#include <memory>
namespace ProfileEvents
{
@ -257,12 +253,12 @@ size_t ReplicatedMergeTreeSinkImpl<async_insert>::checkQuorumPrecondition(const
}
template<bool async_insert>
void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk & chunk)
void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
{
if (num_blocks_processed > 0)
storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false);
auto block = getHeader().cloneWithColumns(chunk.getColumns());
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
const auto & settings = context->getSettingsRef();
@ -288,25 +284,13 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk & chunk)
if constexpr (async_insert)
{
const auto async_insert_info_ptr = chunk.getChunkInfos().get<AsyncInsertInfo>();
if (async_insert_info_ptr)
const auto & chunk_info = chunk.getChunkInfo();
if (const auto * async_insert_info_ptr = typeid_cast<const AsyncInsertInfo *>(chunk_info.get()))
async_insert_info = std::make_shared<AsyncInsertInfo>(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens);
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts");
}
String block_dedup_token;
auto token_info = chunk.getChunkInfos().get<DeduplicationToken::TokenInfo>();
if (!token_info)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}",
storage.getStorageID().getNameForLogs());
const bool need_to_define_dedup_token = !token_info->isDefined();
if (token_info->isDefined())
block_dedup_token = token_info->getToken();
auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info);
using DelayedPartition = typename ReplicatedMergeTreeSinkImpl<async_insert>::DelayedChunk::Partition;
@ -358,10 +342,23 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk & chunk)
}
else
{
if (deduplicate)
{
String block_dedup_token;
/// We add the hash of the data and the partition identifier to the deduplication ID.
/// That is, do not insert the same data into the same partition twice.
const String & dedup_token = settings.insert_deduplication_token;
if (!dedup_token.empty())
{
/// Multiple blocks can be inserted within the same insert query;
/// an ordinal number is added to the dedup token to generate a distinct block id for each block.
block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum);
++chunk_dedup_seqnum;
}
block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token);
LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num));
}
@ -369,13 +366,6 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk & chunk)
{
LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num));
}
if (need_to_define_dedup_token)
{
chassert(temp_part.part);
const auto hash_value = temp_part.part->getPartBlockIDHash();
token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]));
}
}
profile_events_scope.reset();
@ -421,15 +411,17 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk & chunk)
));
}
if (need_to_define_dedup_token)
{
token_info->finishChunkHashes();
}
finishDelayedChunk(zookeeper);
delayed_chunk = std::make_unique<ReplicatedMergeTreeSinkImpl::DelayedChunk>();
delayed_chunk->partitions = std::move(partitions);
/// If deduplicated data should not be inserted into the MV, we need to set a proper
/// value for `last_block_is_duplicate`, which is possible only after the part is committed.
/// Otherwise we can delay the commit.
/// TODO: we can also delay the commit if there are no MVs.
if (!settings.deduplicate_blocks_in_dependent_materialized_views)
finishDelayedChunk(zookeeper);
++num_blocks_processed;
}
@ -439,6 +431,8 @@ void ReplicatedMergeTreeSinkImpl<false>::finishDelayedChunk(const ZooKeeperWithF
if (!delayed_chunk)
return;
last_block_is_duplicate = false;
for (auto & partition : delayed_chunk->partitions)
{
ProfileEventsScope scoped_attach(&partition.part_counters);
@ -451,6 +445,8 @@ void ReplicatedMergeTreeSinkImpl<false>::finishDelayedChunk(const ZooKeeperWithF
{
bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second;
last_block_is_duplicate = last_block_is_duplicate || deduplicated;
/// Set a special error code if the block is a duplicate
int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0;
auto counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(partition.part_counters.getPartiallyAtomicSnapshot());
@ -539,7 +535,7 @@ bool ReplicatedMergeTreeSinkImpl<false>::writeExistingPart(MergeTreeData::Mutabl
ProfileEventsScope profile_events_scope;
String original_part_dir = part->getDataPartStorage().getPartDirectory();
auto try_rollback_part_rename = [this, &part, &original_part_dir] ()
auto try_rollback_part_rename = [this, &part, &original_part_dir]()
{
if (original_part_dir == part->getDataPartStorage().getPartDirectory())
return;
@ -1155,16 +1151,8 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::onStart()
template<bool async_insert>
void ReplicatedMergeTreeSinkImpl<async_insert>::onFinish()
{
const auto & settings = context->getSettingsRef();
ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance(
settings.insert_keeper_fault_injection_probability,
settings.insert_keeper_fault_injection_seed,
storage.getZooKeeper(),
"ReplicatedMergeTreeSink::onFinish",
log);
finishDelayedChunk(zookeeper);
auto zookeeper = storage.getZooKeeper();
finishDelayedChunk(std::make_shared<ZooKeeperWithFaultInjection>(zookeeper));
}
template<bool async_insert>

View File

@ -51,7 +51,7 @@ public:
~ReplicatedMergeTreeSinkImpl() override;
void onStart() override;
void consume(Chunk & chunk) override;
void consume(Chunk chunk) override;
void onFinish() override;
String getName() const override { return "ReplicatedMergeTreeSink"; }
@ -59,6 +59,16 @@ public:
/// For ATTACHing existing data on filesystem.
bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part);
/// For proper deduplication in MaterializedViews
bool lastBlockIsDuplicate() const override
{
/// If the MV is responsible for deduplication, the block is not considered a duplicate.
if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
return false;
return last_block_is_duplicate;
}
struct DelayedChunk;
private:
std::vector<String> detectConflictsInAsyncBlockIDs(const std::vector<String> & ids);
@ -116,6 +126,7 @@ private:
bool allow_attach_while_readonly = false;
bool quorum_parallel = false;
const bool deduplicate = true;
bool last_block_is_duplicate = false;
UInt64 num_blocks_processed = 0;
LoggerPtr log;

View File

@ -40,7 +40,7 @@ void MessageQueueSink::onFinish()
producer->finish();
}
void MessageQueueSink::consume(Chunk & chunk)
void MessageQueueSink::consume(Chunk chunk)
{
const auto & columns = chunk.getColumns();
if (columns.empty())

View File

@ -35,7 +35,7 @@ public:
String getName() const override { return storage_name + "Sink"; }
void consume(Chunk & chunk) override;
void consume(Chunk chunk) override;
void onStart() override;
void onFinish() override;

View File

@ -644,13 +644,7 @@ bool StorageNATS::streamToViews()
insert->table_id = table_id;
// Only insert into dependent views and expect that input blocks contain virtual columns
InterpreterInsertQuery interpreter(
insert,
nats_context,
/* allow_materialized */ false,
/* no_squash */ true,
/* no_destination */ true,
/* async_isnert */ false);
InterpreterInsertQuery interpreter(insert, nats_context, false, true, true);
auto block_io = interpreter.execute();
auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext());

View File

@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink(
configuration->format, *write_buf, sample_block, context, format_settings_);
}
void StorageObjectStorageSink::consume(Chunk & chunk)
void StorageObjectStorageSink::consume(Chunk chunk)
{
std::lock_guard lock(cancel_mutex);
if (cancelled)
return;
writer->write(getHeader().cloneWithColumns(chunk.getColumns()));
writer->write(getHeader().cloneWithColumns(chunk.detachColumns()));
}
void StorageObjectStorageSink::onCancel()

View File

@ -20,7 +20,7 @@ public:
String getName() const override { return "StorageObjectStorageSink"; }
void consume(Chunk & chunk) override;
void consume(Chunk chunk) override;
void onCancel() override;

View File

@ -454,13 +454,7 @@ bool StorageObjectStorageQueue::streamToViews()
while (!shutdown_called && !file_iterator->isFinished())
{
InterpreterInsertQuery interpreter(
insert,
queue_context,
/* allow_materialized */ false,
/* no_squash */ true,
/* no_destination */ true,
/* async_insert */ false);
InterpreterInsertQuery interpreter(insert, queue_context, false, true, true);
auto block_io = interpreter.execute();
auto read_from_format_info = prepareReadingFromFormat(
block_io.pipeline.getHeader().getNames(),

View File

@ -51,7 +51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key)
return it->second;
}
void PartitionedSink::consume(Chunk & chunk)
void PartitionedSink::consume(Chunk chunk)
{
const auto & columns = chunk.getColumns();
@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk & chunk)
for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index)
{
auto sink = getSinkForPartitionKey(partition_key);
sink->consume(partition_index_to_chunk[partition_index]);
sink->consume(std::move(partition_index_to_chunk[partition_index]));
}
}

View File

@ -20,7 +20,7 @@ public:
String getName() const override { return "PartitionedSink"; }
void consume(Chunk & chunk) override;
void consume(Chunk chunk) override;
void onException(std::exception_ptr exception) override;

View File

@ -697,13 +697,7 @@ void MaterializedPostgreSQLConsumer::syncTables()
insert->table_id = storage->getStorageID();
insert->columns = std::make_shared<ASTExpressionList>(buffer->columns_ast);
InterpreterInsertQuery interpreter(
insert,
insert_context,
/* allow_materialized */ true,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
InterpreterInsertQuery interpreter(insert, insert_context, true);
auto io = interpreter.execute();
auto input = std::make_shared<SourceFromSingleChunk>(
result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows()));

View File

@ -437,13 +437,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection
auto insert_context = materialized_storage->getNestedTableContext();
InterpreterInsertQuery interpreter(
insert,
insert_context,
/* allow_materialized */ false,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
InterpreterInsertQuery interpreter(insert, insert_context);
auto block_io = interpreter.execute();
const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata();

View File

@ -1129,13 +1129,7 @@ bool StorageRabbitMQ::tryStreamToViews()
}
// Only insert into dependent views and expect that input blocks contain virtual columns
InterpreterInsertQuery interpreter(
insert,
rabbitmq_context,
/* allow_materialized */ false,
/* no_squash */ true,
/* no_destination */ true,
/* async_insert */ false);
InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true);
auto block_io = interpreter.execute();
block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes)));

Some files were not shown because too many files have changed in this diff.