Merge branch 'master' into kssenii-patch-8

Kseniia Sumarokova 2024-01-24 11:31:23 +01:00 committed by GitHub
commit dec93dabde
71 changed files with 595 additions and 2496 deletions

View File

@@ -11,6 +11,7 @@ sidebar_label: 2023
* Remove the `status_info` configuration option and dictionaries status from the default Prometheus handler. [#54090](https://github.com/ClickHouse/ClickHouse/pull/54090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The experimental parts metadata cache is removed from the codebase. [#54215](https://github.com/ClickHouse/ClickHouse/pull/54215) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable setting `input_format_json_try_infer_numbers_from_strings` by default, so we don't try to infer numbers from strings in JSON formats by default to avoid possible parsing errors when sample data contains strings that looks like a number. [#55099](https://github.com/ClickHouse/ClickHouse/pull/55099) ([Kruglov Pavel](https://github.com/Avogar)).
+* IPv6 bloom filter indexes created prior to March 2023 are not compatible with current version and have to be rebuilt. [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### New Feature
* Added new type of authentication based on SSH keys. It works only for Native TCP protocol. [#41109](https://github.com/ClickHouse/ClickHouse/pull/41109) ([George Gamezardashvili](https://github.com/InfJoker)).

View File

@@ -112,7 +112,7 @@ Note that:
For the query to run successfully, the following conditions must be met:
- Both tables must have the same structure.
-- Both tables must have the same order by key and the same primary key.
+- Both tables must have the same partition key, the same order by key and the same primary key.
- Both tables must have the same indices and projections.
- Both tables must have the same storage policy.
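These conditions apply to the `REPLACE PARTITION ... FROM` statement that this documentation page covers. A minimal usage sketch, assuming two MergeTree tables `dst` and `src` (hypothetical names) that satisfy the conditions above:

```sql
-- Replaces whatever data table dst currently holds for partition 202401
-- with the data of the same partition from table src.
ALTER TABLE dst REPLACE PARTITION 202401 FROM src;
```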

View File

@@ -7,7 +7,7 @@ keywords: [udf, user defined function, clickhouse, executable, table, function]
# executable Table Function for UDFs
-The `executable` table function creates a table based on the output of a user-defined function (UDF) that you define in a script that outputs rows to **stdout**. The executable script is stored in the `users_scripts` directory and can read data from any source.
+The `executable` table function creates a table based on the output of a user-defined function (UDF) that you define in a script that outputs rows to **stdout**. The executable script is stored in the `users_scripts` directory and can read data from any source. Make sure your ClickHouse server has all the required packages to run the executable script. For example, if it is a Python script, ensure that the server has the necessary Python packages installed.
You can optionally include one or more input queries that stream their results to **stdin** for the script to read.
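A minimal invocation sketch for the `executable` table function; the script name and the column structure here are hypothetical, and the script must be placed in the server's configured user scripts directory:

```sql
-- Builds a table from the rows my_script.py writes to stdout,
-- parsed as TabSeparated into the declared structure.
SELECT * FROM executable('my_script.py', TabSeparated, 'id UInt32, value String');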

View File

@@ -160,7 +160,7 @@ int DisksApp::main(const std::vector<String> & /*args*/)
}
else
{
-throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specifiged");
+throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified");
}
if (config().has("save-logs"))

View File

@@ -4,9 +4,9 @@
#include <Disks/DiskFactory.h>
#include <IO/FileEncryptionCommon.h>
#include <IO/ReadBufferFromEncryptedFile.h>
-#include <IO/ReadBufferFromFileDecorator.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromEncryptedFile.h>
+#include <IO/ReadBufferFromEmptyFile.h>
#include <boost/algorithm/hex.hpp>
#include <Common/quoteString.h>
#include <Common/typeid_cast.h>
@@ -374,7 +374,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
{
/// File is empty, that's a normal case, see DiskEncrypted::truncateFile().
/// There is no header so we just return `ReadBufferFromString("")`.
-return std::make_unique<ReadBufferFromFileDecorator>(std::make_unique<ReadBufferFromString>(std::string_view{}), wrapped_path);
+return std::make_unique<ReadBufferFromEmptyFile>(wrapped_path);
}
auto encryption_settings = current_settings.get();
FileEncryption::Header header = readHeader(*buffer);

View File

@@ -6,7 +6,6 @@
#include <Common/Exception.h>
#include <boost/algorithm/hex.hpp>
#include <IO/ReadBufferFromEncryptedFile.h>
-#include <IO/ReadBufferFromFileDecorator.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromEncryptedFile.h>
#include <Common/quoteString.h>

View File

@@ -1,6 +1,6 @@
#include "ReadBufferFromRemoteFSGather.h"
-#include <IO/SeekableReadBuffer.h>
+#include <IO/ReadBufferFromFileBase.h>
#include <Disks/IO/CachedOnDiskReadBufferFromFile.h>
#include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
@@ -61,7 +61,7 @@ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(
current_object = blobs_to_read.front();
}
-SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object)
+std::unique_ptr<ReadBufferFromFileBase> ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object)
{
if (current_buf && !with_cache)
{
@@ -78,7 +78,7 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c
if (with_cache)
{
auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path);
-return std::make_shared<CachedOnDiskReadBufferFromFile>(
+return std::make_unique<CachedOnDiskReadBufferFromFile>(
object_path,
cache_key,
settings.remote_fs_cache,

View File

@@ -53,7 +53,7 @@ public:
bool isContentCached(size_t offset, size_t size) override;
private:
-SeekableReadBufferPtr createImplementationBuffer(const StoredObject & object);
+std::unique_ptr<ReadBufferFromFileBase> createImplementationBuffer(const StoredObject & object);
bool nextImpl() override;
@@ -80,7 +80,7 @@ private:
StoredObject current_object;
size_t current_buf_idx = 0;
-SeekableReadBufferPtr current_buf;
+std::unique_ptr<ReadBufferFromFileBase> current_buf;
Poco::Logger * log;
};

View File

@@ -39,7 +39,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
size_t alignment)
{
if (file_size.has_value() && !*file_size)
-return std::make_unique<ReadBufferFromEmptyFile>();
+return std::make_unique<ReadBufferFromEmptyFile>(filename);
size_t estimated_size = 0;
if (read_hint.has_value())

View File

@@ -531,7 +531,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskObjectStorage::readFile(
const bool file_can_be_empty = !file_size.has_value() || *file_size == 0;
if (storage_objects.empty() && file_can_be_empty)
-return std::make_unique<ReadBufferFromEmptyFile>();
+return std::make_unique<ReadBufferFromEmptyFile>(path);
return object_storage->readObjects(
storage_objects,

View File

@@ -228,7 +228,12 @@ public:
off_t getPosition() override
{
-throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive");
+throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition is not supported when reading from archive");
+}
+size_t getFileOffsetOfBufferEnd() const override
+{
+throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getFileOffsetOfBufferEnd is not supported when reading from archive");
}
String getFileName() const override { return handle.getFileName(); }

View File

@@ -15,6 +15,7 @@ namespace ErrorCodes
extern const int CANNOT_UNPACK_ARCHIVE;
extern const int LOGICAL_ERROR;
extern const int SEEK_POSITION_OUT_OF_BOUND;
+extern const int UNSUPPORTED_METHOD;
extern const int CANNOT_SEEK_THROUGH_FILE;
}
@@ -252,6 +253,11 @@ public:
checkResult(err);
}
+size_t getFileOffsetOfBufferEnd() const override
+{
+throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getFileOffsetOfBufferEnd is not supported when reading from zip archive");
+}
off_t seek(off_t off, int whence) override
{
off_t current_pos = getPosition();

View File

@@ -4,8 +4,7 @@
namespace DB
{
-BoundedReadBuffer::BoundedReadBuffer(std::unique_ptr<SeekableReadBuffer> impl_)
-: ReadBufferFromFileDecorator(std::move(impl_))
+BoundedReadBuffer::BoundedReadBuffer(std::unique_ptr<ReadBufferFromFileBase> impl_) : impl(std::move(impl_))
{
}

View File

@@ -1,5 +1,5 @@
#pragma once
-#include <IO/ReadBufferFromFileDecorator.h>
+#include <IO/ReadBufferFromFileBase.h>
namespace DB
@@ -7,10 +7,10 @@ namespace DB
/// A buffer which allows to make an underlying buffer as right bounded,
/// e.g. the buffer cannot return data beyond offset specified in `setReadUntilPosition`.
-class BoundedReadBuffer : public ReadBufferFromFileDecorator
+class BoundedReadBuffer : public ReadBufferFromFileBase
{
public:
-explicit BoundedReadBuffer(std::unique_ptr<SeekableReadBuffer> impl_);
+explicit BoundedReadBuffer(std::unique_ptr<ReadBufferFromFileBase> impl_);
bool supportsRightBoundedReads() const override { return true; }
@@ -23,6 +23,8 @@ public:
off_t seek(off_t off, int whence) override;
size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; }
+String getFileName() const override { return impl->getFileName(); }
+size_t getFileSize() override { return impl->getFileSize(); }
/// file_offset_of_buffer_end can differ from impl's file_offset_of_buffer_end
/// because of resizing of the tail. => Need to also override getPosition() as
@@ -30,6 +32,8 @@ public:
off_t getPosition() override;
private:
+std::unique_ptr<ReadBufferFromFileBase> impl;
std::optional<size_t> read_until_position;
/// atomic because can be used in log or exception messages while being updated.
std::atomic<size_t> file_offset_of_buffer_end = 0;

View File

@@ -18,7 +18,6 @@ public:
/// Returns adjusted position, i.e. returns `3` if the position in the nested buffer is `start_offset + 3`.
off_t getPosition() override;
off_t seek(off_t off, int whence) override;
private:

View File

@@ -92,6 +92,11 @@ size_t MMapReadBufferFromFileDescriptor::getFileSize()
return getSizeFromFileDescriptor(getFD(), getFileName());
}
+size_t MMapReadBufferFromFileDescriptor::getFileOffsetOfBufferEnd() const
+{
+return mapped.getOffset() + mapped.getLength();
+}
size_t MMapReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &)
{
if (offset >= mapped.getLength())

View File

@@ -36,6 +36,8 @@ public:
std::string getFileName() const override;
+size_t getFileOffsetOfBufferEnd() const override;
int getFD() const;
size_t getFileSize() override;

View File

@@ -76,4 +76,9 @@ off_t MMapReadBufferFromFileWithCache::seek(off_t offset, int whence)
return new_pos;
}
+size_t MMapReadBufferFromFileWithCache::getFileOffsetOfBufferEnd() const
+{
+return mapped->getOffset() + mapped->getLength();
+}
}

View File

@@ -19,7 +19,7 @@ public:
off_t getPosition() override;
std::string getFileName() const override;
off_t seek(off_t offset, int whence) override;
+size_t getFileOffsetOfBufferEnd() const override;
bool isRegularLocalFile(size_t * /* out_view_offset */) override { return true; }
private:

View File

@@ -14,12 +14,18 @@ namespace DB
/// - ThreadPoolReader
class ReadBufferFromEmptyFile : public ReadBufferFromFileBase
{
+public:
+explicit ReadBufferFromEmptyFile(const String & file_name_) : file_name(file_name_) {}
private:
+String file_name;
bool nextImpl() override { return false; }
-std::string getFileName() const override { return "<empty>"; }
+std::string getFileName() const override { return file_name; }
off_t seek(off_t /*off*/, int /*whence*/) override { return 0; }
off_t getPosition() override { return 0; }
size_t getFileSize() override { return 0; }
+size_t getFileOffsetOfBufferEnd() const override { return 0; }
};
}

View File

@@ -101,6 +101,18 @@ bool ReadBufferFromEncryptedFile::nextImpl()
return true;
}
+size_t ReadBufferFromEncryptedFile::getFileSize()
+{
+size_t size = in->getFileSize();
+return size > FileEncryption::Header::kSize ? size - FileEncryption::Header::kSize : size;
+}
+size_t ReadBufferFromEncryptedFile::getFileOffsetOfBufferEnd() const
+{
+size_t file_offset = in->getFileOffsetOfBufferEnd();
+return file_offset > FileEncryption::Header::kSize ? file_offset - FileEncryption::Header::kSize : file_offset;
+}
}
#endif

View File

@@ -27,10 +27,10 @@ public:
std::string getFileName() const override { return in->getFileName(); }
void setReadUntilPosition(size_t position) override { in->setReadUntilPosition(position + FileEncryption::Header::kSize); }
void setReadUntilEnd() override { in->setReadUntilEnd(); }
-size_t getFileSize() override { return in->getFileSize(); }
+size_t getFileSize() override;
+size_t getFileOffsetOfBufferEnd() const override;
private:
bool nextImpl() override;

View File

@@ -60,6 +60,12 @@ public:
/// file offset and what getPosition() returns.
virtual bool isRegularLocalFile(size_t * /* out_view_offset */ = nullptr) { return false; }
+/// NOTE: This method should be thread-safe against seek(), since it can be
+/// used in CachedOnDiskReadBufferFromFile from multiple threads (because
+/// it first releases the buffer, and then do logging, and so other thread
+/// can already call seek() which will lead to data-race).
+virtual size_t getFileOffsetOfBufferEnd() const = 0;
protected:
std::optional<size_t> file_size;
ProfileCallback profile_callback;

View File

@@ -1,60 +0,0 @@
#include <IO/ReadBufferFromFileDecorator.h>
namespace DB
{
ReadBufferFromFileDecorator::ReadBufferFromFileDecorator(std::unique_ptr<SeekableReadBuffer> impl_)
: ReadBufferFromFileDecorator(std::move(impl_), "")
{
}
ReadBufferFromFileDecorator::ReadBufferFromFileDecorator(std::unique_ptr<SeekableReadBuffer> impl_, const String & file_name_)
: impl(std::move(impl_)), file_name(file_name_)
{
swap(*impl);
}
std::string ReadBufferFromFileDecorator::getFileName() const
{
if (!file_name.empty())
return file_name;
return getFileNameFromReadBuffer(*impl);
}
off_t ReadBufferFromFileDecorator::getPosition()
{
swap(*impl);
auto position = impl->getPosition();
swap(*impl);
return position;
}
off_t ReadBufferFromFileDecorator::seek(off_t off, int whence)
{
swap(*impl);
auto result = impl->seek(off, whence);
swap(*impl);
return result;
}
bool ReadBufferFromFileDecorator::nextImpl()
{
swap(*impl);
auto result = impl->next();
swap(*impl);
return result;
}
size_t ReadBufferFromFileDecorator::getFileSize()
{
return getFileSizeFromReadBuffer(*impl);
}
}

View File

@@ -1,37 +0,0 @@
#pragma once
#include <IO/ReadBufferFromFileBase.h>
namespace DB
{
/// Delegates all reads to underlying buffer. Doesn't have own memory.
class ReadBufferFromFileDecorator : public ReadBufferFromFileBase
{
public:
explicit ReadBufferFromFileDecorator(std::unique_ptr<SeekableReadBuffer> impl_);
ReadBufferFromFileDecorator(std::unique_ptr<SeekableReadBuffer> impl_, const String & file_name_);
std::string getFileName() const override;
off_t getPosition() override;
off_t seek(off_t off, int whence) override;
bool nextImpl() override;
bool isWithFileSize() const { return dynamic_cast<const WithFileSize *>(impl.get()) != nullptr; }
const ReadBuffer & getWrappedReadBuffer() const { return *impl; }
ReadBuffer & getWrappedReadBuffer() { return *impl; }
size_t getFileSize() override;
protected:
std::unique_ptr<SeekableReadBuffer> impl;
String file_name;
};
}

View File

@@ -20,7 +20,6 @@ public:
: SeekableReadBuffer(const_cast<char *>(str.data()), str.size(), 0) {}
off_t seek(off_t off, int whence) override;
off_t getPosition() override;
};

View File

@@ -44,12 +44,6 @@ public:
virtual String getInfoForLog() { return ""; }
-/// NOTE: This method should be thread-safe against seek(), since it can be
-/// used in CachedOnDiskReadBufferFromFile from multiple threads (because
-/// it first releases the buffer, and then do logging, and so other thread
-/// can already call seek() which will lead to data-race).
-virtual size_t getFileOffsetOfBufferEnd() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFileOffsetOfBufferEnd() not implemented"); }
/// If true, setReadUntilPosition() guarantees that eof will be reported at the given position.
virtual bool supportsRightBoundedReads() const { return false; }

View File

@@ -2,7 +2,6 @@
#include <IO/ReadBufferFromFile.h>
#include <IO/CompressedReadBufferWrapper.h>
#include <IO/ParallelReadBuffer.h>
-#include <IO/ReadBufferFromFileDecorator.h>
#include <IO/PeekableReadBuffer.h>
namespace DB
@@ -17,23 +16,15 @@ template <typename T>
static size_t getFileSize(T & in)
{
if (auto * with_file_size = dynamic_cast<WithFileSize *>(&in))
-{
return with_file_size->getFileSize();
-}
throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size");
}
size_t getFileSizeFromReadBuffer(ReadBuffer & in)
{
-if (auto * delegate = dynamic_cast<ReadBufferFromFileDecorator *>(&in))
-{
-return getFileSize(delegate->getWrappedReadBuffer());
-}
-else if (auto * compressed = dynamic_cast<CompressedReadBufferWrapper *>(&in))
-{
+if (auto * compressed = dynamic_cast<CompressedReadBufferWrapper *>(&in))
return getFileSize(compressed->getWrappedReadBuffer());
-}
return getFileSize(in);
}
@@ -52,11 +43,7 @@ std::optional<size_t> tryGetFileSizeFromReadBuffer(ReadBuffer & in)
bool isBufferWithFileSize(const ReadBuffer & in)
{
-if (const auto * delegate = dynamic_cast<const ReadBufferFromFileDecorator *>(&in))
-{
-return delegate->isWithFileSize();
-}
-else if (const auto * compressed = dynamic_cast<const CompressedReadBufferWrapper *>(&in))
+if (const auto * compressed = dynamic_cast<const CompressedReadBufferWrapper *>(&in))
{
return isBufferWithFileSize(compressed->getWrappedReadBuffer());
}
@@ -66,11 +53,7 @@ bool isBufferWithFileSize(const ReadBuffer & in)
size_t getDataOffsetMaybeCompressed(const ReadBuffer & in)
{
-if (const auto * delegate = dynamic_cast<const ReadBufferFromFileDecorator *>(&in))
-{
-return getDataOffsetMaybeCompressed(delegate->getWrappedReadBuffer());
-}
-else if (const auto * compressed = dynamic_cast<const CompressedReadBufferWrapper *>(&in))
+if (const auto * compressed = dynamic_cast<const CompressedReadBufferWrapper *>(&in))
{
return getDataOffsetMaybeCompressed(compressed->getWrappedReadBuffer());
}

View File

@@ -322,7 +322,6 @@ void executeQuery(
void executeQueryWithParallelReplicas(
QueryPlan & query_plan,
-const StorageID & main_table,
SelectStreamFactory & stream_factory,
const ASTPtr & query_ast,
ContextPtr context,
@@ -414,7 +413,6 @@ void executeQueryWithParallelReplicas(
std::move(coordinator),
stream_factory.header,
stream_factory.processed_stage,
-main_table,
new_context,
getThrottler(new_context),
std::move(scalars),

View File

@@ -70,7 +70,6 @@ void executeQuery(
void executeQueryWithParallelReplicas(
QueryPlan & query_plan,
-const StorageID & main_table,
SelectStreamFactory & stream_factory,
const ASTPtr & query_ast,
ContextPtr context,

View File

@@ -1,17 +1,13 @@
#pragma once
#include <AggregateFunctions/AggregateFunctionFactory.h>
-#include <Core/Range.h>
#include <DataTypes/DataTypeFactory.h>
-#include <DataTypes/FieldToDataType.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteHelpers.h>
-#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/InDepthNodeVisitor.h>
-#include <Interpreters/applyFunction.h>
+#include <Interpreters/IdentifierSemantic.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/IAST.h>
@@ -37,8 +33,6 @@ public:
ASTIdentifier * identifier = nullptr;
DataTypePtr arg_data_type = {};
-Range range = Range::createWholeUniverse();
void reject() { monotonicity.is_monotonic = false; }
bool isRejected() const { return !monotonicity.is_monotonic; }
@@ -103,30 +97,13 @@ public:
if (data.isRejected())
return;
-/// Monotonicity check only works for functions that contain at most two arguments and one of them must be a constant.
-if (!ast_function.arguments)
+/// TODO: monotonicity for functions of several arguments
+if (!ast_function.arguments || ast_function.arguments->children.size() != 1)
{
data.reject();
return;
}
auto arguments_size = ast_function.arguments->children.size();
if (arguments_size == 0 || arguments_size > 2)
{
data.reject();
return;
}
else if (arguments_size == 2)
{
/// If the function has two arguments, then one of them must be a constant.
if (!ast_function.arguments->children[0]->as<ASTLiteral>() && !ast_function.arguments->children[1]->as<ASTLiteral>())
{
data.reject();
return;
}
}
if (!data.canOptimize(ast_function))
{
data.reject();
@@ -147,33 +124,14 @@ public:
return;
}
-auto function_arguments = getFunctionArguments(ast_function, data);
+ColumnsWithTypeAndName args;
+args.emplace_back(data.arg_data_type, "tmp");
-auto function_base = function->build(function_arguments);
+auto function_base = function->build(args);
if (function_base && function_base->hasInformationAboutMonotonicity())
{
bool is_positive = data.monotonicity.is_positive;
-data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, data.range.left, data.range.right);
+data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, Field(), Field());
auto & key_range = data.range;
/// If we apply function to open interval, we can get empty intervals in result.
/// E.g. for ('2020-01-03', '2020-01-20') after applying 'toYYYYMM' we will get ('202001', '202001').
/// To avoid this we make range left and right included.
/// Any function that treats NULL specially is not monotonic.
/// Thus we can safely use isNull() as an -Inf/+Inf indicator here.
if (!key_range.left.isNull())
{
key_range.left = applyFunction(function_base, data.arg_data_type, key_range.left);
key_range.left_included = true;
}
if (!key_range.right.isNull())
{
key_range.right = applyFunction(function_base, data.arg_data_type, key_range.right);
key_range.right_included = true;
}
if (!is_positive)
data.monotonicity.is_positive = !data.monotonicity.is_positive;
@@ -185,53 +143,13 @@ public:
static bool needChildVisit(const ASTPtr & parent, const ASTPtr &)
{
-/// Multi-argument functions with all but one constant arguments can be monotonic.
+/// Currently we check monotonicity only for single-argument functions.
+/// Although, multi-argument functions with all but one constant arguments can also be monotonic.
if (const auto * func = typeid_cast<const ASTFunction *>(parent.get()))
-return func->arguments->children.size() <= 2;
+return func->arguments->children.size() < 2;
return true;
}
static ColumnWithTypeAndName extractLiteralColumnAndTypeFromAstLiteral(const ASTLiteral * literal)
{
ColumnWithTypeAndName result;
result.type = applyVisitor(FieldToDataType(), literal->value);
result.column = result.type->createColumnConst(0, literal->value);
return result;
}
static ColumnsWithTypeAndName getFunctionArguments(const ASTFunction & ast_function, const Data & data)
{
ColumnsWithTypeAndName args;
auto arguments_size = ast_function.arguments->children.size();
chassert(arguments_size == 1 || arguments_size == 2);
if (arguments_size == 2)
{
if (ast_function.arguments->children[0]->as<ASTLiteral>())
{
const auto * literal = ast_function.arguments->children[0]->as<ASTLiteral>();
args.push_back(extractLiteralColumnAndTypeFromAstLiteral(literal));
args.emplace_back(data.arg_data_type, "tmp");
}
else
{
const auto * literal = ast_function.arguments->children[1]->as<ASTLiteral>();
args.emplace_back(data.arg_data_type, "tmp");
args.push_back(extractLiteralColumnAndTypeFromAstLiteral(literal));
}
}
else
{
args.emplace_back(data.arg_data_type, "tmp");
}
return args;
}
};
using MonotonicityCheckVisitor = ConstInDepthNodeVisitor<MonotonicityCheckMatcher, false>;

View File

@@ -1,43 +0,0 @@
#include <Interpreters/applyFunction.h>
#include <Core/Range.h>
#include <Functions/IFunction.h>
namespace DB
{
static Field applyFunctionForField(const FunctionBasePtr & func, const DataTypePtr & arg_type, const Field & arg_value)
{
ColumnsWithTypeAndName columns{
{arg_type->createColumnConst(1, arg_value), arg_type, "x"},
};
auto col = func->execute(columns, func->getResultType(), 1);
return (*col)[0];
}
FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
{
/// Fallback for fields without block reference.
if (field.isExplicit())
return applyFunctionForField(func, current_type, field);
String result_name = "_" + func->getName() + "_" + toString(field.column_idx);
const auto & columns = field.columns;
size_t result_idx = columns->size();
for (size_t i = 0; i < result_idx; ++i)
if ((*columns)[i].name == result_name)
result_idx = i;
if (result_idx == columns->size())
{
ColumnsWithTypeAndName args{(*columns)[field.column_idx]};
field.columns->emplace_back(ColumnWithTypeAndName{nullptr, func->getResultType(), result_name});
(*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size());
}
return {field.columns, field.row_idx, result_idx};
}
}

View File

@@ -1,16 +0,0 @@
#pragma once
#include <memory>
namespace DB
{
struct FieldRef;
class IFunctionBase;
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field);
}

View File

@@ -118,7 +118,7 @@ void OwnJSONPatternFormatter::formatExtended(const DB::ExtendedLogMessage & msg_
writeJSONString(level, wb, settings);
DB::writeChar(':', wb);
int priority = static_cast<int>(msg.getPriority());
-writeJSONString(std::to_string(priority), wb, settings);
+writeJSONString(getPriorityName(priority), wb, settings);
}
if (!query_id.empty())

View File

@@ -3,11 +3,6 @@
namespace DB
{
-String queryToStringNullable(const ASTPtr & query)
-{
-return query ? queryToString(query) : "";
-}
String queryToString(const ASTPtr & query)
{
return queryToString(*query);

View File

@@ -6,5 +6,4 @@ namespace DB
{
String queryToString(const ASTPtr & query);
String queryToString(const IAST & query);
-String queryToStringNullable(const ASTPtr & query);
}

View File

@@ -1391,7 +1391,7 @@ void Planner::buildPlanForQueryNode()
}
}
-if (query_context->canUseTaskBasedParallelReplicas() || !settings.parallel_replicas_custom_key.value.empty())
+if (!settings.parallel_replicas_custom_key.value.empty())
{
/// Check support for JOIN for parallel replicas with custom key
if (planner_context->getTableExpressionNodeToData().size() > 1)

View File

@@ -357,7 +357,6 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
ParallelReplicasReadingCoordinatorPtr coordinator_,
Block header_,
QueryProcessingStage::Enum stage_,
-StorageID main_table_,
ContextMutablePtr context_,
ThrottlerPtr throttler_,
Scalars scalars_,
@@ -369,7 +368,6 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
, query_ast(query_ast_)
, coordinator(std::move(coordinator_))
, stage(std::move(stage_))
-, main_table(std::move(main_table_))
, context(context_)
, throttler(throttler_)
, scalars(scalars_)

View File

@@ -76,7 +76,6 @@ public:
ParallelReplicasReadingCoordinatorPtr coordinator_,
Block header_,
QueryProcessingStage::Enum stage_,
-StorageID main_table_,
ContextMutablePtr context_,
ThrottlerPtr throttler_,
Scalars scalars_,
@@ -99,7 +98,6 @@ private:
ASTPtr query_ast;
ParallelReplicasReadingCoordinatorPtr coordinator;
QueryProcessingStage::Enum stage;
-StorageID main_table;
ContextMutablePtr context;
ThrottlerPtr throttler;
Scalars scalars;

View File

@@ -37,7 +37,7 @@ namespace ErrorCodes
AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFS(
IAsynchronousReader & reader_, const ReadSettings & settings_, std::shared_ptr<ReadBufferFromHDFS> impl_)
-: BufferWithOwnMemory<SeekableReadBuffer>(settings_.remote_fs_buffer_size)
+: ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0)
, reader(reader_)
, base_priority(settings_.priority)
, impl(std::move(impl_))

View File

@@ -21,7 +21,7 @@ namespace DB
class IAsynchronousReader;
-class AsynchronousReadBufferFromHDFS : public BufferWithOwnMemory<SeekableReadBuffer>, public WithFileName, public WithFileSize
+class AsynchronousReadBufferFromHDFS : public ReadBufferFromFileBase
{
public:
AsynchronousReadBufferFromHDFS(

View File

@@ -81,7 +81,6 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par
auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key);
size_t minmax_idx_size = minmax_column_types.size();
-hyperrectangle.clear();
hyperrectangle.reserve(minmax_idx_size);
for (size_t i = 0; i < minmax_idx_size; ++i)
{
@@ -105,39 +104,6 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par
initialized = true;
}
Block IMergeTreeDataPart::MinMaxIndex::getBlock(const MergeTreeData & data) const
{
if (!initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to get block from uninitialized MinMax index.");
Block block;
const auto metadata_snapshot = data.getInMemoryMetadataPtr();
const auto & partition_key = metadata_snapshot->getPartitionKey();
const auto minmax_column_names = data.getMinMaxColumnsNames(partition_key);
const auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key);
const auto minmax_idx_size = minmax_column_types.size();
for (size_t i = 0; i < minmax_idx_size; ++i)
{
const auto & data_type = minmax_column_types[i];
const auto & column_name = minmax_column_names[i];
const auto column = data_type->createColumn();
const auto min_val = hyperrectangle.at(i).left;
const auto max_val = hyperrectangle.at(i).right;
column->insert(min_val);
column->insert(max_val);
block.insert(ColumnWithTypeAndName(column->getPtr(), data_type, column_name));
}
return block;
}
IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::store(
const MergeTreeData & data, IDataPartStorage & part_storage, Checksums & out_checksums) const
{
@@ -219,7 +185,8 @@ void IMergeTreeDataPart::MinMaxIndex::merge(const MinMaxIndex & other)
if (!initialized)
{
-*this = other;
+hyperrectangle = other.hyperrectangle;
+initialized = true;
}
else
{

View File

@@ -336,7 +336,6 @@ public:
}
void load(const MergeTreeData & data, const PartMetadataManagerPtr & manager);
-Block getBlock(const MergeTreeData & data) const;
using WrittenFiles = std::vector<std::unique_ptr<WriteBufferFromFileBase>>;

View File

@@ -1,37 +1,36 @@
-#include <Columns/ColumnConst.h>
-#include <Columns/ColumnSet.h>
-#include <DataTypes/DataTypesNumber.h>
+#include <Storages/MergeTree/KeyCondition.h>
+#include <Storages/MergeTree/BoolMask.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/FieldToDataType.h>
-#include <DataTypes/Utils.h>
#include <DataTypes/getLeastSupertype.h>
-#include <Functions/CastOverloadResolver.h>
-#include <Functions/FunctionFactory.h>
-#include <Functions/IFunction.h>
-#include <Functions/indexHint.h>
-#include <IO/Operators.h>
-#include <IO/WriteBufferFromString.h>
-#include <Interpreters/ExpressionActions.h>
-#include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/Set.h>
+#include <DataTypes/Utils.h>
#include <Interpreters/TreeRewriter.h>
-#include <Interpreters/applyFunction.h>
+#include <Interpreters/ExpressionAnalyzer.h>
+#include <Interpreters/ExpressionActions.h>
#include <Interpreters/castColumn.h>
-#include <Interpreters/convertFieldToType.h>
#include <Interpreters/misc.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTSelectQuery.h>
-#include <Parsers/queryToString.h>
-#include <Storages/MergeTree/BoolMask.h>
-#include <Storages/MergeTree/KeyCondition.h>
-#include <Storages/MergeTree/MergeTreeIndexUtils.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/indexHint.h>
+#include <Functions/CastOverloadResolver.h>
+#include <Functions/IFunction.h>
#include <Common/FieldVisitorToString.h>
#include <Common/MortonUtils.h>
#include <Common/typeid_cast.h>
+#include <Columns/ColumnSet.h>
+#include <Columns/ColumnConst.h>
+#include <Interpreters/convertFieldToType.h>
+#include <Interpreters/Set.h>
+#include <Parsers/queryToString.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTSelectQuery.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+#include <Storages/MergeTree/MergeTreeIndexUtils.h>
#include <algorithm>
#include <cassert>
@@ -837,6 +836,21 @@ bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants
return node.tryGetConstant(out_value, out_type);
}
static Field applyFunctionForField(
const FunctionBasePtr & func,
const DataTypePtr & arg_type,
const Field & arg_value)
{
ColumnsWithTypeAndName columns
{
{ arg_type->createColumnConst(1, arg_value), arg_type, "x" },
};
auto col = func->execute(columns, func->getResultType(), 1);
return (*col)[0];
}
/// The case when arguments may have types different than in the primary key.
static std::pair<Field, DataTypePtr> applyFunctionForFieldOfUnknownType(
const FunctionBasePtr & func,
@@ -876,6 +890,33 @@ static std::pair<Field, DataTypePtr> applyBinaryFunctionForFieldOfUnknownType(
return {std::move(result), std::move(return_type)};
}
static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
{
/// Fallback for fields without block reference.
if (field.isExplicit())
return applyFunctionForField(func, current_type, field);
String result_name = "_" + func->getName() + "_" + toString(field.column_idx);
const auto & columns = field.columns;
size_t result_idx = columns->size();
for (size_t i = 0; i < result_idx; ++i)
{
if ((*columns)[i].name == result_name)
result_idx = i;
}
if (result_idx == columns->size())
{
ColumnsWithTypeAndName args{(*columns)[field.column_idx]};
field.columns->emplace_back(ColumnWithTypeAndName {nullptr, func->getResultType(), result_name});
(*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size());
}
return {field.columns, field.row_idx, result_idx};
}
/** When table's key has expression with these functions from a column,
 * and when a column in a query is compared with a constant, such as:
 * CREATE TABLE (x String) ORDER BY toDate(x)

View File

@@ -8,6 +8,21 @@
#include <Backups/BackupEntryWrappedWith.h>
#include <Backups/IBackup.h>
#include <Backups/RestorerFromBackup.h>
#include <Common/Config/ConfigHelper.h>
#include <Common/CurrentMetrics.h>
#include <Common/Increment.h>
#include <Common/ProfileEventsScope.h>
#include <Common/SimpleIncrement.h>
#include <Common/Stopwatch.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/ThreadFuzzer.h>
#include <Common/escapeForFileName.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/noexcept_scope.h>
#include <Common/quoteString.h>
#include <Common/scope_guard_safe.h>
#include <Common/typeid_cast.h>
#include <Storages/MergeTree/RangesInDataPart.h>
#include <Compression/CompressedReadBuffer.h>
#include <Core/QueryProcessingStage.h>
#include <DataTypes/DataTypeEnum.h>
@@ -28,20 +43,19 @@
#include <IO/WriteHelpers.h>
#include <Interpreters/Aggregator.h>
#include <Interpreters/Context.h>
-#include <Interpreters/convertFieldToType.h>
-#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/MergeTreeTransaction.h>
#include <Interpreters/PartLog.h>
#include <Interpreters/TransactionLog.h>
#include <Interpreters/TreeRewriter.h>
+#include <Interpreters/convertFieldToType.h>
+#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/inplaceBlockConversions.h>
-#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTExpressionList.h>
-#include <Parsers/ASTFunction.h>
-#include <Parsers/ASTHelpers.h>
#include <Parsers/ASTIndexDeclaration.h>
+#include <Parsers/ASTHelpers.h>
+#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTNameTypePair.h>
#include <Parsers/ASTPartition.h>
@@ -50,41 +64,26 @@
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
+#include <Parsers/ASTAlterQuery.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/QueryPlan/QueryIdHolder.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Storages/AlterCommands.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/Freeze.h>
-#include <Storages/MergeTree/ActiveDataPartSet.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
-#include <Storages/MergeTree/MergeTreeDataPartCloner.h>
#include <Storages/MergeTree/MergeTreeDataPartCompact.h>
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/MergeTreeDataPartWide.h>
#include <Storages/Statistics/Estimator.h>
#include <Storages/MergeTree/MergeTreeSelectProcessor.h>
-#include <Storages/MergeTree/RangesInDataPart.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Storages/MutationCommands.h>
+#include <Storages/MergeTree/ActiveDataPartSet.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/VirtualColumnUtils.h>
#include <Common/Config/ConfigHelper.h>
#include <Common/CurrentMetrics.h>
#include <Common/Increment.h>
#include <Common/ProfileEventsScope.h>
#include <Common/SimpleIncrement.h>
#include <Common/Stopwatch.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/ThreadFuzzer.h>
#include <Common/escapeForFileName.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/noexcept_scope.h>
#include <Common/quoteString.h>
#include <Common/scope_guard_safe.h>
#include <Common/typeid_cast.h>
#include <boost/range/algorithm_ext/erase.hpp>
#include <boost/algorithm/string/join.hpp>
@@ -198,50 +197,6 @@ namespace ErrorCodes
extern const int LIMIT_EXCEEDED;
}
static size_t getPartitionAstFieldsCount(const ASTPartition & partition_ast, ASTPtr partition_value_ast)
{
if (partition_ast.fields_count.has_value())
return *partition_ast.fields_count;
if (partition_value_ast->as<ASTLiteral>())
return 1;
const auto * tuple_ast = partition_value_ast->as<ASTFunction>();
if (!tuple_ast)
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID());
}
if (tuple_ast->name != "tuple")
{
if (!isFunctionCast(tuple_ast))
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
if (tuple_ast->arguments->as<ASTExpressionList>()->children.empty())
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
auto first_arg = tuple_ast->arguments->as<ASTExpressionList>()->children.at(0);
if (const auto * inner_tuple = first_arg->as<ASTFunction>(); inner_tuple && inner_tuple->name == "tuple")
{
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
return arguments_ast ? arguments_ast->children.size() : 0;
}
else if (const auto * inner_literal_tuple = first_arg->as<ASTLiteral>(); inner_literal_tuple)
{
return inner_literal_tuple->value.getType() == Field::Types::Tuple ? inner_literal_tuple->value.safeGet<Tuple>().size() : 1;
}
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
}
else
{
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
return arguments_ast ? arguments_ast->children.size() : 0;
}
}
static void checkSuspiciousIndices(const ASTFunction * index_function)
{
std::unordered_set<UInt64> unique_index_expression_hashes;
@@ -4899,7 +4854,7 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D
}
void MergeTreeData::checkAlterPartitionIsPossible(
-const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr) const
+const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr local_context) const
{
for (const auto & command : commands)
{
@@ -4927,15 +4882,7 @@ void MergeTreeData::checkAlterPartitionIsPossible(
throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH PARTITION ALL currently");
}
else
-{
-// The below `getPartitionIDFromQuery` call will not work for attach / replace because it assumes the partition expressions
-// are the same and deliberately uses this storage. Later on, `MergeTreeData::replaceFrom` is called, and it makes the right
-// call to `getPartitionIDFromQuery` using source storage.
-// Note: `PartitionCommand::REPLACE_PARTITION` is used both for `REPLACE PARTITION` and `ATTACH PARTITION FROM` queries.
-// But not for `ATTACH PARTITION` queries.
-if (command.type != PartitionCommand::REPLACE_PARTITION)
-getPartitionIDFromQuery(command.partition, getContext());
-}
+getPartitionIDFromQuery(command.partition, local_context);
}
}
}
@@ -5669,8 +5616,69 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
MergeTreePartInfo::validatePartitionID(partition_ast.id->clone(), format_version);
return partition_ast.id->as<ASTLiteral>()->value.safeGet<String>();
}
+size_t partition_ast_fields_count = 0;
ASTPtr partition_value_ast = partition_ast.value->clone();
-auto partition_ast_fields_count = getPartitionAstFieldsCount(partition_ast, partition_value_ast);
+if (!partition_ast.fields_count.has_value())
{
if (partition_value_ast->as<ASTLiteral>())
{
partition_ast_fields_count = 1;
}
else if (const auto * tuple_ast = partition_value_ast->as<ASTFunction>())
{
if (tuple_ast->name != "tuple")
{
if (isFunctionCast(tuple_ast))
{
if (tuple_ast->arguments->as<ASTExpressionList>()->children.empty())
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
}
auto first_arg = tuple_ast->arguments->as<ASTExpressionList>()->children.at(0);
if (const auto * inner_tuple = first_arg->as<ASTFunction>(); inner_tuple && inner_tuple->name == "tuple")
{
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
if (arguments_ast)
partition_ast_fields_count = arguments_ast->children.size();
else
partition_ast_fields_count = 0;
}
else if (const auto * inner_literal_tuple = first_arg->as<ASTLiteral>(); inner_literal_tuple)
{
if (inner_literal_tuple->value.getType() == Field::Types::Tuple)
partition_ast_fields_count = inner_literal_tuple->value.safeGet<Tuple>().size();
else
partition_ast_fields_count = 1;
}
else
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
}
}
else
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
}
else
{
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
if (arguments_ast)
partition_ast_fields_count = arguments_ast->children.size();
else
partition_ast_fields_count = 0;
}
}
else
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID());
}
}
else
{
partition_ast_fields_count = *partition_ast.fields_count;
}
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{ {
@ -7006,35 +7014,23 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour
if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical())) if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical()))
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure"); throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure");
if (queryToStringNullable(my_snapshot->getSortingKeyAST()) != queryToStringNullable(src_snapshot->getSortingKeyAST())) auto query_to_string = [] (const ASTPtr & ast)
{
return ast ? queryToString(ast) : "";
};
if (query_to_string(my_snapshot->getSortingKeyAST()) != query_to_string(src_snapshot->getSortingKeyAST()))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different ordering"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different ordering");
if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST()))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different partition key");
if (format_version != src_data->format_version) if (format_version != src_data->format_version)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different format_version"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different format_version");
if (queryToStringNullable(my_snapshot->getPrimaryKeyAST()) != queryToStringNullable(src_snapshot->getPrimaryKeyAST())) if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST()))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key");
const auto is_a_subset_of = [](const auto & lhs, const auto & rhs)
{
if (lhs.size() > rhs.size())
return false;
const auto rhs_set = NameSet(rhs.begin(), rhs.end());
for (const auto & lhs_element : lhs)
if (!rhs_set.contains(lhs_element))
return false;
return true;
};
if (!is_a_subset_of(my_snapshot->getColumnsRequiredForPartitionKey(), src_snapshot->getColumnsRequiredForPartitionKey()))
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Destination table partition expression columns must be a subset of source table partition expression columns");
}
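As a side note on the check removed here: it amounts to a plain set-inclusion test over the column names of the two partition keys. A minimal standalone sketch of that rule (hypothetical names, not the ClickHouse API):

// Standalone sketch: every column required by the destination partition key
// must also be required by the source partition key.
#include <string>
#include <unordered_set>
#include <vector>

static bool isSubsetOf(const std::vector<std::string> & lhs, const std::vector<std::string> & rhs)
{
    const std::unordered_set<std::string> rhs_set(rhs.begin(), rhs.end());
    for (const auto & column : lhs)
        if (rhs_set.count(column) == 0)
            return false;
    return true;
}

// Example: destination key needs {"date"}, source key provides {"date", "id"} -> compatible.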
const auto check_definitions = [](const auto & my_descriptions, const auto & src_descriptions) const auto check_definitions = [](const auto & my_descriptions, const auto & src_descriptions)
{ {
if (my_descriptions.size() != src_descriptions.size()) if (my_descriptions.size() != src_descriptions.size())
@ -7075,56 +7071,128 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
const ReadSettings & read_settings, const ReadSettings & read_settings,
const WriteSettings & write_settings) const WriteSettings & write_settings)
{ {
return MergeTreeDataPartCloner::clone( /// Check that the storage policy contains the disk where the src_part is located.
this, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, require_part_metadata, params, read_settings, write_settings); bool does_storage_policy_allow_same_disk = false;
} for (const DiskPtr & disk : getStoragePolicy()->getDisks())
{
if (disk->getName() == src_part->getDataPartStorage().getDiskName())
{
does_storage_policy_allow_same_disk = true;
break;
}
}
if (!does_storage_policy_allow_same_disk)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Could not clone and load part {} because disk does not belong to storage policy",
quoteString(src_part->getDataPartStorage().getFullPath()));
std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( String dst_part_name = src_part->getNewName(dst_part_info);
const MergeTreeData::DataPartPtr & src_part, String tmp_dst_part_name = tmp_part_prefix + dst_part_name;
const MergeTreePartition & new_partition, auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name);
const String & partition_id,
const IMergeTreeDataPart::MinMaxIndex & min_max_index,
const String & tmp_part_prefix,
const StorageMetadataPtr & my_metadata_snapshot,
const IDataPartStorage::ClonePartParams & clone_params,
ContextPtr local_context,
Int64 min_block,
Int64 max_block
)
{
MergeTreePartInfo dst_part_info(partition_id, min_block, max_block, src_part->info.level);
return MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression( /// Why it is needed if we only hardlink files?
this, auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk());
src_part, auto src_part_storage = src_part->getDataPartStoragePtr();
my_metadata_snapshot,
dst_part_info,
tmp_part_prefix,
local_context->getReadSettings(),
local_context->getWriteSettings(),
new_partition,
min_max_index,
false,
clone_params);
}
std::pair<MergeTreePartition, IMergeTreeDataPart::MinMaxIndex> MergeTreeData::createPartitionAndMinMaxIndexFromSourcePart( scope_guard src_flushed_tmp_dir_lock;
const MergeTreeData::DataPartPtr & src_part, MergeTreeData::MutableDataPartPtr src_flushed_tmp_part;
const StorageMetadataPtr & metadata_snapshot,
ContextPtr local_context)
{
const auto & src_data = src_part->storage;
auto metadata_manager = std::make_shared<PartMetadataManagerOrdinary>(src_part.get()); /// If source part is in memory, flush it to disk and clone it already in on-disk format
IMergeTreeDataPart::MinMaxIndex min_max_index; /// Protect tmp dir from removing by cleanup thread with src_flushed_tmp_dir_lock
/// Construct src_flushed_tmp_part in order to delete part with its directory at destructor
if (auto src_part_in_memory = asInMemoryPart(src_part))
{
auto flushed_part_path = *src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix);
min_max_index.load(src_data, metadata_manager); auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename();
src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name);
MergeTreePartition new_partition; auto flushed_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot);
new_partition.create(metadata_snapshot, min_max_index.getBlock(src_data), 0u, local_context); src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, src_part->name, flushed_part_storage)
.withPartInfo(src_part->info)
.withPartFormatFromDisk()
.build();
return {new_partition, min_max_index}; src_flushed_tmp_part->is_temp = true;
src_part_storage = flushed_part_storage;
}
String with_copy;
if (params.copy_instead_of_hardlink)
with_copy = " (copying data)";
auto dst_part_storage = src_part_storage->freeze(
relative_data_path,
tmp_dst_part_name,
read_settings,
write_settings,
/* save_metadata_callback= */ {},
params);
if (params.metadata_version_to_write.has_value())
{
chassert(!params.keep_metadata_version);
auto out_metadata = dst_part_storage->writeFile(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, getContext()->getWriteSettings());
writeText(metadata_snapshot->getMetadataVersion(), *out_metadata);
out_metadata->finalize();
if (getSettings()->fsync_after_insert)
out_metadata->sync();
}
LOG_DEBUG(log, "Clone{} part {} to {}{}",
src_flushed_tmp_part ? " flushed" : "",
src_part_storage->getFullPath(),
std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name),
with_copy);
auto dst_data_part = MergeTreeDataPartBuilder(*this, dst_part_name, dst_part_storage)
.withPartFormatFromDisk()
.build();
if (!params.copy_instead_of_hardlink && params.hardlinked_files)
{
params.hardlinked_files->source_part_name = src_part->name;
params.hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID();
for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next())
{
if (!params.files_to_copy_instead_of_hardlinks.contains(it->name())
&& it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
&& it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
{
params.hardlinked_files->hardlinks_from_source_part.insert(it->name());
}
}
auto projections = src_part->getProjectionParts();
for (const auto & [name, projection_part] : projections)
{
const auto & projection_storage = projection_part->getDataPartStorage();
for (auto it = projection_storage.iterate(); it->isValid(); it->next())
{
auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name();
if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix)
&& it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
&& it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
{
params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix);
}
}
}
}
/// We should write version metadata on part creation to distinguish it from parts that were created without transaction.
TransactionID tid = params.txn ? params.txn->tid : Tx::PrehistoricTID;
dst_data_part->version.setCreationTID(tid, nullptr);
dst_data_part->storeVersionMetadata();
dst_data_part->is_temp = true;
dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true);
dst_data_part->modification_time = dst_part_storage->getLastModified().epochTime();
return std::make_pair(dst_data_part, std::move(temporary_directory_lock));
} }
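For orientation, the clone path restored above links or copies the source part's files into a temporary destination directory, depending on the clone parameters. A rough self-contained sketch of that idea (paths, helper name and the flat-directory assumption are illustrative; the real code goes through IDataPartStorage::freeze):

#include <filesystem>

namespace fs = std::filesystem;

// Clone the files of one part directory into a fresh destination directory,
// either by hardlinking (cheap, same disk) or by a physical copy
// (e.g. when zero-copy replication forbids sharing files).
// Subdirectories such as projections are ignored here for brevity.
static void clonePartDirectory(const fs::path & src_dir, const fs::path & dst_dir, bool copy_instead_of_hardlink)
{
    fs::create_directories(dst_dir);
    for (const auto & entry : fs::directory_iterator(src_dir))
    {
        if (!entry.is_regular_file())
            continue;
        const auto dst_file = dst_dir / entry.path().filename();
        if (copy_instead_of_hardlink)
            fs::copy_file(entry.path(), dst_file);
        else
            fs::create_hard_link(entry.path(), dst_file);
    }
}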
String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const


@ -231,7 +231,6 @@ public:
} }
}; };
using DataParts = std::set<DataPartPtr, LessDataPart>; using DataParts = std::set<DataPartPtr, LessDataPart>;
using MutableDataParts = std::set<MutableDataPartPtr, LessDataPart>; using MutableDataParts = std::set<MutableDataPartPtr, LessDataPart>;
using DataPartsVector = std::vector<DataPartPtr>; using DataPartsVector = std::vector<DataPartPtr>;
@ -849,23 +848,6 @@ public:
const ReadSettings & read_settings, const ReadSettings & read_settings,
const WriteSettings & write_settings); const WriteSettings & write_settings);
std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> cloneAndLoadPartOnSameDiskWithDifferentPartitionKey(
const MergeTreeData::DataPartPtr & src_part,
const MergeTreePartition & new_partition,
const String & partition_id,
const IMergeTreeDataPart::MinMaxIndex & min_max_index,
const String & tmp_part_prefix,
const StorageMetadataPtr & my_metadata_snapshot,
const IDataPartStorage::ClonePartParams & clone_params,
ContextPtr local_context,
Int64 min_block,
Int64 max_block);
static std::pair<MergeTreePartition, IMergeTreeDataPart::MinMaxIndex> createPartitionAndMinMaxIndexFromSourcePart(
const MergeTreeData::DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
ContextPtr local_context);
virtual std::vector<MergeTreeMutationStatus> getMutationsStatus() const = 0; virtual std::vector<MergeTreeMutationStatus> getMutationsStatus() const = 0;
/// Returns true if table can create new parts with adaptive granularity /// Returns true if table can create new parts with adaptive granularity


@ -1,320 +0,0 @@
#include <Interpreters/MergeTreeTransaction.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
#include <Storages/MergeTree/MergeTreeDataPartCloner.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
static Poco::Logger * log = &Poco::Logger::get("MergeTreeDataPartCloner");
namespace DistinctPartitionExpression
{
std::unique_ptr<WriteBufferFromFileBase> updatePartitionFile(
const MergeTreeData & merge_tree_data,
const MergeTreePartition & partition,
const MergeTreeData::MutableDataPartPtr & dst_part,
IDataPartStorage & storage)
{
storage.removeFile("partition.dat");
// Leverage already implemented MergeTreePartition::store to create & store partition.dat.
// Checksum is re-calculated later.
return partition.store(merge_tree_data, storage, dst_part->checksums);
}
IMergeTreeDataPart::MinMaxIndex::WrittenFiles updateMinMaxFiles(
const MergeTreeData & merge_tree_data,
const MergeTreeData::MutableDataPartPtr & dst_part,
IDataPartStorage & storage,
const StorageMetadataPtr & metadata_snapshot)
{
for (const auto & column_name : MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->partition_key))
{
auto file = "minmax_" + escapeForFileName(column_name) + ".idx";
storage.removeFile(file);
}
return dst_part->minmax_idx->store(merge_tree_data, storage, dst_part->checksums);
}
void finalizeNewFiles(const std::vector<std::unique_ptr<WriteBufferFromFileBase>> & files, bool sync_new_files)
{
for (const auto & file : files)
{
file->finalize();
if (sync_new_files)
file->sync();
}
}
void updateNewPartFiles(
const MergeTreeData & merge_tree_data,
const MergeTreeData::MutableDataPartPtr & dst_part,
const MergeTreePartition & new_partition,
const IMergeTreeDataPart::MinMaxIndex & new_min_max_index,
const StorageMetadataPtr & src_metadata_snapshot,
bool sync_new_files)
{
auto & storage = dst_part->getDataPartStorage();
*dst_part->minmax_idx = new_min_max_index;
auto partition_file = updatePartitionFile(merge_tree_data, new_partition, dst_part, storage);
auto min_max_files = updateMinMaxFiles(merge_tree_data, dst_part, storage, src_metadata_snapshot);
IMergeTreeDataPart::MinMaxIndex::WrittenFiles written_files;
if (partition_file)
written_files.emplace_back(std::move(partition_file));
written_files.insert(written_files.end(), std::make_move_iterator(min_max_files.begin()), std::make_move_iterator(min_max_files.end()));
finalizeNewFiles(written_files, sync_new_files);
// MergeTreeDataPartCloner::finalize_part calls IMergeTreeDataPart::loadColumnsChecksumsIndexes, which will re-create
// the checksum file if it doesn't exist. Relying on that is cumbersome, but this refactoring is simply a code extraction
// with small improvements. It can be further improved in the future.
storage.removeFile("checksums.txt");
}
}
namespace
{
bool doesStoragePolicyAllowSameDisk(MergeTreeData * merge_tree_data, const MergeTreeData::DataPartPtr & src_part)
{
for (const DiskPtr & disk : merge_tree_data->getStoragePolicy()->getDisks())
if (disk->getName() == src_part->getDataPartStorage().getDiskName())
return true;
return false;
}
DataPartStoragePtr flushPartStorageToDiskIfInMemory(
MergeTreeData * merge_tree_data,
const MergeTreeData::DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const String & tmp_part_prefix,
const String & tmp_dst_part_name,
scope_guard & src_flushed_tmp_dir_lock,
MergeTreeData::MutableDataPartPtr src_flushed_tmp_part)
{
if (auto src_part_in_memory = asInMemoryPart(src_part))
{
auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix);
auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename();
src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name);
auto flushed_part_storage = src_part_in_memory->flushToDisk(*flushed_part_path, metadata_snapshot);
src_flushed_tmp_part = MergeTreeDataPartBuilder(*merge_tree_data, src_part->name, flushed_part_storage)
.withPartInfo(src_part->info)
.withPartFormatFromDisk()
.build();
src_flushed_tmp_part->is_temp = true;
return flushed_part_storage;
}
return src_part->getDataPartStoragePtr();
}
std::shared_ptr<IDataPartStorage> hardlinkAllFiles(
MergeTreeData * merge_tree_data,
const DB::ReadSettings & read_settings,
const DB::WriteSettings & write_settings,
const DataPartStoragePtr & storage,
const String & path,
const DB::IDataPartStorage::ClonePartParams & params)
{
return storage->freeze(
merge_tree_data->getRelativeDataPath(),
path,
read_settings,
write_settings,
/*save_metadata_callback=*/{},
params);
}
std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> cloneSourcePart(
MergeTreeData * merge_tree_data,
const MergeTreeData::DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
const ReadSettings & read_settings,
const WriteSettings & write_settings,
const DB::IDataPartStorage::ClonePartParams & params)
{
const auto dst_part_name = src_part->getNewName(dst_part_info);
const auto tmp_dst_part_name = tmp_part_prefix + dst_part_name;
auto temporary_directory_lock = merge_tree_data->getTemporaryPartDirectoryHolder(tmp_dst_part_name);
src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk());
scope_guard src_flushed_tmp_dir_lock;
MergeTreeData::MutableDataPartPtr src_flushed_tmp_part;
auto src_part_storage = flushPartStorageToDiskIfInMemory(
merge_tree_data, src_part, metadata_snapshot, tmp_part_prefix, tmp_dst_part_name, src_flushed_tmp_dir_lock, src_flushed_tmp_part);
auto dst_part_storage = hardlinkAllFiles(merge_tree_data, read_settings, write_settings, src_part_storage, tmp_dst_part_name, params);
if (params.metadata_version_to_write.has_value())
{
chassert(!params.keep_metadata_version);
auto out_metadata = dst_part_storage->writeFile(
IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, merge_tree_data->getContext()->getWriteSettings());
writeText(metadata_snapshot->getMetadataVersion(), *out_metadata);
out_metadata->finalize();
if (merge_tree_data->getSettings()->fsync_after_insert)
out_metadata->sync();
}
LOG_DEBUG(
log,
"Clone {} part {} to {}{}",
src_flushed_tmp_part ? "flushed" : "",
src_part_storage->getFullPath(),
std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name),
false);
auto part = MergeTreeDataPartBuilder(*merge_tree_data, dst_part_name, dst_part_storage).withPartFormatFromDisk().build();
return std::make_pair(part, std::move(temporary_directory_lock));
}
void handleHardLinkedParameterFiles(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params)
{
const auto & hardlinked_files = params.hardlinked_files;
hardlinked_files->source_part_name = src_part->name;
hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID();
for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next())
{
if (!params.files_to_copy_instead_of_hardlinks.contains(it->name())
&& it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
&& it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
{
hardlinked_files->hardlinks_from_source_part.insert(it->name());
}
}
}
void handleProjections(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params)
{
auto projections = src_part->getProjectionParts();
for (const auto & [name, projection_part] : projections)
{
const auto & projection_storage = projection_part->getDataPartStorage();
for (auto it = projection_storage.iterate(); it->isValid(); it->next())
{
auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name();
if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix)
&& it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
&& it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
{
params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix);
}
}
}
}
MergeTreeData::MutableDataPartPtr finalizePart(
const MergeTreeData::MutableDataPartPtr & dst_part, const DB::IDataPartStorage::ClonePartParams & params, bool require_part_metadata)
{
/// We should write version metadata on part creation to distinguish it from parts that were created without transaction.
TransactionID tid = params.txn ? params.txn->tid : Tx::PrehistoricTID;
dst_part->version.setCreationTID(tid, nullptr);
dst_part->storeVersionMetadata();
dst_part->is_temp = true;
dst_part->loadColumnsChecksumsIndexes(require_part_metadata, true);
dst_part->modification_time = dst_part->getDataPartStorage().getLastModified().epochTime();
return dst_part;
}
std::pair<MergeTreeDataPartCloner::MutableDataPartPtr, scope_guard> cloneAndHandleHardlinksAndProjections(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
const ReadSettings & read_settings,
const WriteSettings & write_settings,
const IDataPartStorage::ClonePartParams & params)
{
if (!doesStoragePolicyAllowSameDisk(merge_tree_data, src_part))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Could not clone and load part {} because disk does not belong to storage policy",
quoteString(src_part->getDataPartStorage().getFullPath()));
auto [destination_part, temporary_directory_lock] = cloneSourcePart(
merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params);
if (!params.copy_instead_of_hardlink && params.hardlinked_files)
{
handleHardLinkedParameterFiles(src_part, params);
handleProjections(src_part, params);
}
return std::make_pair(destination_part, std::move(temporary_directory_lock));
}
}
std::pair<MergeTreeDataPartCloner::MutableDataPartPtr, scope_guard> MergeTreeDataPartCloner::clone(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
bool require_part_metadata,
const IDataPartStorage::ClonePartParams & params,
const ReadSettings & read_settings,
const WriteSettings & write_settings)
{
auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections(
merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params);
return std::make_pair(finalizePart(destination_part, params, require_part_metadata), std::move(temporary_directory_lock));
}
std::pair<MergeTreeDataPartCloner::MutableDataPartPtr, scope_guard> MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
const ReadSettings & read_settings,
const WriteSettings & write_settings,
const MergeTreePartition & new_partition,
const IMergeTreeDataPart::MinMaxIndex & new_min_max_index,
bool sync_new_files,
const IDataPartStorage::ClonePartParams & params)
{
auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections(
merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params);
DistinctPartitionExpression::updateNewPartFiles(
*merge_tree_data, destination_part, new_partition, new_min_max_index, src_part->storage.getInMemoryMetadataPtr(), sync_new_files);
return std::make_pair(finalizePart(destination_part, params, false), std::move(temporary_directory_lock));
}
}


@ -1,43 +0,0 @@
#pragma once
namespace DB
{
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
struct MergeTreePartition;
class IMergeTreeDataPart;
class MergeTreeDataPartCloner
{
public:
using DataPart = IMergeTreeDataPart;
using MutableDataPartPtr = std::shared_ptr<DataPart>;
using DataPartPtr = std::shared_ptr<const DataPart>;
static std::pair<MutableDataPartPtr, scope_guard> clone(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
bool require_part_metadata,
const IDataPartStorage::ClonePartParams & params,
const ReadSettings & read_settings,
const WriteSettings & write_settings);
static std::pair<MutableDataPartPtr, scope_guard> cloneWithDistinctPartitionExpression(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
const ReadSettings & read_settings,
const WriteSettings & write_settings,
const MergeTreePartition & new_partition,
const IMergeTreeDataPart::MinMaxIndex & new_min_max_index,
bool sync_new_files,
const IDataPartStorage::ClonePartParams & params);
};
}


@ -467,45 +467,6 @@ void MergeTreePartition::create(const StorageMetadataPtr & metadata_snapshot, Bl
} }
} }
void MergeTreePartition::createAndValidateMinMaxPartitionIds(
const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context)
{
if (!metadata_snapshot->hasPartitionKey())
return;
auto partition_key_names_and_types = executePartitionByExpression(metadata_snapshot, block_with_min_max_partition_ids, context);
value.resize(partition_key_names_and_types.size());
/// Executing partition_by expression adds new columns to passed block according to partition functions.
/// The block is passed by reference and is used afterwards. `moduloLegacy` needs to be substituted back
/// with just `modulo`, because it was a temporary substitution.
static constexpr std::string_view modulo_legacy_function_name = "moduloLegacy";
size_t i = 0;
for (const auto & element : partition_key_names_and_types)
{
auto & partition_column = block_with_min_max_partition_ids.getByName(element.name);
if (element.name.starts_with(modulo_legacy_function_name))
partition_column.name.replace(0, modulo_legacy_function_name.size(), "modulo");
Field extracted_min_partition_id_field;
Field extracted_max_partition_id_field;
partition_column.column->get(0, extracted_min_partition_id_field);
partition_column.column->get(1, extracted_max_partition_id_field);
if (extracted_min_partition_id_field != extracted_max_partition_id_field)
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE,
"Can not create the partition. A partition can not contain values that have different partition ids");
}
partition_column.column->get(0u, value[i++]);
}
}
NamesAndTypesList MergeTreePartition::executePartitionByExpression(const StorageMetadataPtr & metadata_snapshot, Block & block, ContextPtr context) NamesAndTypesList MergeTreePartition::executePartitionByExpression(const StorageMetadataPtr & metadata_snapshot, Block & block, ContextPtr context)
{ {
auto adjusted_partition_key = adjustPartitionKey(metadata_snapshot, context); auto adjusted_partition_key = adjustPartitionKey(metadata_snapshot, context);


@ -1,12 +1,11 @@
#pragma once #pragma once
#include <Core/Field.h> #include <base/types.h>
#include <Disks/IDisk.h> #include <Disks/IDisk.h>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
#include <Storages/KeyDescription.h> #include <Storages/KeyDescription.h>
#include <Storages/MergeTree/IPartMetadataManager.h> #include <Storages/MergeTree/IPartMetadataManager.h>
#include <Storages/MergeTree/PartMetadataManagerOrdinary.h> #include <Core/Field.h>
#include <base/types.h>
namespace DB namespace DB
{ {
@ -52,11 +51,6 @@ public:
void create(const StorageMetadataPtr & metadata_snapshot, Block block, size_t row, ContextPtr context); void create(const StorageMetadataPtr & metadata_snapshot, Block block, size_t row, ContextPtr context);
/// Copy of MergeTreePartition::create, but also validates if min max partition keys are equal. If they are different,
/// it means the partition can't be created because the data doesn't belong to the same partition.
void createAndValidateMinMaxPartitionIds(
const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context);
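The declaration removed just above carried a simple invariant: a part may only be attached if its minimum and maximum values map to the same destination partition id. A tiny self-contained sketch of that validation, with a month-granularity stand-in for the partition expression (all names hypothetical, POSIX gmtime_r assumed):

#include <ctime>
#include <stdexcept>

// Stand-in for the destination partition expression.
static int toYYYYMM(std::time_t t)
{
    std::tm tm_value{};
    gmtime_r(&t, &tm_value);
    return (tm_value.tm_year + 1900) * 100 + (tm_value.tm_mon + 1);
}

// Reject the attach if the part's min and max fall into different partitions.
static void validateSamePartition(std::time_t min_value, std::time_t max_value)
{
    if (toYYYYMM(min_value) != toYYYYMM(max_value))
        throw std::runtime_error("a part cannot contain values that map to different partition ids");
}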
static void appendFiles(const MergeTreeData & storage, Strings & files); static void appendFiles(const MergeTreeData & storage, Strings & files);
/// Adjust partition key and execute its expression on block. Return sample block according to used expression. /// Adjust partition key and execute its expression on block. Return sample block according to used expression.


@ -1,91 +0,0 @@
#include <Interpreters/MonotonicityCheckVisitor.h>
#include <Interpreters/getTableExpressions.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h>
#include <Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace
{
bool isDestinationPartitionExpressionMonotonicallyIncreasing(
const std::vector<Range> & hyperrectangle, const MergeTreeData & destination_storage)
{
auto destination_table_metadata = destination_storage.getInMemoryMetadataPtr();
auto key_description = destination_table_metadata->getPartitionKey();
auto definition_ast = key_description.definition_ast->clone();
auto table_identifier = std::make_shared<ASTIdentifier>(destination_storage.getStorageID().getTableName());
auto table_with_columns
= TableWithColumnNamesAndTypes{DatabaseAndTableWithAlias(table_identifier), destination_table_metadata->getColumns().getOrdinary()};
auto expression_list = extractKeyExpressionList(definition_ast);
MonotonicityCheckVisitor::Data data{{table_with_columns}, destination_storage.getContext(), /*group_by_function_hashes*/ {}};
for (auto i = 0u; i < expression_list->children.size(); i++)
{
data.range = hyperrectangle[i];
MonotonicityCheckVisitor(data).visit(expression_list->children[i]);
if (!data.monotonicity.is_monotonic || !data.monotonicity.is_positive)
return false;
}
return true;
}
bool isExpressionDirectSubsetOf(const ASTPtr source, const ASTPtr destination)
{
auto source_expression_list = extractKeyExpressionList(source);
auto destination_expression_list = extractKeyExpressionList(destination);
std::unordered_set<std::string> source_columns;
for (auto i = 0u; i < source_expression_list->children.size(); ++i)
source_columns.insert(source_expression_list->children[i]->getColumnName());
for (auto i = 0u; i < destination_expression_list->children.size(); ++i)
if (!source_columns.contains(destination_expression_list->children[i]->getColumnName()))
return false;
return true;
}
}
void MergeTreePartitionCompatibilityVerifier::verify(
const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts)
{
const auto source_metadata = source_storage.getInMemoryMetadataPtr();
const auto destination_metadata = destination_storage.getInMemoryMetadataPtr();
const auto source_partition_key_ast = source_metadata->getPartitionKeyAST();
const auto destination_partition_key_ast = destination_metadata->getPartitionKeyAST();
// If destination partition expression columns are a subset of source partition expression columns,
// there is no need to check for monotonicity.
if (isExpressionDirectSubsetOf(source_partition_key_ast, destination_partition_key_ast))
return;
const auto src_global_min_max_indexes = MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(source_parts, destination_storage);
assert(!src_global_min_max_indexes.hyperrectangle.empty());
if (!isDestinationPartitionExpressionMonotonicallyIncreasing(src_global_min_max_indexes.hyperrectangle, destination_storage))
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Destination table partition expression is not monotonically increasing");
MergeTreePartition().createAndValidateMinMaxPartitionIds(
destination_storage.getInMemoryMetadataPtr(),
src_global_min_max_indexes.getBlock(destination_storage),
destination_storage.getContext());
}
}


@ -1,30 +0,0 @@
#pragma once
#include <Core/Field.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
namespace DB
{
/*
* Verifies that source and destination partitions are compatible.
* To be compatible, one of the following criteria must be met:
* 1. Destination partition expression columns are a subset of source partition columns; or
* 2. Destination partition expression is monotonic on the source global min_max idx Range AND the computed partition id for
* the source global min_max idx range is the same.
*
* If not, an exception is thrown.
* */
class MergeTreePartitionCompatibilityVerifier
{
public:
using DataPart = IMergeTreeDataPart;
using DataPartPtr = std::shared_ptr<const DataPart>;
using DataPartsVector = std::vector<DataPartPtr>;
static void
verify(const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts);
};
}


@ -1,25 +0,0 @@
#include <Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h>
namespace DB
{
IMergeTreeDataPart::MinMaxIndex
MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(const DataPartsVector & parts, const MergeTreeData & storage)
{
IMergeTreeDataPart::MinMaxIndex global_min_max_indexes;
for (const auto & part : parts)
{
auto metadata_manager = std::make_shared<PartMetadataManagerOrdinary>(part.get());
auto local_min_max_index = MergeTreeData::DataPart::MinMaxIndex();
local_min_max_index.load(storage, metadata_manager);
global_min_max_indexes.merge(local_min_max_index);
}
return global_min_max_indexes;
}
}
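The calculation above is a straightforward reduction of per-part min/max indexes into one global index. Sketched on plain integer ranges (types and names hypothetical, not the MinMaxIndex API):

#include <algorithm>
#include <cstdint>
#include <limits>
#include <utility>
#include <vector>

using Range = std::pair<int64_t, int64_t>;  // {min, max} of a single part

// Reduce the per-part ranges to one global range, the discrete analogue of
// merging MinMaxIndex objects part by part.
static Range mergeRanges(const std::vector<Range> & per_part_ranges)
{
    Range global{std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::min()};
    for (const auto & [min_value, max_value] : per_part_ranges)
    {
        global.first = std::min(global.first, min_value);
        global.second = std::max(global.second, max_value);
    }
    return global;
}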


@ -1,24 +0,0 @@
#pragma once
#include <utility>
#include <Core/Field.h>
#include <Storages/MergeTree/MergeTreeData.h>
namespace DB
{
/*
* Calculates global min max indexes for a given set of parts on given storage.
* */
class MergeTreePartitionGlobalMinMaxIdxCalculator
{
using DataPart = IMergeTreeDataPart;
using DataPartPtr = std::shared_ptr<const DataPart>;
using DataPartsVector = std::vector<DataPartPtr>;
public:
static IMergeTreeDataPart::MinMaxIndex calculate(const DataPartsVector & parts, const MergeTreeData & storage);
};
}


@ -5,9 +5,9 @@
#include <optional> #include <optional>
#include <ranges> #include <ranges>
#include <base/sort.h>
#include <Backups/BackupEntriesCollector.h> #include <Backups/BackupEntriesCollector.h>
#include <Databases/IDatabase.h> #include <Databases/IDatabase.h>
#include <IO/copyData.h>
#include "Common/Exception.h" #include "Common/Exception.h"
#include <Common/MemoryTracker.h> #include <Common/MemoryTracker.h>
#include <Common/escapeForFileName.h> #include <Common/escapeForFileName.h>
@ -20,30 +20,27 @@
#include <Interpreters/TransactionLog.h> #include <Interpreters/TransactionLog.h>
#include <Interpreters/ClusterProxy/executeQuery.h> #include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/ClusterProxy/SelectStreamFactory.h> #include <Interpreters/ClusterProxy/SelectStreamFactory.h>
#include <Interpreters/InterpreterAlterQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h> #include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <IO/copyData.h>
#include <Parsers/ASTCheckQuery.h> #include <Parsers/ASTCheckQuery.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
#include <Parsers/ASTPartition.h> #include <Parsers/ASTPartition.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Parsers/formatAST.h>
#include <Planner/Utils.h> #include <Planner/Utils.h>
#include <Storages/buildQueryTreeForShard.h>
#include <Storages/MergeTree/MergeTreeData.h> #include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/ActiveDataPartSet.h> #include <Storages/MergeTree/ActiveDataPartSet.h>
#include <Storages/AlterCommands.h> #include <Storages/AlterCommands.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/MergePlainMergeTreeTask.h>
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h>
#include <Storages/MergeTree/MergeTreeSink.h>
#include <Storages/MergeTree/PartMetadataManagerOrdinary.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Storages/PartitionCommands.h> #include <Storages/PartitionCommands.h>
#include <base/sort.h> #include <Storages/MergeTree/MergeTreeSink.h>
#include <Storages/buildQueryTreeForShard.h> #include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/MergePlainMergeTreeTask.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <QueryPipeline/Pipe.h> #include <QueryPipeline/Pipe.h>
#include <Processors/QueryPlan/QueryPlan.h> #include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h> #include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
@ -218,16 +215,25 @@ void StorageMergeTree::read(
{ {
if (local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) if (local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree)
{ {
const auto table_id = getStorageID(); ASTPtr modified_query_ast;
const auto & modified_query_ast = ClusterProxy::rewriteSelectQuery(
local_context, query_info.query,
table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
Block header; Block header;
if (local_context->getSettingsRef().allow_experimental_analyzer) if (local_context->getSettingsRef().allow_experimental_analyzer)
header = InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()); {
QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone();
rewriteJoinToGlobalJoin(modified_query_tree);
modified_query_tree = buildQueryTreeForShard(query_info, modified_query_tree);
header = InterpreterSelectQueryAnalyzer::getSampleBlock(
modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
modified_query_ast = queryNodeToSelectQuery(modified_query_tree);
}
else else
header = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); {
const auto table_id = getStorageID();
modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query,
table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
header
= InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
}
ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory select_stream_factory =
ClusterProxy::SelectStreamFactory( ClusterProxy::SelectStreamFactory(
@ -238,7 +244,6 @@ void StorageMergeTree::read(
ClusterProxy::executeQueryWithParallelReplicas( ClusterProxy::executeQueryWithParallelReplicas(
query_plan, query_plan,
getStorageID(),
select_stream_factory, select_stream_factory,
modified_query_ast, modified_query_ast,
local_context, local_context,
@ -2044,74 +2049,42 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con
ProfileEventsScope profile_events_scope; ProfileEventsScope profile_events_scope;
MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot); MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot);
String partition_id = src_data.getPartitionIDFromQuery(partition, local_context); String partition_id = getPartitionIDFromQuery(partition, local_context);
DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id);
bool attach_empty_partition = !replace && src_parts.empty();
if (attach_empty_partition)
return;
MutableDataPartsVector dst_parts; MutableDataPartsVector dst_parts;
std::vector<scope_guard> dst_parts_locks; std::vector<scope_guard> dst_parts_locks;
static const String TMP_PREFIX = "tmp_replace_from_"; static const String TMP_PREFIX = "tmp_replace_from_";
const auto my_partition_expression = my_metadata_snapshot->getPartitionKeyAST(); for (const DataPartPtr & src_part : src_parts)
const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST();
const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression);
if (is_partition_exp_different && !src_parts.empty())
MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_parts);
for (DataPartPtr & src_part : src_parts)
{ {
if (!canReplacePartition(src_part)) if (!canReplacePartition(src_part))
throw Exception(ErrorCodes::BAD_ARGUMENTS, throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Cannot replace partition '{}' because part '{}' has inconsistent granularity with table", "Cannot replace partition '{}' because part '{}' has inconsistent granularity with table",
partition_id, src_part->name); partition_id, src_part->name);
IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()};
/// This will generate unique name in scope of current server process. /// This will generate unique name in scope of current server process.
auto index = insert_increment.get(); Int64 temp_index = insert_increment.get();
MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level);
if (is_partition_exp_different) IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()};
{ auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart( src_part,
src_part, my_metadata_snapshot, local_context); TMP_PREFIX,
dst_part_info,
auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( my_metadata_snapshot,
src_part, clone_params,
new_partition, local_context->getReadSettings(),
new_partition.getID(*this), local_context->getWriteSettings());
new_min_max_index, dst_parts.emplace_back(std::move(dst_part));
TMP_PREFIX, dst_parts_locks.emplace_back(std::move(part_lock));
my_metadata_snapshot,
clone_params,
local_context,
index,
index);
dst_parts.emplace_back(std::move(dst_part));
dst_parts_locks.emplace_back(std::move(part_lock));
}
else
{
MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level);
auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
src_part,
TMP_PREFIX,
dst_part_info,
my_metadata_snapshot,
clone_params,
local_context->getReadSettings(),
local_context->getWriteSettings());
dst_parts.emplace_back(std::move(dst_part));
dst_parts_locks.emplace_back(std::move(part_lock));
}
} }
/// ATTACH empty part set
if (!replace && dst_parts.empty())
return;
MergeTreePartInfo drop_range; MergeTreePartInfo drop_range;
if (replace) if (replace)
{ {


@ -26,21 +26,22 @@
#include <base/sort.h> #include <base/sort.h>
#include <Storages/buildQueryTreeForShard.h>
#include <Storages/AlterCommands.h> #include <Storages/AlterCommands.h>
#include <Storages/ColumnsDescription.h> #include <Storages/ColumnsDescription.h>
#include <Storages/Freeze.h> #include <Storages/Freeze.h>
#include <Storages/MergeTree/AsyncBlockIDsCache.h> #include <Storages/MergeTree/AsyncBlockIDsCache.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h> #include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/extractZkPathFromCreateQuery.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h> #include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/LeaderElection.h> #include <Storages/MergeTree/LeaderElection.h>
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/MergeFromLogEntryTask.h> #include <Storages/MergeTree/MergeFromLogEntryTask.h>
#include <Storages/MergeTree/MergeList.h> #include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/MergeTreeBackgroundExecutor.h> #include <Storages/MergeTree/MergeTreeBackgroundExecutor.h>
#include <Storages/MergeTree/MergeTreeDataFormatVersion.h> #include <Storages/MergeTree/MergeTreeDataFormatVersion.h>
#include <Storages/MergeTree/MergeTreePartInfo.h> #include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h>
#include <Storages/MergeTree/MergeTreeReaderCompact.h> #include <Storages/MergeTree/MergeTreeReaderCompact.h>
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/MutateFromLogEntryTask.h> #include <Storages/MergeTree/MutateFromLogEntryTask.h>
#include <Storages/MergeTree/PinnedPartUUIDs.h> #include <Storages/MergeTree/PinnedPartUUIDs.h>
#include <Storages/MergeTree/ReplicatedMergeTreeAddress.h> #include <Storages/MergeTree/ReplicatedMergeTreeAddress.h>
@ -52,11 +53,9 @@
#include <Storages/MergeTree/ReplicatedMergeTreeSink.h> #include <Storages/MergeTree/ReplicatedMergeTreeSink.h>
#include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h> #include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
#include <Storages/MergeTree/ZeroCopyLock.h> #include <Storages/MergeTree/ZeroCopyLock.h>
#include <Storages/MergeTree/extractZkPathFromCreateQuery.h>
#include <Storages/PartitionCommands.h> #include <Storages/PartitionCommands.h>
#include <Storages/StorageReplicatedMergeTree.h> #include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/VirtualColumnUtils.h> #include <Storages/VirtualColumnUtils.h>
#include <Storages/buildQueryTreeForShard.h>
#include <Databases/DatabaseOnDisk.h> #include <Databases/DatabaseOnDisk.h>
#include <Databases/DatabaseReplicated.h> #include <Databases/DatabaseReplicated.h>
@ -2714,48 +2713,16 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry)
.copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()), .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()),
.metadata_version_to_write = metadata_snapshot->getMetadataVersion() .metadata_version_to_write = metadata_snapshot->getMetadataVersion()
}; };
auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk(
const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST(); part_desc->src_table_part,
const auto src_partition_expression = source_table->getInMemoryMetadataPtr()->getPartitionKeyAST(); TMP_PREFIX + "clone_",
part_desc->new_part_info,
const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); metadata_snapshot,
clone_params,
if (is_partition_exp_different) getContext()->getReadSettings(),
{ getContext()->getWriteSettings());
auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart( part_desc->res_part = std::move(res_part);
part_desc->src_table_part, metadata_snapshot, getContext()); part_desc->temporary_part_lock = std::move(temporary_part_lock);
auto partition_id = new_partition.getID(*this);
auto [res_part, temporary_part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey(
part_desc->src_table_part,
new_partition,
partition_id,
new_min_max_index,
TMP_PREFIX + "clone_",
metadata_snapshot,
clone_params,
getContext(),
part_desc->new_part_info.min_block,
part_desc->new_part_info.max_block);
part_desc->res_part = std::move(res_part);
part_desc->temporary_part_lock = std::move(temporary_part_lock);
}
else
{
auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk(
part_desc->src_table_part,
TMP_PREFIX + "clone_",
part_desc->new_part_info,
metadata_snapshot,
clone_params,
getContext()->getReadSettings(),
getContext()->getWriteSettings());
part_desc->res_part = std::move(res_part);
part_desc->temporary_part_lock = std::move(temporary_part_lock);
}
} }
else if (!part_desc->replica.empty()) else if (!part_desc->replica.empty())
{ {
@ -5418,7 +5385,9 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl(
if (local_context->getSettingsRef().allow_experimental_analyzer) if (local_context->getSettingsRef().allow_experimental_analyzer)
{ {
auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone();
rewriteJoinToGlobalJoin(modified_query_tree);
modified_query_tree = buildQueryTreeForShard(query_info, modified_query_tree);
header = InterpreterSelectQueryAnalyzer::getSampleBlock( header = InterpreterSelectQueryAnalyzer::getSampleBlock(
modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
@ -5441,7 +5410,6 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl(
ClusterProxy::executeQueryWithParallelReplicas( ClusterProxy::executeQueryWithParallelReplicas(
query_plan, query_plan,
getStorageID(),
select_stream_factory, select_stream_factory,
modified_query_ast, modified_query_ast,
local_context, local_context,
@ -7885,22 +7853,11 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
ProfileEventsScope profile_events_scope; ProfileEventsScope profile_events_scope;
MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot);
String partition_id = src_data.getPartitionIDFromQuery(partition, query_context); String partition_id = getPartitionIDFromQuery(partition, query_context);
/// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet.
DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id);
bool attach_empty_partition = !replace && src_all_parts.empty();
if (attach_empty_partition)
return;
const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST();
const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST();
const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression);
if (is_partition_exp_different && !src_all_parts.empty())
MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_all_parts);
LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size());
static const String TMP_PREFIX = "tmp_replace_from_"; static const String TMP_PREFIX = "tmp_replace_from_";
@ -7955,18 +7912,6 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
"Cannot replace partition '{}' because part '{}" "Cannot replace partition '{}' because part '{}"
"' has inconsistent granularity with table", partition_id, src_part->name); "' has inconsistent granularity with table", partition_id, src_part->name);
IMergeTreeDataPart::MinMaxIndex min_max_index = *src_part->minmax_idx;
MergeTreePartition merge_tree_partition = src_part->partition;
if (is_partition_exp_different)
{
auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart(src_part, metadata_snapshot, query_context);
merge_tree_partition = new_partition;
min_max_index = new_min_max_index;
partition_id = merge_tree_partition.getID(*this);
}
String hash_hex = src_part->checksums.getTotalChecksumHex(); String hash_hex = src_part->checksums.getTotalChecksumHex();
const bool is_duplicated_part = replaced_parts.contains(hash_hex); const bool is_duplicated_part = replaced_parts.contains(hash_hex);
replaced_parts.insert(hash_hex); replaced_parts.insert(hash_hex);
@ -7985,52 +7930,27 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
continue; continue;
} }
UInt64 index = lock->getNumber();
MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level);
bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication
|| dynamic_cast<const MergeTreeData *>(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; || dynamic_cast<const MergeTreeData *>(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication;
UInt64 index = lock->getNumber();
IDataPartStorage::ClonePartParams clone_params IDataPartStorage::ClonePartParams clone_params
{ {
.copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()),
.metadata_version_to_write = metadata_snapshot->getMetadataVersion() .metadata_version_to_write = metadata_snapshot->getMetadataVersion()
}; };
auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
if (is_partition_exp_different) src_part,
{ TMP_PREFIX,
auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( dst_part_info,
src_part, metadata_snapshot,
merge_tree_partition, clone_params,
partition_id, query_context->getReadSettings(),
min_max_index, query_context->getWriteSettings());
TMP_PREFIX,
metadata_snapshot,
clone_params,
query_context,
index,
index);
dst_parts.emplace_back(dst_part);
dst_parts_locks.emplace_back(std::move(part_lock));
}
else
{
MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level);
auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
src_part,
TMP_PREFIX,
dst_part_info,
metadata_snapshot,
clone_params,
query_context->getReadSettings(),
query_context->getWriteSettings());
dst_parts.emplace_back(dst_part);
dst_parts_locks.emplace_back(std::move(part_lock));
}
src_parts.emplace_back(src_part); src_parts.emplace_back(src_part);
dst_parts.emplace_back(dst_part);
dst_parts_locks.emplace_back(std::move(part_lock));
ephemeral_locks.emplace_back(std::move(*lock)); ephemeral_locks.emplace_back(std::move(*lock));
block_id_paths.emplace_back(block_id_path); block_id_paths.emplace_back(block_id_path);
part_checksums.emplace_back(hash_hex); part_checksums.emplace_back(hash_hex);


@ -373,11 +373,37 @@ QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeN
removeGroupingFunctionSpecializations(query_tree_to_modify); removeGroupingFunctionSpecializations(query_tree_to_modify);
// std::cerr << "====================== build 1 \n" << query_tree_to_modify->dumpTree() << std::endl;
createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext()); createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext());
// std::cerr << "====================== build 2 \n" << query_tree_to_modify->dumpTree() << std::endl;
return query_tree_to_modify; return query_tree_to_modify;
} }
class RewriteJoinToGlobalJoinVisitor : public InDepthQueryTreeVisitor<RewriteJoinToGlobalJoinVisitor>
{
public:
using Base = InDepthQueryTreeVisitor<RewriteJoinToGlobalJoinVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
{
if (auto * join_node = node->as<JoinNode>())
join_node->setLocality(JoinLocality::Global);
}
static bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child)
{
auto * join_node = parent->as<JoinNode>();
if (join_node && join_node->getRightTableExpression() == child)
return false;
return true;
}
};
void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify)
{
RewriteJoinToGlobalJoinVisitor visitor;
visitor.visit(query_tree_to_modify);
}
} }
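The new visitor above forces every join in the shard query to GLOBAL locality while never descending into a join's right-hand table expression. The same traversal rule, sketched on a toy tree type (purely illustrative, not the query-tree API):

#include <memory>
#include <vector>

// Hypothetical node type that only mimics the shape of a query tree.
struct Node
{
    bool is_join = false;
    bool is_global = false;
    std::shared_ptr<Node> right_table;            // set only for join nodes
    std::vector<std::shared_ptr<Node>> children;  // all children, right_table included
};

// Mark every join as GLOBAL, but skip the right-hand table expression,
// mirroring needChildVisit() above.
static void rewriteJoinsToGlobal(const std::shared_ptr<Node> & node)
{
    if (!node)
        return;
    if (node->is_join)
        node->is_global = true;
    for (const auto & child : node->children)
    {
        if (node->is_join && child == node->right_table)
            continue;
        rewriteJoinsToGlobal(child);
    }
}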


@ -12,4 +12,6 @@ using QueryTreeNodePtr = std::shared_ptr<IQueryTreeNode>;
QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeNodePtr query_tree_to_modify); QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeNodePtr query_tree_to_modify);
void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify);
} }


@ -27,8 +27,9 @@
00917_multiple_joins_denny_crane
02725_agg_projection_resprect_PK
02763_row_policy_storage_merge_alias
02784_parallel_replicas_automatic_decision_join
02818_parameterized_view_with_cte_multiple_usage
# Check after constants refactoring
02901_parallel_replicas_rollup
# Flaky. Please don't delete them without fixing them:
01287_max_execution_speed
02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET

View File

@ -1,17 +0,0 @@
<clickhouse>
<remote_servers>
<test_cluster>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>replica1</host>
<port>9000</port>
</replica>
<replica>
<host>replica2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster>
</remote_servers>
</clickhouse>

View File

@ -1,214 +0,0 @@
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
cluster = ClickHouseCluster(__file__)
replica1 = cluster.add_instance(
"replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"]
)
replica2 = cluster.add_instance(
"replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"]
)
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
except Exception as ex:
print(ex)
finally:
cluster.shutdown()
def cleanup(nodes):
for node in nodes:
node.query("DROP TABLE IF EXISTS source SYNC")
node.query("DROP TABLE IF EXISTS destination SYNC")
def create_table(node, table_name, replicated):
replica = node.name
engine = (
f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')"
if replicated
else "MergeTree()"
)
partition_expression = (
"toYYYYMMDD(timestamp)" if table_name == "source" else "toYYYYMM(timestamp)"
)
node.query_with_retry(
"""
CREATE TABLE {table_name}(timestamp DateTime)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY {partition_expression}
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
table_name=table_name,
engine=engine,
partition_expression=partition_expression,
)
)
def test_both_replicated(start_cluster):
for node in [replica1, replica2]:
create_table(node, "source", True)
create_table(node, "destination", True)
replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')")
replica1.query("SYSTEM SYNC REPLICA source")
replica1.query("SYSTEM SYNC REPLICA destination")
replica1.query(
f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source"
)
assert_eq_with_retry(
replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n"
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination",
replica2.query(f"SELECT * FROM destination"),
)
cleanup([replica1, replica2])
def test_only_destination_replicated(start_cluster):
create_table(replica1, "source", False)
create_table(replica1, "destination", True)
create_table(replica2, "destination", True)
replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')")
replica1.query("SYSTEM SYNC REPLICA destination")
replica1.query(
f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source"
)
assert_eq_with_retry(
replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n"
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination",
replica2.query(f"SELECT * FROM destination"),
)
cleanup([replica1, replica2])
def test_both_replicated_partitioned_to_unpartitioned(start_cluster):
def create_tables(nodes):
for node in nodes:
source_engine = (
f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')"
)
node.query(
"""
CREATE TABLE source(timestamp DateTime)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp)
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
engine=source_engine,
)
)
destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')"
node.query(
"""
CREATE TABLE destination(timestamp DateTime)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY tuple()
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
engine=destination_engine,
)
)
create_tables([replica1, replica2])
replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')")
replica1.query("INSERT INTO source VALUES ('2010-03-03 02:01:01')")
replica1.query("SYSTEM SYNC REPLICA source")
replica1.query("SYSTEM SYNC REPLICA destination")
replica1.query(
f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source"
)
replica1.query(
f"ALTER TABLE destination ATTACH PARTITION ID '20100303' FROM source"
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination ORDER BY timestamp",
"2010-03-02 02:01:01\n2010-03-03 02:01:01\n",
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination ORDER BY timestamp",
replica2.query(f"SELECT * FROM destination ORDER BY timestamp"),
)
cleanup([replica1, replica2])
def test_both_replicated_different_exp_same_id(start_cluster):
def create_tables(nodes):
for node in nodes:
source_engine = (
f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')"
)
node.query(
"""
CREATE TABLE source(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY a % 3
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
engine=source_engine,
)
)
destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')"
node.query(
"""
CREATE TABLE destination(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY a
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
engine=destination_engine,
)
)
create_tables([replica1, replica2])
replica1.query(
"INSERT INTO source (a, b, c, extra, sign) VALUES (1, 5, 9, 1000, 1)"
)
replica1.query(
"INSERT INTO source (a, b, c, extra, sign) VALUES (2, 6, 10, 1000, 1)"
)
replica1.query("SYSTEM SYNC REPLICA source")
replica1.query("SYSTEM SYNC REPLICA destination")
replica1.query(f"ALTER TABLE destination ATTACH PARTITION 1 FROM source")
replica1.query(f"ALTER TABLE destination ATTACH PARTITION 2 FROM source")
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination ORDER BY a",
"1\t5\t9\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n2\t6\t10\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n",
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination ORDER BY a",
replica2.query(f"SELECT * FROM destination ORDER BY a"),
)
cleanup([replica1, replica2])

View File

@ -32,6 +32,30 @@ def is_json(log_json):
return True
def validate_log_level(config, logs):
root = ET.fromstring(config)
key = root.findtext(".//names/level") or "level"
valid_level_values = {
"Fatal",
"Critical",
"Error",
"Warning",
"Notice",
"Information",
"Debug",
"Trace",
"Test",
}
length = min(10, len(logs))
for i in range(0, length):
json_log = json.loads(logs[i])
if json_log[key] not in valid_level_values:
return False
return True
def validate_log_config_relation(config, logs, config_type):
root = ET.fromstring(config)
keys_in_config = set()
@ -78,8 +102,10 @@ def validate_logs(logs):
def valiade_everything(config, node, config_type):
node.query("SELECT 1")
logs = node.grep_in_log("").split("\n")
return (
validate_logs(logs)
and validate_log_config_relation(config, logs, config_type)
and validate_log_level(config, logs)
)

View File

@ -1,467 +0,0 @@
-- { echoOn }
-- Should be allowed since destination partition expr is monotonically increasing and compatible
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
201003
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '20100302' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
201003
-- Should be allowed since destination partition expr is monotonically increasing and compatible. Note that even though
-- the destination partition expression is more granular, the data would still fall in the same partition. Thus, it is valid
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp);
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
20100302
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '201003' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
20100302
-- Should be allowed since destination partition expr is monotonically increasing and compatible for those specific values
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6);
CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A;
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 1);
ALTER TABLE destination ATTACH PARTITION ID '0' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01 1
2010-03-02 02:01:03 1
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01 1
2010-03-02 02:01:03 1
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
1
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION 0 FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01 1
2010-03-02 02:01:03 1
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01 1
2010-03-02 02:01:03 1
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
1
-- Should be allowed because dst partition exp is monot inc and data is not split
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(category);
CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category);
INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general');
INSERT INTO TABLE source VALUES ('rice', 'food');
ALTER TABLE destination ATTACH PARTITION ID '17908065610379824077' from source;
SELECT * FROM source ORDER BY productName;
mop general
rice food
spaghetti food
SELECT * FROM destination ORDER BY productName;
rice food
spaghetti food
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
59532f3c39a412a413f0f014c7750a9d
59532f3c39a412a413f0f014c7750a9d
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '17908065610379824077' from source;
SELECT * FROM source ORDER BY productName;
mop general
rice food
spaghetti food
SELECT * FROM destination ORDER BY productName;
rice food
spaghetti food
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
59532f3c39a412a413f0f014c7750a9d
59532f3c39a412a413f0f014c7750a9d
-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747574133
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY intDiv(timestamp, 86400000);
CREATE TABLE destination (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY toYear(toDateTime(intDiv(timestamp, 1000)));
INSERT INTO TABLE source VALUES (1267495261123);
ALTER TABLE destination ATTACH PARTITION ID '14670' FROM source;
SELECT * FROM source ORDER BY timestamp;
1267495261123
SELECT * FROM destination ORDER BY timestamp;
1267495261123
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
2010
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '14670' from source;
SELECT * FROM source ORDER BY timestamp;
1267495261123
SELECT * FROM destination ORDER BY timestamp;
1267495261123
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
2010
-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747511726
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY toYear(timestamp);
CREATE TABLE destination (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY (intDiv(toUInt32(timestamp),86400));
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01',1,1),('2010-03-02 02:01:01',1,1),('2011-02-02 02:01:03',1,1);
ALTER TABLE destination ATTACH PARTITION ID '2010' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01 1 1
2010-03-02 02:01:01 1 1
2011-02-02 02:01:03 1 1
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01 1 1
2010-03-02 02:01:01 1 1
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
14670
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '2010' from source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01 1 1
2010-03-02 02:01:01 1 1
2011-02-02 02:01:03 1 1
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01 1 1
2010-03-02 02:01:01 1 1
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
14670
-- Should be allowed, partitioned table to unpartitioned. Since the destination is unpartitioned, parts would ultimately
-- fall into the same partition.
-- Destination partition by expression is omitted, which causes StorageMetadata::getPartitionKeyAST() to be nullptr.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple();
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
all
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '201003' from source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
all
-- Same as above, but destination partition by expression is explicitly defined. Test case required to validate that
-- partition by tuple() is accepted.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY tuple();
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
all
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '201003' from source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
all
-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns
-- Columns in this case refer to the expression elements, not to the actual table columns
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c);
CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b);
INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4);
ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source;
SELECT * FROM source ORDER BY (a, b, c);
1 2 3
1 2 4
SELECT * FROM destination ORDER BY (a, b, c);
1 2 3
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
1-2
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source;
SELECT * FROM source ORDER BY (a, b, c);
1 2 3
1 2 4
SELECT * FROM destination ORDER BY (a, b, c);
1 2 3
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
1-2
-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns
-- Columns in this case refer to the expression elements, not to the actual table columns
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c);
CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY a;
INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4);
ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source;
SELECT * FROM source ORDER BY (a, b, c);
1 2 3
1 2 4
SELECT * FROM destination ORDER BY (a, b, c);
1 2 3
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
1
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source;
SELECT * FROM source ORDER BY (a, b, c);
1 2 3
1 2 4
SELECT * FROM destination ORDER BY (a, b, c);
1 2 3
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
1
-- Should be allowed. Special test case, tricky to explain. First column of source partition expression is
-- timestamp, while first column of destination partition expression is `A`. One of the previous implementations
-- would not match the columns, which could lead to `timestamp` min max being used to calculate monotonicity of `A`.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY tuple(toYYYYMM(timestamp), intDiv(A, 6)) ORDER BY timestamp;
CREATE TABLE destination (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY A ORDER BY timestamp;
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 5);
ALTER TABLE destination ATTACH PARTITION ID '201003-0' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01 5
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01 5
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
5
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (201003, 0) from source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01 5
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01 5
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
5
-- Should be allowed. Destination partition expression contains multiple expressions, but all of them are monotonically
-- increasing in the source partition min max indexes.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple();
CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple();
INSERT INTO TABLE source VALUES (6, 12);
ALTER TABLE destination ATTACH PARTITION ID '6-12' FROM source;
SELECT * FROM source ORDER BY A;
6 12
SELECT * FROM destination ORDER BY A;
6 12
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
3-6
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (6, 12) from source;
SELECT * FROM source ORDER BY A;
6 12
SELECT * FROM destination ORDER BY A;
6 12
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
3-6
-- Should be allowed. The same scenario as above, but partition expressions inverted.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple();
CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple();
INSERT INTO TABLE source VALUES (6, 12);
ALTER TABLE destination ATTACH PARTITION ID '3-6' FROM source;
SELECT * FROM source ORDER BY A;
6 12
SELECT * FROM destination ORDER BY A;
6 12
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
6-12
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (3, 6) from source;
SELECT * FROM source ORDER BY A;
6 12
SELECT * FROM destination ORDER BY A;
6 12
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
6-12
-- Should be allowed, it is a local operation, no different than regular attach. Replicated to replicated.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE
source(timestamp DateTime)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/source_replicated_to_replicated_distinct_expression', '1')
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY tuple();
CREATE TABLE
destination(timestamp DateTime)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_replicated_to_replicated_distinct_expression', '1')
PARTITION BY toYYYYMM(timestamp)
ORDER BY tuple();
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
201003
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '20100302' from source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
201003
-- Should be allowed, it is a local operation, no different than regular attach. Non replicated to replicated
DROP TABLE IF EXISTS source SYNC;
DROP TABLE IF EXISTS destination SYNC;
CREATE TABLE source(timestamp DateTime) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(timestamp) ORDER BY tuple();
CREATE TABLE
destination(timestamp DateTime)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_non_replicated_to_replicated_distinct_expression', '1')
PARTITION BY toYYYYMM(timestamp)
ORDER BY tuple();
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
201003
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '20100302' from source;
SELECT * FROM source ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT * FROM destination ORDER BY timestamp;
2010-03-02 02:01:01
2010-03-02 02:01:03
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
201003
-- Should not be allowed because data would be split into two different partitions
DROP TABLE IF EXISTS source SYNC;
DROP TABLE IF EXISTS destination SYNC;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp);
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-03 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -- { serverError 248 }
ALTER TABLE destination ATTACH PARTITION '201003' from source; -- { serverError 248 }
-- Should not be allowed because data would be split into two different partitions
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6);
CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A;
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 2);
ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; -- { serverError 248 }
ALTER TABLE destination ATTACH PARTITION 0 FROM source; -- { serverError 248 }
-- Should not be allowed because dst partition exp takes more than two arguments, so it's not considered monotonically inc
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category);
CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY substring(category, 1, 2);
INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general');
INSERT INTO TABLE source VALUES ('rice', 'food');
ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 }
ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 }
-- Should not be allowed because dst partition exp depends on a different set of columns
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category);
CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(productName);
INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general');
INSERT INTO TABLE source VALUES ('rice', 'food');
ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 }
ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 }
-- Should not be allowed because dst partition exp is not monotonically increasing
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY left(productName, 2);
CREATE TABLE destination (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(productName);
INSERT INTO TABLE source VALUES ('bread'), ('mop');
INSERT INTO TABLE source VALUES ('broccoli');
ALTER TABLE destination ATTACH PARTITION ID '4589453b7ee96ce9de1265bd57674496' from source; -- { serverError 36 }
ALTER TABLE destination ATTACH PARTITION 'br' from source; -- { serverError 36 }
-- Empty/ non-existent partition, same partition expression. Nothing should happen
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
ALTER TABLE destination ATTACH PARTITION ID '1' FROM source;
ALTER TABLE destination ATTACH PARTITION 1 FROM source;
SELECT * FROM destination;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Empty/ non-existent partition, different partition expression. Nothing should happen
-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
ALTER TABLE destination ATTACH PARTITION ID '1' FROM source;
ALTER TABLE destination ATTACH PARTITION 1 FROM source;
SELECT * FROM destination;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Replace instead of attach. Empty/ non-existent partition, same partition expression. Nothing should happen
-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
ALTER TABLE destination REPLACE PARTITION '1' FROM source;
SELECT * FROM destination;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Replace instead of attach. Empty/ non-existent partition to non-empty partition, same partition id.
-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A;
CREATE TABLE destination (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A;
INSERT INTO TABLE destination VALUES (1);
ALTER TABLE destination REPLACE PARTITION '1' FROM source;
SELECT * FROM destination;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;

View File

@ -1,485 +0,0 @@
-- { echoOn }
-- Should be allowed since destination partition expr is monotonically increasing and compatible
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '20100302' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed since destination partition expr is monotonically increasing and compatible. Note that even though
-- the destination partition expression is more granular, the data would still fall in the same partition. Thus, it is valid
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp);
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '201003' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed since destination partition expr is monotonically increasing and compatible for those specific values
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6);
CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A;
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 1);
ALTER TABLE destination ATTACH PARTITION ID '0' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION 0 FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed because dst partition exp is monot inc and data is not split
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(category);
CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category);
INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general');
INSERT INTO TABLE source VALUES ('rice', 'food');
ALTER TABLE destination ATTACH PARTITION ID '17908065610379824077' from source;
SELECT * FROM source ORDER BY productName;
SELECT * FROM destination ORDER BY productName;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '17908065610379824077' from source;
SELECT * FROM source ORDER BY productName;
SELECT * FROM destination ORDER BY productName;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747574133
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY intDiv(timestamp, 86400000);
CREATE TABLE destination (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY toYear(toDateTime(intDiv(timestamp, 1000)));
INSERT INTO TABLE source VALUES (1267495261123);
ALTER TABLE destination ATTACH PARTITION ID '14670' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '14670' from source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747511726
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY toYear(timestamp);
CREATE TABLE destination (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY (intDiv(toUInt32(timestamp),86400));
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01',1,1),('2010-03-02 02:01:01',1,1),('2011-02-02 02:01:03',1,1);
ALTER TABLE destination ATTACH PARTITION ID '2010' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '2010' from source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed, partitioned table to unpartitioned. Since the destination is unpartitioned, parts would ultimately
-- fall into the same partition.
-- Destination partition by expression is omitted, which causes StorageMetadata::getPartitionKeyAST() to be nullptr.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple();
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '201003' from source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Same as above, but destination partition by expression is explicitly defined. Test case required to validate that
-- partition by tuple() is accepted.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY tuple();
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '201003' from source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns
-- Columns in this case refer to the expression elements, not to the actual table columns
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c);
CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b);
INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4);
ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source;
SELECT * FROM source ORDER BY (a, b, c);
SELECT * FROM destination ORDER BY (a, b, c);
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source;
SELECT * FROM source ORDER BY (a, b, c);
SELECT * FROM destination ORDER BY (a, b, c);
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns
-- Columns in this case refer to the expression elements, not to the actual table columns
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c);
CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY a;
INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4);
ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source;
SELECT * FROM source ORDER BY (a, b, c);
SELECT * FROM destination ORDER BY (a, b, c);
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source;
SELECT * FROM source ORDER BY (a, b, c);
SELECT * FROM destination ORDER BY (a, b, c);
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed. Special test case, tricky to explain. First column of source partition expression is
-- timestamp, while first column of destination partition expression is `A`. One of the previous implementations
-- would not match the columns, which could lead to `timestamp` min max being used to calculate monotonicity of `A`.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY tuple(toYYYYMM(timestamp), intDiv(A, 6)) ORDER BY timestamp;
CREATE TABLE destination (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY A ORDER BY timestamp;
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 5);
ALTER TABLE destination ATTACH PARTITION ID '201003-0' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (201003, 0) from source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed. Destination partition expression contains multiple expressions, but all of them are monotonically
-- increasing in the source partition min max indexes.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple();
CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple();
INSERT INTO TABLE source VALUES (6, 12);
ALTER TABLE destination ATTACH PARTITION ID '6-12' FROM source;
SELECT * FROM source ORDER BY A;
SELECT * FROM destination ORDER BY A;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (6, 12) from source;
SELECT * FROM source ORDER BY A;
SELECT * FROM destination ORDER BY A;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed. The same scenario as above, but partition expressions inverted.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple();
CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple();
INSERT INTO TABLE source VALUES (6, 12);
ALTER TABLE destination ATTACH PARTITION ID '3-6' FROM source;
SELECT * FROM source ORDER BY A;
SELECT * FROM destination ORDER BY A;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION (3, 6) from source;
SELECT * FROM source ORDER BY A;
SELECT * FROM destination ORDER BY A;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed, it is a local operation, no different than regular attach. Replicated to replicated.
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE
source(timestamp DateTime)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/source_replicated_to_replicated_distinct_expression', '1')
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY tuple();
CREATE TABLE
destination(timestamp DateTime)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_replicated_to_replicated_distinct_expression', '1')
PARTITION BY toYYYYMM(timestamp)
ORDER BY tuple();
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '20100302' from source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should be allowed, it is a local operation, no different than regular attach. Non replicated to replicated
DROP TABLE IF EXISTS source SYNC;
DROP TABLE IF EXISTS destination SYNC;
CREATE TABLE source(timestamp DateTime) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(timestamp) ORDER BY tuple();
CREATE TABLE
destination(timestamp DateTime)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_non_replicated_to_replicated_distinct_expression', '1')
PARTITION BY toYYYYMM(timestamp)
ORDER BY tuple();
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
TRUNCATE TABLE destination;
ALTER TABLE destination ATTACH PARTITION '20100302' from source;
SELECT * FROM source ORDER BY timestamp;
SELECT * FROM destination ORDER BY timestamp;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Should not be allowed because data would be split into two different partitions
DROP TABLE IF EXISTS source SYNC;
DROP TABLE IF EXISTS destination SYNC;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp);
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-03 02:01:03');
ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -- { serverError 248 }
ALTER TABLE destination ATTACH PARTITION '201003' from source; -- { serverError 248 }
-- Should not be allowed because data would be split into two different partitions
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6);
CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A;
INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 2);
ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; -- { serverError 248 }
ALTER TABLE destination ATTACH PARTITION 0 FROM source; -- { serverError 248 }
-- Should not be allowed because dst partition exp takes more than two arguments, so it's not considered monotonically inc
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category);
CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY substring(category, 1, 2);
INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general');
INSERT INTO TABLE source VALUES ('rice', 'food');
ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 }
ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 }
-- Should not be allowed because dst partition exp depends on a different set of columns
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category);
CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(productName);
INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general');
INSERT INTO TABLE source VALUES ('rice', 'food');
ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 }
ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 }
-- Should not be allowed because dst partition exp is not monotonically increasing
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY left(productName, 2);
CREATE TABLE destination (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(productName);
INSERT INTO TABLE source VALUES ('bread'), ('mop');
INSERT INTO TABLE source VALUES ('broccoli');
ALTER TABLE destination ATTACH PARTITION ID '4589453b7ee96ce9de1265bd57674496' from source; -- { serverError 36 }
ALTER TABLE destination ATTACH PARTITION 'br' from source; -- { serverError 36 }
-- Empty/ non-existent partition, same partition expression. Nothing should happen
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
ALTER TABLE destination ATTACH PARTITION ID '1' FROM source;
ALTER TABLE destination ATTACH PARTITION 1 FROM source;
SELECT * FROM destination;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Empty/ non-existent partition, different partition expression. Nothing should happen
-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
ALTER TABLE destination ATTACH PARTITION ID '1' FROM source;
ALTER TABLE destination ATTACH PARTITION 1 FROM source;
SELECT * FROM destination;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Replace instead of attach. Empty/ non-existent partition, same partition expression. Nothing should happen
-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp);
ALTER TABLE destination REPLACE PARTITION '1' FROM source;
SELECT * FROM destination;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;
-- Replace instead of attach. Empty/ non-existent partition to non-empty partition, same partition id.
-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045
DROP TABLE IF EXISTS source;
DROP TABLE IF EXISTS destination;
CREATE TABLE source (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A;
CREATE TABLE destination (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A;
INSERT INTO TABLE destination VALUES (1);
ALTER TABLE destination REPLACE PARTITION '1' FROM source;
SELECT * FROM destination;
SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;

View File

@ -20,9 +20,21 @@
23 Sx>b:^UG XpedE)Q: 7433019734386307503
29 2j&S)ba?XG QuQj 17163829389637435056
3 UlI+1 14144472852965836438
0 PJFiUe#J2O _s\' 14427935816175499794
1 >T%O ,z< 17537932797009027240
12 D[6,P #}Lmb[ ZzU 6394957109822140795
18 $_N- 24422838680427462
2 bX?}ix [ Ny]2 G 16242612901291874718
20 VE] Y 15120036904703536841
22 Ti~3)N)< A!( 3 18361093572663329113
23 Sx>b:^UG XpedE)Q: 7433019734386307503
29 2j&S)ba?XG QuQj 17163829389637435056
3 UlI+1 14144472852965836438
=============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE ===============
0 3 SELECT `__table1`.`key` AS `key`, `__table1`.`value1` AS `value1`, `__table1`.`value2` AS `value2`, toUInt64(min(`__table1`.`time`)) AS `start_ts` FROM `default`.`join_inner_table` AS `__table1` PREWHERE (`__table1`.`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`__table1`.`number` > 1610517366120) GROUP BY `__table1`.`key`, `__table1`.`value1`, `__table1`.`value2` ORDER BY `__table1`.`key` ASC, `__table1`.`value1` ASC, `__table1`.`value2` ASC LIMIT _CAST(10, \'UInt64\') SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer = 1
0 3 SELECT `key`, `value1`, `value2`, toUInt64(min(`time`)) AS `start_ts` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` ORDER BY `key` ASC, `value1` ASC, `value2` ASC LIMIT 10
1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1; 1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0;
1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1;
=============== OUTER QUERY (NO PARALLEL) =============== =============== OUTER QUERY (NO PARALLEL) ===============
>T%O ,z< 10 >T%O ,z< 10
NQTpY# W\\Xx4 10 NQTpY# W\\Xx4 10
@ -39,6 +51,16 @@ U c 10
UlI+1 10 UlI+1 10
bX?}ix [ Ny]2 G 10 bX?}ix [ Ny]2 G 10
t<iT X48q:Z]t0 10 t<iT X48q:Z]t0 10
>T%O ,z< 10
NQTpY# W\\Xx4 10
PJFiUe#J2O _s\' 10
U c 10
UlI+1 10
bX?}ix [ Ny]2 G 10
t<iT X48q:Z]t0 10
0 3 SELECT `__table1`.`key` AS `key`, `__table1`.`value1` AS `value1`, `__table1`.`value2` AS `value2` FROM `default`.`join_inner_table` AS `__table1` PREWHERE (`__table1`.`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`__table1`.`number` > 1610517366120) GROUP BY `__table1`.`key`, `__table1`.`value1`, `__table1`.`value2`
0 3 SELECT `__table2`.`value1` AS `value1`, `__table2`.`value2` AS `value2`, count() AS `count` FROM `default`.`join_outer_table` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` USING (`key`) GROUP BY `__table1`.`key`, `__table2`.`value1`, `__table2`.`value2`
0 3 SELECT `key`, `value1`, `value2` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` 0 3 SELECT `key`, `value1`, `value2` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2`
0 3 SELECT `value1`, `value2`, count() AS `count` FROM `default`.`join_outer_table` ALL INNER JOIN `_data_7105554115296635472_12427301373021079614` USING (`key`) GROUP BY `key`, `value1`, `value2` 0 3 SELECT `value1`, `value2`, count() AS `count` FROM `default`.`join_outer_table` ALL INNER JOIN `_data_` USING (`key`) GROUP BY `key`, `value1`, `value2`
1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1; 1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0;
1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1;

View File

@ -21,7 +21,6 @@ SELECT
* FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2) * FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2)
LIMIT 100; LIMIT 100;
SET allow_experimental_analyzer = 0;
SET max_parallel_replicas = 3; SET max_parallel_replicas = 3;
SET prefer_localhost_replica = 1; SET prefer_localhost_replica = 1;
SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost';
@ -39,6 +38,18 @@ FROM join_inner_table
GROUP BY key, value1, value2 GROUP BY key, value1, value2
ORDER BY key, value1, value2 ORDER BY key, value1, value2
LIMIT 10; LIMIT 10;
-- settings allow_experimental_analyzer=0;
-- SELECT
-- key,
-- value1,
-- value2,
-- toUInt64(min(time)) AS start_ts
-- FROM join_inner_table
-- PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120'))
-- GROUP BY key, value1, value2
-- ORDER BY key, value1, value2
-- LIMIT 10 settings allow_experimental_analyzer=1;
SELECT '=============== INNER QUERY (PARALLEL) ==============='; SELECT '=============== INNER QUERY (PARALLEL) ===============';
@ -53,18 +64,31 @@ PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1
GROUP BY key, value1, value2 GROUP BY key, value1, value2
ORDER BY key, value1, value2 ORDER BY key, value1, value2
LIMIT 10 LIMIT 10
SETTINGS allow_experimental_parallel_reading_from_replicas = 1; SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0;
-- Parallel inner query alone
SELECT
key,
value1,
value2,
toUInt64(min(time)) AS start_ts
FROM join_inner_table
PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120'))
GROUP BY key, value1, value2
ORDER BY key, value1, value2
LIMIT 10
SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1;
SELECT '=============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE ==============='; SELECT '=============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE ===============';
SYSTEM FLUSH LOGS; SYSTEM FLUSH LOGS;
-- There should be 4 queries. The main query as received by the initiator and the 3 equal queries sent to each replica -- There should be 4 queries. The main query as received by the initiator and the 3 equal queries sent to each replica
SELECT is_initial_query, count() as c, query, SELECT is_initial_query, count() as c, replaceRegexpAll(query, '_data_(\d+)_(\d+)', '_data_') as query
FROM system.query_log FROM system.query_log
WHERE WHERE
event_date >= yesterday() event_date >= yesterday()
AND type = 'QueryFinish' AND type = 'QueryFinish'
AND initial_query_id = AND initial_query_id IN
( (
SELECT query_id SELECT query_id
FROM system.query_log FROM system.query_log
@ -160,18 +184,48 @@ FROM
) )
GROUP BY value1, value2 GROUP BY value1, value2
ORDER BY value1, value2 ORDER BY value1, value2
SETTINGS allow_experimental_parallel_reading_from_replicas = 1; SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0;
-- Parallel full query
SELECT
value1,
value2,
avg(count) AS avg
FROM
(
SELECT
key,
value1,
value2,
count() AS count
FROM join_outer_table
INNER JOIN
(
SELECT
key,
value1,
value2,
toUInt64(min(time)) AS start_ts
FROM join_inner_table
PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120'))
GROUP BY key, value1, value2
) USING (key)
GROUP BY key, value1, value2
)
GROUP BY value1, value2
ORDER BY value1, value2
SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1;
SYSTEM FLUSH LOGS; SYSTEM FLUSH LOGS;
-- There should be 7 queries. The main query as received by the initiator, the 3 equal queries to execute the subquery -- There should be 7 queries. The main query as received by the initiator, the 3 equal queries to execute the subquery
-- in the inner join and the 3 queries executing the whole query (but replacing the subquery with a temp table) -- in the inner join and the 3 queries executing the whole query (but replacing the subquery with a temp table)
SELECT is_initial_query, count() as c, query, SELECT is_initial_query, count() as c, replaceRegexpAll(query, '_data_(\d+)_(\d+)', '_data_') as query
FROM system.query_log FROM system.query_log
WHERE WHERE
event_date >= yesterday() event_date >= yesterday()
AND type = 'QueryFinish' AND type = 'QueryFinish'
AND initial_query_id = AND initial_query_id IN
( (
SELECT query_id SELECT query_id
FROM system.query_log FROM system.query_log

View File

@ -1,6 +1,4 @@
02784_automatic_parallel_replicas_join-default_simple_join_10M_pure 0 estimated parallel replicas
02784_automatic_parallel_replicas_join-default_simple_join_10M_pure 1 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_10M_pure 1 estimated parallel replicas
02784_automatic_parallel_replicas_join-default_simple_join_5M_pure 0 estimated parallel replicas
02784_automatic_parallel_replicas_join-default_simple_join_5M_pure 2 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_5M_pure 2 estimated parallel replicas
02784_automatic_parallel_replicas_join-default_simple_join_1M_pure 1 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_1M_pure 1 estimated parallel replicas
02784_automatic_parallel_replicas_join-default_simple_join_1M_pure 10 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_1M_pure 10 estimated parallel replicas

View File

@ -68,7 +68,7 @@ function run_query_with_pure_parallel_replicas () {
--allow_experimental_parallel_reading_from_replicas 1 \ --allow_experimental_parallel_reading_from_replicas 1 \
--parallel_replicas_for_non_replicated_merge_tree 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \
--parallel_replicas_min_number_of_rows_per_replica "$2" \ --parallel_replicas_min_number_of_rows_per_replica "$2" \
|& grep "It is enough work for" | awk '{ print substr($7, 2, length($7) - 2) "\t" $20 " estimated parallel replicas" }' |& grep "It is enough work for" | awk '{ print substr($7, 2, length($7) - 2) "\t" $20 " estimated parallel replicas" }' | sort -n -k2 -b | grep -Pv "\t0 estimated parallel replicas"
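# The pipeline above sorts the matches numerically by the estimated replica count and filters out lines reporting 0 estimated parallel replicas.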
} }
query_id_base="02784_automatic_parallel_replicas_join-$CLICKHOUSE_DATABASE" query_id_base="02784_automatic_parallel_replicas_join-$CLICKHOUSE_DATABASE"

View File

@ -1,5 +1,7 @@
1 1
02901_parallel_replicas_rollup-default Used parallel replicas: true 02901_parallel_replicas_rollup-default Used parallel replicas: true
Distributed query with analyzer
1
0 0 0 6 0 0 0 6
2019 0 0 2 2019 0 0 2
2019 1 0 2 2019 1 0 2

View File

@ -39,6 +39,11 @@ $CLICKHOUSE_CLIENT \
ORDER BY max((SELECT 1 WHERE 0)); ORDER BY max((SELECT 1 WHERE 0));
"; ";
were_parallel_replicas_used $query_id were_parallel_replicas_used $query_id
# This covers a bug in the analyzer's handling of the distributed query header.
echo "Distributed query with analyzer"
$CLICKHOUSE_CLIENT --query "SELECT 1 FROM remote('127.0.0.{2,3}', currentDatabase(), nested) GROUP BY 1 WITH ROLLUP ORDER BY max((SELECT 1 WHERE 0))"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS nested" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS nested"