better

2024-09-20 08:40:50 +00:00 · 2021-04-22 21:04:32 +03:00 · 2021-04-22 21:04:32 +03:00 · 90ab394769
commit 90ab394769
parent 2017d2f918
14 changed files with 558 additions and 235 deletions
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
@ -6,7 +6,9 @@
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/setThreadName.h>
 #include <IO/ConnectionTimeoutsContext.h>
-
+#include <Interpreters/InterpreterInsertQuery.h>
 #include <Processors/Transforms/ExpressionTransform.h>
 #include <DataStreams/ExpressionBlockInputStream.h>
 namespace DB
 {
@ -20,6 +22,12 @@ namespace ErrorCodes
 }
 /// Returns `value` surrounded by ANSI SGR escape sequences:
 /// "ESC[36;1m" (cyan foreground, bold) in front and "ESC[0m" (reset attributes) behind,
 /// so the text stands out when printed to a terminal that understands them.
 std::string wrapWithColor(const std::string & value)
 {
    std::string wrapped = "\u001b[36;1m";
    wrapped += value;
    wrapped += "\u001b[0m";
    return wrapped;
 }
 void ClusterCopier::init()
 {
    auto zookeeper = getContext()->getZooKeeper();
@ -29,7 +37,7 @@ void ClusterCopier::init()
        if (response.error != Coordination::Error::ZOK)
            return;
        UInt64 version = ++task_description_version;
-        LOG_DEBUG(log, "Task description should be updated, local version {}", version);
+        LOG_INFO(log, "Task description should be updated, local version {}", version);
    };
    task_description_path = task_zookeeper_path + "/description";
@ -50,7 +58,7 @@ void ClusterCopier::init()
        task_table.initShards(task_cluster->random_engine);
    }
-    LOG_DEBUG(log, "Will process {} table tasks", task_cluster->table_tasks.size());
+    LOG_INFO(log, "Will process {} table tasks", task_cluster->table_tasks.size());
    /// Do not initialize tables, will make deferred initialization in process()
@ -138,7 +146,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
        {
            if (!task_table.enabled_partitions_set.count(partition_name))
            {
-                LOG_DEBUG(log, "Partition {} will not be processed, since it is not in enabled_partitions of {}", partition_name, task_table.table_id);
+                LOG_INFO(log, "Partition {} will not be processed, since it is not in enabled_partitions of {}", partition_name, task_table.table_id);
            }
        }
    }
@ -173,7 +181,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
        LOG_WARNING(log, "There are no {} partitions from enabled_partitions in shard {} :{}", missing_partitions.size(), task_shard->getDescription(), ss.str());
    }
-    LOG_DEBUG(log, "Will copy {} partitions from shard {}", task_shard->partition_tasks.size(), task_shard->getDescription());
+    LOG_INFO(log, "Will copy {} partitions from shard {}", task_shard->partition_tasks.size(), task_shard->getDescription());
 }
 void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads)
@ -189,9 +197,10 @@ void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts,
                discoverShardPartitions(timeouts, task_shard);
            });
-        LOG_DEBUG(log, "Waiting for {} setup jobs", thread_pool.active());
+        LOG_INFO(log, "Waiting for {} setup jobs", thread_pool.active());
        thread_pool.wait();
    }
    std::cout << "discoverTablePartitions  finished" << std::endl;
 }
 void ClusterCopier::uploadTaskDescription(const std::string & task_path, const std::string & task_file, const bool force)
@ -213,7 +222,7 @@ void ClusterCopier::uploadTaskDescription(const std::string & task_path, const s
    if (code != Coordination::Error::ZOK && force)
        zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent);
-    LOG_DEBUG(log, "Task description {} uploaded to {} with result {} ({})",
+    LOG_INFO(log, "Task description {} uploaded to {} with result {} ({})",
        ((code != Coordination::Error::ZOK && !force) ? "not " : ""), local_task_description_path, code, Coordination::errorMessage(code));
 }
@ -230,7 +239,7 @@ void ClusterCopier::reloadTaskDescription()
    if (code != Coordination::Error::ZOK)
        throw Exception("Can't get description node " + task_description_path, ErrorCodes::BAD_ARGUMENTS);
-    LOG_DEBUG(log, "Loading description, zxid={}", task_description_current_stat.czxid);
+    LOG_INFO(log, "Loading description, zxid={}", task_description_current_stat.czxid);
    auto config = getConfigurationFromXMLString(task_config_str);
    /// Setup settings
@ -250,7 +259,7 @@ void ClusterCopier::updateConfigIfNeeded()
    if (!is_outdated_version && !is_expired_session)
        return;
-    LOG_DEBUG(log, "Updating task description");
+    LOG_INFO(log, "Updating task description");
    reloadTaskDescription();
    task_description_current_version = version_to_update;
@ -361,7 +370,7 @@ zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNee
        if (static_cast<UInt64>(stat.numChildren) >= task_cluster->max_workers)
        {
-            LOG_DEBUG(log, "Too many workers ({}, maximum {}). Postpone processing {}", stat.numChildren, task_cluster->max_workers, description);
+            LOG_INFO(log, "Too many workers ({}, maximum {}). Postpone processing {}", stat.numChildren, task_cluster->max_workers, description);
            if (unprioritized)
                current_sleep_time = std::min(max_sleep_time, current_sleep_time + default_sleep_time);
@ -387,7 +396,7 @@ zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNee
                /// Try to make fast retries
                if (num_bad_version_errors > 3)
                {
-                    LOG_DEBUG(log, "A concurrent worker has just been added, will check free worker slots again");
+                    LOG_INFO(log, "A concurrent worker has just been added, will check free worker slots again");
                    std::chrono::milliseconds random_sleep_time(std::uniform_int_distribution<int>(1, 1000)(task_cluster->random_engine));
                    std::this_thread::sleep_for(random_sleep_time);
                    num_bad_version_errors = 0;
@ -422,7 +431,7 @@ bool ClusterCopier::checkAllPiecesInPartitionAreDone(const TaskTable & task_tabl
    {
        bool piece_is_done = checkPartitionPieceIsDone(task_table, partition_name, piece_number, shards_with_partition);
        if (!piece_is_done)
-            LOG_DEBUG(log, "Partition {} piece {} is not already done.", partition_name, piece_number);
+            LOG_INFO(log, "Partition {} piece {} is not already done.", partition_name, piece_number);
        answer &= piece_is_done;
    }
@ -438,7 +447,7 @@ bool ClusterCopier::checkAllPiecesInPartitionAreDone(const TaskTable & task_tabl
 bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name,
                               size_t piece_number, const TasksShard & shards_with_partition)
 {
-    LOG_DEBUG(log, "Check that all shards processed partition {} piece {} successfully", partition_name, piece_number);
+    LOG_INFO(log, "Check that all shards processed partition {} piece {} successfully", partition_name, piece_number);
    auto zookeeper = getContext()->getZooKeeper();
@ -530,7 +539,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
        inject_fault = value < move_fault_probability;
    }
-    LOG_DEBUG(log, "Try to move {} to destination table", partition_name);
+    LOG_INFO(log, "Try to move {} to destination table", partition_name);
    auto zookeeper = getContext()->getZooKeeper();
@ -548,7 +557,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
    {
        if (e.code == Coordination::Error::ZNODEEXISTS)
        {
-            LOG_DEBUG(log, "Someone is already moving pieces {}", current_partition_attach_is_active);
+            LOG_INFO(log, "Someone is already moving pieces {}", current_partition_attach_is_active);
            return TaskStatus::Active;
        }
@ -565,13 +574,13 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
            TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data);
            if (status.state == TaskState::Finished)
            {
-                LOG_DEBUG(log, "All pieces for partition from this task {} has been successfully moved to destination table by {}", current_partition_attach_is_active, status.owner);
+                LOG_INFO(log, "All pieces for partition from this task {} has been successfully moved to destination table by {}", current_partition_attach_is_active, status.owner);
                return TaskStatus::Finished;
            }
            /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process.
            /// Initialize DROP PARTITION
-            LOG_DEBUG(log, "Moving piece for partition {} has not been successfully finished by {}. Will try to move by myself.", current_partition_attach_is_active, status.owner);
+            LOG_INFO(log, "Moving piece for partition {} has not been successfully finished by {}. Will try to move by myself.", current_partition_attach_is_active, status.owner);
            /// Remove is_done marker.
            zookeeper->remove(current_partition_attach_is_done);
@ -588,7 +597,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
    /// Move partition to original destination table.
    for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number)
    {
-        LOG_DEBUG(log, "Trying to move partition {} piece {} to original table", partition_name, toString(current_piece_number));
+        LOG_INFO(log, "Trying to move partition {} piece {} to original table", partition_name, toString(current_piece_number));
        ASTPtr query_alter_ast;
        String query_alter_ast_string;
@ -611,7 +620,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
                                  ((partition_name == "'all'") ? " ATTACH PARTITION ID " : " ATTACH PARTITION ") + partition_name +
                                  " FROM " + getQuotedTable(helping_table);
-        LOG_DEBUG(log, "Executing ALTER query: {}", query_alter_ast_string);
+        LOG_INFO(log, "Executing ALTER query: {}", query_alter_ast_string);
        try
        {
@ -620,9 +629,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
                task_table.cluster_push,
                query_alter_ast_string,
                task_cluster->settings_push,
-                PoolMode::GET_MANY,
+                execution_mode);
                execution_mode,
                max_successful_executions_per_shard);
            if (settings_push.replication_alter_partitions_sync == 1)
            {
@ -645,7 +652,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
        }
        catch (...)
        {
-            LOG_DEBUG(log, "Error while moving partition {} piece {} to original table", partition_name, toString(current_piece_number));
+            LOG_INFO(log, "Error while moving partition {} piece {} to original table", partition_name, toString(current_piece_number));
            throw;
        }
@ -660,20 +667,20 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
                query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) +
                                                ((partition_name == "'all'") ? " PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;";
-                LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_deduplicate_ast_string);
+                LOG_INFO(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_deduplicate_ast_string);
                UInt64 num_nodes = executeQueryOnCluster(
                        task_table.cluster_push,
                        query_deduplicate_ast_string,
                        task_cluster->settings_push,
-                        PoolMode::GET_MANY);
+                        ClusterExecutionMode::ON_EACH_SHARD);
                LOG_INFO(log, "Number of shard that executed OPTIMIZE DEDUPLICATE query successfully : {}", toString(num_nodes));
            }
        }
        catch (...)
        {
-            LOG_DEBUG(log, "Error while executing OPTIMIZE DEDUPLICATE partition {}in the original table", partition_name);
+            LOG_INFO(log, "Error while executing OPTIMIZE DEDUPLICATE partition {}in the original table", partition_name);
            throw;
        }
    }
@ -739,6 +746,8 @@ std::shared_ptr<ASTCreateQuery> rewriteCreateQueryStorage(const ASTPtr & create_
    res->children.clear();
    res->set(res->columns_list, create.columns_list->clone());
    res->set(res->storage, new_storage_ast->clone());
    /// Always create with IF NOT EXISTS, so that we don't need to store an additional flag like `is_table_created` somewhere else
    res->if_not_exists = true;
    return res;
 }
@ -771,7 +780,7 @@ bool ClusterCopier::tryDropPartitionPiece(
    {
        if (e.code == Coordination::Error::ZNODEEXISTS)
        {
-            LOG_DEBUG(log, "Partition {} piece {} is cleaning now by somebody, sleep", task_partition.name, toString(current_piece_number));
+            LOG_INFO(log, "Partition {} piece {} is cleaning now by somebody, sleep", task_partition.name, toString(current_piece_number));
            std::this_thread::sleep_for(default_sleep_time);
            return false;
        }
@ -784,7 +793,7 @@ bool ClusterCopier::tryDropPartitionPiece(
    {
        if (stat.numChildren != 0)
        {
-            LOG_DEBUG(log, "Partition {} contains {} active workers while trying to drop it. Going to sleep.", task_partition.name, stat.numChildren);
+            LOG_INFO(log, "Partition {} contains {} active workers while trying to drop it. Going to sleep.", task_partition.name, stat.numChildren);
            std::this_thread::sleep_for(default_sleep_time);
            return false;
        }
@ -804,7 +813,7 @@ bool ClusterCopier::tryDropPartitionPiece(
        {
            if (e.code == Coordination::Error::ZNODEEXISTS)
            {
-                LOG_DEBUG(log, "Partition {} is being filled now by somebody, sleep", task_partition.name);
+                LOG_INFO(log, "Partition {} is being filled now by somebody, sleep", task_partition.name);
                return false;
            }
@ -842,12 +851,11 @@ bool ClusterCopier::tryDropPartitionPiece(
        /// It is important, DROP PARTITION must be done synchronously
        settings_push.replication_alter_partitions_sync = 2;
-        LOG_DEBUG(log, "Execute distributed DROP PARTITION: {}", query);
+        LOG_INFO(log, "Execute distributed DROP PARTITION: {}", query);
        /// We have to drop partition_piece on each replica
        size_t num_shards = executeQueryOnCluster(
                cluster_push, query,
                settings_push,
                PoolMode::GET_MANY,
                ClusterExecutionMode::ON_EACH_NODE);
        LOG_INFO(log, "DROP PARTITION was successfully executed on {} nodes of a cluster.", num_shards);
@ -863,7 +871,7 @@ bool ClusterCopier::tryDropPartitionPiece(
        }
        else
        {
-            LOG_DEBUG(log, "Clean state is altered when dropping the partition, cowardly bailing");
+            LOG_INFO(log, "Clean state is altered when dropping the partition, cowardly bailing");
            /// clean state is stale
            return false;
        }
@ -907,7 +915,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
        ++cluster_partition.total_tries;
-        LOG_DEBUG(log, "Processing partition {} for the whole cluster", partition_name);
+        LOG_INFO(log, "Processing partition {} for the whole cluster", partition_name);
        /// Process each source shard having current partition and copy current partition
        /// NOTE: shards are sorted by "distance" to current host
@ -929,7 +937,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
                    {
                        const size_t number_of_splits = task_table.number_of_splits;
                        shard->partition_tasks.emplace(partition_name, ShardPartition(*shard, partition_name, number_of_splits));
-                        LOG_DEBUG(log, "Discovered partition {} in shard {}", partition_name, shard->getDescription());
+                        LOG_INFO(log, "Discovered partition {} in shard {}", partition_name, shard->getDescription());
                        /// To save references in the future.
                        auto shard_partition_it = shard->partition_tasks.find(partition_name);
                        PartitionPieces & shard_partition_pieces = shard_partition_it->second.pieces;
@ -942,7 +950,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
                    }
                    else
                    {
-                        LOG_DEBUG(log, "Found that shard {} does not contain current partition {}", shard->getDescription(), partition_name);
+                        LOG_INFO(log, "Found that shard {} does not contain current partition {}", shard->getDescription(), partition_name);
                        continue;
                    }
                }
@ -1100,18 +1108,18 @@ TaskStatus ClusterCopier::tryCreateDestinationTable(const ConnectionTimeouts & t
        InterpreterCreateQuery::prepareOnClusterQuery(create, getContext(), task_table.cluster_push_name);
        String query = queryToString(create_query_push_ast);
-        LOG_DEBUG(log, "Create destination tables. Query: {}", query);
+        LOG_INFO(log, "Create destination tables. Query: \n {}", wrapWithColor(query));
-        UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, PoolMode::GET_MANY);
+        UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push,  ClusterExecutionMode::ON_EACH_NODE);
        LOG_INFO(
            log,
            "Destination tables {} have been created on {} shards of {}",
            getQuotedTable(task_table.table_push),
            shards,
            task_table.cluster_push->getShardCount());
-        if (shards != task_table.cluster_push->getShardCount())
+        // if (shards != task_table.cluster_push->getShardCount())
-        {
+        // {
-            return TaskStatus::Error;
+        //     return TaskStatus::Error;
-        }
+        // }
    }
    catch (...)
    {
@ -1226,17 +1234,17 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
    auto create_is_dirty_node = [&] (const CleanStateClock & clock)
    {
        if (clock.is_stale())
-            LOG_DEBUG(log, "Clean state clock is stale while setting dirty flag, cowardly bailing");
+            LOG_INFO(log, "Clean state clock is stale while setting dirty flag, cowardly bailing");
        else if (!clock.is_clean())
-            LOG_DEBUG(log, "Thank you, Captain Obvious");
+            LOG_INFO(log, "Thank you, Captain Obvious");
        else if (clock.discovery_version)
        {
-            LOG_DEBUG(log, "Updating clean state clock");
+            LOG_INFO(log, "Updating clean state clock");
            zookeeper->set(piece_is_dirty_flag_path, host_id, clock.discovery_version.value());
        }
        else
        {
-            LOG_DEBUG(log, "Creating clean state clock");
+            LOG_INFO(log, "Creating clean state clock");
            zookeeper->create(piece_is_dirty_flag_path, host_id, zkutil::CreateMode::Persistent);
        }
    };
@ -1271,7 +1279,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
    /// Load balancing
    auto worker_node_holder = createTaskWorkerNodeAndWaitIfNeed(zookeeper, current_task_piece_status_path, is_unprioritized_task);
-    LOG_DEBUG(log, "Processing {}", current_task_piece_status_path);
+    LOG_INFO(log, "Processing {}", current_task_piece_status_path);
    const String piece_status_path = partition_piece.getPartitionPieceShardsPath();
@ -1282,12 +1290,12 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
    /// Do not start if partition piece is dirty, try to clean it
    if (is_clean)
    {
-        LOG_DEBUG(log, "Partition {} piece {} appears to be clean", task_partition.name, current_piece_number);
+        LOG_INFO(log, "Partition {} piece {} appears to be clean", task_partition.name, current_piece_number);
        zookeeper->createAncestors(current_task_piece_status_path);
    }
    else
    {
-        LOG_DEBUG(log, "Partition {} piece {} is dirty, try to drop it", task_partition.name, current_piece_number);
+        LOG_INFO(log, "Partition {} piece {} is dirty, try to drop it", task_partition.name, current_piece_number);
        try
        {
@ -1312,7 +1320,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
    {
        if (e.code == Coordination::Error::ZNODEEXISTS)
        {
-            LOG_DEBUG(log, "Someone is already processing {}", current_task_piece_is_active_path);
+            LOG_INFO(log, "Someone is already processing {}", current_task_piece_is_active_path);
            return TaskStatus::Active;
        }
@ -1328,13 +1336,13 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
            TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data);
            if (status.state == TaskState::Finished)
            {
-                LOG_DEBUG(log, "Task {} has been successfully executed by {}", current_task_piece_status_path, status.owner);
+                LOG_INFO(log, "Task {} has been successfully executed by {}", current_task_piece_status_path, status.owner);
                return TaskStatus::Finished;
            }
            /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process.
            /// Initialize DROP PARTITION
-            LOG_DEBUG(log, "Task {} has not been successfully finished by {}. Partition will be dropped and refilled.", current_task_piece_status_path, status.owner);
+            LOG_INFO(log, "Task {} has not been successfully finished by {}. Partition will be dropped and refilled.", current_task_piece_status_path, status.owner);
            create_is_dirty_node(clean_state_clock);
            return TaskStatus::Error;
@ -1342,6 +1350,53 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
    }
    /// Try create table (if not exists) on each shard
    /// We have to create this table even in case that partition piece is empty
    /// This is significant, because it keeps the code simpler
    {
        /// 1) Get columns description from any replica of destination cluster
        /// 2) Change ENGINE, database and table name
        /// 3) Create helping table on the whole destination cluster
        auto & settings_push = task_cluster->settings_push;
        /// Get a connection to any shard to fetch `CREATE` query
        auto connection = task_table.cluster_push->getAnyShardInfo().pool->get(timeouts, &settings_push, true);
        /// Execute a request and get `CREATE` query as a string.
        String create_query = getRemoteCreateTable(task_shard.task_table.table_push, *connection, settings_push);
        /// Parse it to ASTPtr
        ParserCreateQuery parser_create_query;
        auto create_query_ast = parseQuery(parser_create_query, create_query, settings_push.max_query_size, settings_push.max_parser_depth);
        /// Define helping table database and name for current partition piece
        DatabaseAndTableName database_and_table_for_current_piece{
                task_table.table_push.first,
                task_table.table_push.second + "_piece_" + toString(current_piece_number)};
        /// This is a bit of legacy, because we could now parse an engine AST from the whole create query.
        /// But this is needed to make helping table non-replicated. We simply don't need this
        auto new_engine_push_ast = task_table.engine_push_ast;
        if (task_table.isReplicatedTable())
            new_engine_push_ast = task_table.rewriteReplicatedCreateQueryToPlain();
        /// Take columns definition from destination table, new database and table name, and new engine (non replicated variant of MergeTree)
        auto create_query_push_ast = rewriteCreateQueryStorage(create_query_ast, database_and_table_for_current_piece, new_engine_push_ast);
        String query = queryToString(create_query_push_ast);
        LOG_INFO(log, "Create destination tables. Query: \n {}", wrapWithColor(query));
        UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push,  ClusterExecutionMode::ON_EACH_NODE);
        LOG_INFO(
            log,
            "Destination tables {} have been created on {} shards of {}",
            getQuotedTable(task_table.table_push),
            shards,
            task_table.cluster_push->getShardCount());
        // if (shards != task_table.cluster_push->getShardCount())
        // {
        //     return TaskStatus::Error;
        // }
    }
    /// Exit if current piece is absent on this shard. Also mark it as finished, because we will check
    /// whether each shard has processed each partition (and its pieces).
    if (partition_piece.is_absent_piece)
@ -1349,9 +1404,9 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id);
        auto res = zookeeper->tryCreate(current_task_piece_status_path, state_finished, zkutil::CreateMode::Persistent);
        if (res == Coordination::Error::ZNODEEXISTS)
-            LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. But other replicas have already marked it as done.", task_partition.name, current_piece_number);
+            LOG_INFO(log, "Partition {} piece {} is absent on current replica of a shard. But other replicas have already marked it as done.", task_partition.name, current_piece_number);
        if (res == Coordination::Error::ZOK)
-            LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.", task_partition.name, current_piece_number);
+            LOG_INFO(log, "Partition {} piece {} is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.", task_partition.name, current_piece_number);
        return TaskStatus::Finished;
    }
@ -1415,40 +1470,6 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        zookeeper->create(current_task_piece_status_path, start_state, zkutil::CreateMode::Persistent);
    }
    /// Try create table (if not exists) on each shard
    {
        /// Define push table for current partition piece
        auto database_and_table_for_current_piece= std::pair<String, String>(
                task_table.table_push.first,
                task_table.table_push.second + "_piece_" + toString(current_piece_number));
        auto new_engine_push_ast = task_table.engine_push_ast;
        if (task_table.isReplicatedTable())
        {
            new_engine_push_ast = task_table.rewriteReplicatedCreateQueryToPlain();
        }
        auto create_query_push_ast = rewriteCreateQueryStorage(
                task_shard.current_pull_table_create_query,
                database_and_table_for_current_piece, new_engine_push_ast);
        create_query_push_ast->as<ASTCreateQuery &>().if_not_exists = true;
        String query = queryToString(create_query_push_ast);
        LOG_DEBUG(log, "Create destination tables. Query: {}", query);
        UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, PoolMode::GET_MANY);
        LOG_INFO(
            log,
            "Destination tables {} have been created on {} shards of {}",
            getQuotedTable(task_table.table_push),
            shards,
            task_table.cluster_push->getShardCount());
        if (shards != task_table.cluster_push->getShardCount())
        {
            return TaskStatus::Error;
        }
    }
    /// Do the copying
    {
@ -1462,7 +1483,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        // Select all fields
        ASTPtr query_select_ast = get_select_query(task_shard.table_read_shard, "*", /*enable_splitting*/ true, inject_fault ? "1" : "");
-        LOG_DEBUG(log, "Executing SELECT query and pull from {} : {}", task_shard.getDescription(), queryToString(query_select_ast));
+        LOG_INFO(log, "Executing SELECT query and pull from {} : {}", task_shard.getDescription(), queryToString(query_select_ast));
        ASTPtr query_insert_ast;
        {
@ -1473,7 +1494,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
            const auto & settings = getContext()->getSettingsRef();
            query_insert_ast = parseQuery(p_query, query, settings.max_query_size, settings.max_parser_depth);
-            LOG_DEBUG(log, "Executing INSERT query: {}", query);
+            LOG_INFO(log, "Executing INSERT query: {}", query);
        }
        try
@ -1491,8 +1512,24 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
                BlockIO io_select = InterpreterFactory::get(query_select_ast, context_select)->execute();
                BlockIO io_insert = InterpreterFactory::get(query_insert_ast, context_insert)->execute();
-                input = io_select.getInputStream();
+                auto pure_input = io_select.getInputStream();
                output = io_insert.out;
                /// Add converting actions to make it possible to copy blocks with slightly different schema
                const auto & select_block = pure_input->getHeader();
                const auto & insert_block = output->getHeader();
                auto actions_dag = ActionsDAG::makeConvertingActions(
                        select_block.getColumnsWithTypeAndName(),
                        insert_block.getColumnsWithTypeAndName(),
                        ActionsDAG::MatchColumnsMode::Position);
                auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext()));
                input = std::make_shared<ExpressionBlockInputStream>(pure_input, actions);
                std::cout << "Input:" << std::endl;
                std::cout << input->getHeader().dumpStructure() << std::endl;
                std::cout << "Output:" << std::endl;
                std::cout << output->getHeader().dumpStructure() << std::endl;
            }
            /// Fail-fast optimization to abort copying when the current clean state expires
@ -1600,7 +1637,7 @@ void ClusterCopier::dropLocalTableIfExists(const DatabaseAndTableName & table_na
 void ClusterCopier::dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number)
 {
-    LOG_DEBUG(log, "Removing helping tables piece {}", current_piece_number);
+    LOG_INFO(log, "Removing helping tables piece {}", current_piece_number);
    DatabaseAndTableName original_table = task_table.table_push;
    DatabaseAndTableName helping_table
@ -1611,17 +1648,17 @@ void ClusterCopier::dropHelpingTablesByPieceNumber(const TaskTable & task_table,
    const ClusterPtr & cluster_push = task_table.cluster_push;
    Settings settings_push = task_cluster->settings_push;
-    LOG_DEBUG(log, "Execute distributed DROP TABLE: {}", query);
+    LOG_INFO(log, "Execute distributed DROP TABLE: {}", query);
    /// We have to drop partition_piece on each replica
-    UInt64 num_nodes = executeQueryOnCluster(cluster_push, query, settings_push, PoolMode::GET_MANY, ClusterExecutionMode::ON_EACH_NODE);
+    UInt64 num_nodes = executeQueryOnCluster(cluster_push, query, settings_push, ClusterExecutionMode::ON_EACH_NODE);
    LOG_INFO(log, "DROP TABLE query was successfully executed on {} nodes.", toString(num_nodes));
 }
 void ClusterCopier::dropHelpingTables(const TaskTable & task_table)
 {
-    LOG_DEBUG(log, "Removing helping tables");
+    LOG_INFO(log, "Removing helping tables");
    for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number)
    {
        dropHelpingTablesByPieceNumber(task_table, current_piece_number);
@ -1630,7 +1667,7 @@ void ClusterCopier::dropHelpingTables(const TaskTable & task_table)
 void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name)
 {
-    LOG_DEBUG(log, "Try drop partition partition from all helping tables.");
+    LOG_INFO(log, "Try drop partition partition from all helping tables.");
    for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number)
    {
        DatabaseAndTableName original_table = task_table.table_push;
@ -1641,17 +1678,16 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT
        const ClusterPtr & cluster_push = task_table.cluster_push;
        Settings settings_push = task_cluster->settings_push;
-        LOG_DEBUG(log, "Execute distributed DROP PARTITION: {}", query);
+        LOG_INFO(log, "Execute distributed DROP PARTITION: {}", query);
        /// We have to drop partition_piece on each replica
        UInt64 num_nodes = executeQueryOnCluster(
                cluster_push, query,
                settings_push,
                PoolMode::GET_MANY,
                ClusterExecutionMode::ON_EACH_NODE);
        LOG_INFO(log, "DROP PARTITION query was successfully executed on {} nodes.", toString(num_nodes));
    }
-    LOG_DEBUG(log, "All helping tables dropped partition {}", partition_name);
+    LOG_INFO(log, "All helping tables dropped partition {}", partition_name);
 }
 String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings)
@ -1666,6 +1702,8 @@ String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, C
    return typeid_cast<const ColumnString &>(*block.safeGetByPosition(0).column).getDataAt(0).toString();
 }
 ASTPtr ClusterCopier::getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard)
 {
    /// Fetch and parse (possibly) new definition
@ -1680,6 +1718,21 @@ ASTPtr ClusterCopier::getCreateTableForPullShard(const ConnectionTimeouts & time
    return parseQuery(parser_create_query, create_query_pull_str, settings.max_query_size, settings.max_parser_depth);
 }
 /// Requests the CREATE TABLE statement of the destination (push) table over a pooled
 /// connection of the given shard and returns that statement parsed into an AST.
 ASTPtr ClusterCopier::getCreateTableForPushShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard)
 {
    /// Parser limits are taken from the copier's own context, not from the push settings.
    const auto & context_settings = getContext()->getSettingsRef();
    /// The definition is fetched anew on every call, since it may have changed in the meantime.
    /// NOTE(review): the connection comes from task_shard.info.pool while the table queried is table_push;
    /// confirm that this pool really points at a node hosting the destination table.
    auto pooled_connection = task_shard.info.pool->get(timeouts, &task_cluster->settings_push, true);
    const String create_query_push_str = getRemoteCreateTable(
            task_shard.task_table.table_push,
            *pooled_connection,
            task_cluster->settings_push);
    ParserCreateQuery create_parser;
    return parseQuery(create_parser, create_query_push_str, context_settings.max_query_size, context_settings.max_parser_depth);
 }
 /// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it.
 void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeouts,
        TaskShard & task_shard, bool create_split)
@ -1688,6 +1741,8 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout
    /// We need to update table definitions for each part, it could be changed after ALTER
    task_shard.current_pull_table_create_query = getCreateTableForPullShard(timeouts, task_shard);
    // task_shard.current_push_table_create_query = getCreateTableForPushShard(timeouts, task_shard);
    /// Create local Distributed tables:
    ///  a table fetching data from current shard and a table inserting data to the whole destination cluster
@ -1709,7 +1764,8 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout
    auto storage_shard_ast = createASTStorageDistributed(shard_read_cluster_name, task_table.table_pull.first, task_table.table_pull.second);
-    auto create_query_ast = removeAliasColumnsFromCreateQuery(task_shard.current_pull_table_create_query);
+    // auto create_query_ast = removeAliasColumnsFromCreateQuery(task_shard.current_pull_table_create_query);
    auto create_query_ast = task_shard.current_pull_table_create_query;
    auto create_table_pull_ast = rewriteCreateQueryStorage(create_query_ast, task_shard.table_read_shard, storage_shard_ast);
    dropAndCreateLocalTable(create_table_pull_ast);
@ -1768,7 +1824,7 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
    const auto & settings = getContext()->getSettingsRef();
    ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth);
-    LOG_DEBUG(log, "Computing destination partition set, executing query: {}", query);
+    LOG_INFO(log, "Computing destination partition set, executing query: \n {}", wrapWithColor(query));
    auto local_context = Context::createCopy(context);
    local_context->setSettings(task_cluster->settings_pull);
@ -1787,7 +1843,7 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
        }
    }
-    LOG_DEBUG(log, "There are {} destination partitions in shard {}", res.size(), task_shard.getDescription());
+    LOG_INFO(log, "There are {} destination partitions in shard {}", res.size(), task_shard.getDescription());
    return res;
 }
@ -1799,21 +1855,22 @@ bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts,
    TaskTable & task_table = task_shard.task_table;
-    std::string query = "SELECT 1 FROM " + getQuotedTable(task_shard.table_read_shard)
+    WriteBufferFromOwnString ss;
-                        + " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) +
+    ss << "SELECT 1 FROM " << getQuotedTable(task_shard.table_read_shard);
-                        " = (" + partition_quoted_name + " AS partition_key))";
+    ss << " WHERE (" << queryToString(task_table.engine_push_partition_key_ast);
-
+    ss << " = (" + partition_quoted_name << " AS partition_key))";
    if (!task_table.where_condition_str.empty())
-        query += " AND (" + task_table.where_condition_str + ")";
+        ss << " AND (" << task_table.where_condition_str << ")";
-
+    ss << " LIMIT 1";
-    query += " LIMIT 1";
+    auto query = ss.str();
    LOG_DEBUG(log, "Checking shard {} for partition {} existence, executing query: {}", task_shard.getDescription(), partition_quoted_name, query);
    ParserQuery parser_query(query.data() + query.size());
    const auto & settings = getContext()->getSettingsRef();
    ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth);
    LOG_INFO(log, "Checking shard {} for partition {} existence, executing query: \n {}", 
        task_shard.getDescription(), partition_quoted_name, query_ast->formatForErrorMessage());
    auto local_context = Context::createCopy(context);
    local_context->setSettings(task_cluster->settings_pull);
    return InterpreterFactory::get(query_ast, local_context)->execute().getInputStream()->read().rows() != 0;
@ -1847,7 +1904,7 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi
    query += " LIMIT 1";
-    LOG_DEBUG(log, "Checking shard {} for partition {} piece {} existence, executing query: {}", task_shard.getDescription(), partition_quoted_name, std::to_string(current_piece_number), query);
+    LOG_INFO(log, "Checking shard {} for partition {} piece {} existence, executing query: \n \u001b[36m {}", task_shard.getDescription(), partition_quoted_name, std::to_string(current_piece_number), query);
    ParserQuery parser_query(query.data() + query.size());
    const auto & settings = getContext()->getSettingsRef();
@ -1857,12 +1914,13 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi
    local_context->setSettings(task_cluster->settings_pull);
    auto result = InterpreterFactory::get(query_ast, local_context)->execute().getInputStream()->read().rows();
    if (result != 0)
-        LOG_DEBUG(log, "Partition {} piece number {} is PRESENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
+        LOG_INFO(log, "Partition {} piece number {} is PRESENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
    else
-        LOG_DEBUG(log, "Partition {} piece number {} is ABSENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
+        LOG_INFO(log, "Partition {} piece number {} is ABSENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
    return result != 0;
 }
 /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster
  * Returns number of shards for which at least one replica executed query successfully
  */
@ -1870,112 +1928,64 @@ UInt64 ClusterCopier::executeQueryOnCluster(
        const ClusterPtr & cluster,
        const String & query,
        const Settings & current_settings,
-        PoolMode pool_mode,
+        ClusterExecutionMode execution_mode) const
        ClusterExecutionMode execution_mode,
        UInt64 max_successful_executions_per_shard) const
 {
-    auto num_shards = cluster->getShardsInfo().size();
+    ClusterPtr cluster_for_query = cluster;
    std::vector<UInt64> per_shard_num_successful_replicas(num_shards, 0);
    ParserQuery p_query(query.data() + query.size());
    ASTPtr query_ast = parseQuery(p_query, query, current_settings.max_query_size, current_settings.max_parser_depth);
    /// We will have to execute query on each replica of a shard.
    if (execution_mode == ClusterExecutionMode::ON_EACH_NODE)
-        max_successful_executions_per_shard = 0;
+        cluster_for_query = cluster->getClusterWithReplicasAsShards(current_settings);
-    std::atomic<size_t> origin_replicas_number = 0;
+    std::vector<std::shared_ptr<Connection>> connections;
    connections.reserve(cluster->getShardCount());
-    /// We need to execute query on one replica at least
+    std::atomic<UInt64> successfully_executed = 0;
-    auto do_for_shard = [&] (UInt64 shard_index, Settings shard_settings)
+
    for (const auto & replicas : cluster_for_query->getShardsAddresses())
    {
-        setThreadName("QueryForShard");
+        const auto & node = replicas[0];
-
+        try
        const Cluster::ShardInfo & shard = cluster->getShardsInfo().at(shard_index);
        UInt64 & num_successful_executions = per_shard_num_successful_replicas.at(shard_index);
        num_successful_executions = 0;
        auto increment_and_check_exit = [&] () -> bool
        {
-            ++num_successful_executions;
+            connections.emplace_back(std::make_shared<Connection>(
-            return max_successful_executions_per_shard && num_successful_executions >= max_successful_executions_per_shard;
+                node.host_name, node.port, node.default_database,
-        };
+                node.user, node.password, node.cluster, node.cluster_secret,
                "ClusterCopier", node.compression, node.secure
            ));
-        UInt64 num_replicas = cluster->getShardsAddresses().at(shard_index).size();
+            /// We execute only Alter, Create and Drop queries.
            const auto header = Block{};
-        origin_replicas_number += num_replicas;
+            /// For unknown reason global context is passed to IStorage::read() method
-        UInt64 num_local_replicas = shard.getLocalNodeCount();
+            /// So, task_identifier is passed as constructor argument. It is more obvious.
-        UInt64 num_remote_replicas = num_replicas - num_local_replicas;
+            auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
                    *connections.back(), query, header, getContext(),
                    /*throttler=*/nullptr, Scalars(), Tables(), QueryProcessingStage::Complete);
-        /// In that case we don't have local replicas, but do it just in case
+            try
        for (UInt64 i = 0; i < num_local_replicas; ++i)
        {
            auto interpreter = InterpreterFactory::get(query_ast, getContext());
            interpreter->execute();
            if (increment_and_check_exit())
                return;
        }
        /// Will try to make as many as possible queries
        if (shard.hasRemoteConnections())
        {
            shard_settings.max_parallel_replicas = num_remote_replicas ? num_remote_replicas : 1;
            auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(shard_settings).getSaturated(shard_settings.max_execution_time);
            auto connections = shard.pool->getMany(timeouts, &shard_settings, pool_mode);
            auto shard_context = Context::createCopy(context);
            shard_context->setSettings(shard_settings);
            for (auto & connection : connections)
            {
-                if (connection.isNull())
+                remote_query_executor->sendQuery();
                    continue;
                try
                {
                    /// CREATE TABLE and DROP PARTITION queries return empty block
                    RemoteBlockInputStream stream{*connection, query, Block{}, shard_context};
                    NullBlockOutputStream output{Block{}};
                    copyData(stream, output);
                    if (increment_and_check_exit())
                        return;
                }
                catch (const Exception &)
                {
                    LOG_INFO(log, getCurrentExceptionMessage(false, true));
                }
            }
            catch (...)
            {
                /// Sending the query failed: report which node could not be reached and why.
                /// The format string needs exactly one `{}` per argument; the previous message had
                /// two placeholders for a single argument, which breaks formatting of the log line.
                LOG_WARNING(log, "Seems like node with address {} is unreachable: {}", node.host_name, getCurrentExceptionMessage(false, true));
            }
            while (true)
            {
                auto block = remote_query_executor->read();
                if (!block)
                    break;
            }
            remote_query_executor->finish();
            ++successfully_executed;
        }
        catch (...)
        {
            LOG_WARNING(log, "An error occured while processing query : \n {}", wrapWithColor(query));
            tryLogCurrentException(log);
        }
    };
    {
        ThreadPool thread_pool(std::min<UInt64>(num_shards, getNumberOfPhysicalCPUCores()));
        for (UInt64 shard_index = 0; shard_index < num_shards; ++shard_index)
            thread_pool.scheduleOrThrowOnError([=, shard_settings = current_settings] { do_for_shard(shard_index, std::move(shard_settings)); });
        thread_pool.wait();
    }
-    UInt64 successful_nodes = 0;
+    return successfully_executed.load();
    for (UInt64 num_replicas : per_shard_num_successful_replicas)
    {
        if (execution_mode == ClusterExecutionMode::ON_EACH_NODE)
            successful_nodes += num_replicas;
        else
            /// Count only successful shards
            successful_nodes += (num_replicas > 0);
    }
    if (execution_mode == ClusterExecutionMode::ON_EACH_NODE && successful_nodes != origin_replicas_number)
    {
        LOG_INFO(log, "There was an error while executing ALTER on each node. Query was executed on {} nodes. But had to be executed on {}", toString(successful_nodes), toString(origin_replicas_number.load()));
    }
    return successful_nodes;
 }
 }
--- a/programs/copier/ClusterCopier.h
+++ b/programs/copier/ClusterCopier.h
@ -18,12 +18,15 @@ public:
    ClusterCopier(const String & task_path_,
                  const String & host_id_,
                  const String & proxy_database_name_,
-                  ContextPtr context_)
+                  ContextPtr context_,
                  Poco::Logger * log_)
            : WithContext(context_),
            task_zookeeper_path(task_path_),
            host_id(host_id_),
            working_database_name(proxy_database_name_),
-            log(&Poco::Logger::get("ClusterCopier")) {}
+            log(log_) {
                std::cout << "Level from constructor" << log->getLevel() << std::endl;
            }
    void init();
@ -159,6 +162,7 @@ protected:
    String getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings);
    ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard);
    ASTPtr getCreateTableForPushShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard);
    /// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it.
    void createShardInternalTables(const ConnectionTimeouts & timeouts, TaskShard & task_shard, bool create_split = true);
@ -189,9 +193,7 @@ protected:
            const ClusterPtr & cluster,
            const String & query,
            const Settings & current_settings,
-            PoolMode pool_mode = PoolMode::GET_ALL,
+            ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_SHARD) const;
            ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_SHARD,
            UInt64 max_successful_executions_per_shard = 0) const;
 private:
    String task_zookeeper_path;
--- a/programs/copier/ClusterCopierApp.cpp
+++ b/programs/copier/ClusterCopierApp.cpp
@ -21,7 +21,7 @@ void ClusterCopierApp::initialize(Poco::Util::Application & self)
    config_xml_path = config().getString("config-file");
    task_path = config().getString("task-path");
-    log_level = config().getString("log-level", "trace");
+    log_level = config().getString("log-level", "info");
    is_safe_mode = config().has("safe-mode");
    if (config().has("copy-fault-probability"))
        copy_fault_probability = std::max(std::min(config().getDouble("copy-fault-probability"), 1.0), 0.0);
@ -110,6 +110,8 @@ void ClusterCopierApp::mainImpl()
    ThreadStatus thread_status;
    auto * log = &logger();
    log->setLevel(6); /// Information
    std::cout << log->getLevel() << std::endl;
    LOG_INFO(log, "Starting clickhouse-copier (id {}, host_id {}, path {}, revision {})", process_id, host_id, process_path, ClickHouseRevision::getVersionRevision());
    SharedContextHolder shared_context = Context::createShared();
@ -137,7 +139,7 @@ void ClusterCopierApp::mainImpl()
    CurrentThread::QueryScope query_scope(context);
    std::cout << "Will construct copier" << std::endl;
-    auto copier = std::make_unique<ClusterCopier>(task_path, host_id, default_database, context);
+    auto copier = std::make_unique<ClusterCopier>(task_path, host_id, default_database, context, log);
    copier->setSafeMode(is_safe_mode);
    copier->setCopyFaultProbability(copy_fault_probability);
    copier->setMoveFaultProbability(move_fault_probability);
--- a/programs/copier/TaskTableAndShard.h
+++ b/programs/copier/TaskTableAndShard.h
@ -50,7 +50,6 @@ struct TaskTable
    String getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const;
    bool isReplicatedTable() const { return is_replicated_table; }
    /// Partitions will be split into number-of-splits pieces.
@ -181,6 +180,7 @@ struct TaskShard
    /// Last CREATE TABLE query of the table of the shard
    ASTPtr current_pull_table_create_query;
    ASTPtr current_push_table_create_query;
    /// Internal distributed tables
    DatabaseAndTableName table_read_shard;
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@ -497,14 +497,14 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
        ClusterPtr optimized_cluster = getOptimizedCluster(local_context, metadata_snapshot, query_info.query);
        if (optimized_cluster)
        {
-            LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
+            LOG_TRACE(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
                    makeFormattedListOfShards(optimized_cluster));
            cluster = optimized_cluster;
            query_info.optimized_cluster = cluster;
        }
        else
        {
-            LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}",
+            LOG_TRACE(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}",
                    has_sharding_key ? "" : " (no sharding key)");
        }
    }
@ -536,7 +536,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
        auto stage = getOptimizedQueryProcessingStage(query_info, settings.extremes, sharding_key_block);
        if (stage)
        {
-            LOG_DEBUG(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage));
+            LOG_TRACE(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage));
            return *stage;
        }
    }
@ -789,7 +789,7 @@ void StorageDistributed::startup()
        if (inc > file_names_increment.value)
            file_names_increment.value.store(inc);
    }
-    LOG_DEBUG(log, "Auto-increment is {}", file_names_increment.value);
+    LOG_TRACE(log, "Auto-increment is {}", file_names_increment.value);
 }
@ -799,9 +799,9 @@ void StorageDistributed::shutdown()
    std::lock_guard lock(cluster_nodes_mutex);
-    LOG_DEBUG(log, "Joining background threads for async INSERT");
+    LOG_TRACE(log, "Joining background threads for async INSERT");
    cluster_nodes_data.clear();
-    LOG_DEBUG(log, "Background threads for async INSERT joined");
+    LOG_TRACE(log, "Background threads for async INSERT joined");
 }
 void StorageDistributed::drop()
 {
@ -819,13 +819,13 @@ void StorageDistributed::drop()
    if (relative_data_path.empty())
        return;
-    LOG_DEBUG(log, "Removing pending blocks for async INSERT from filesystem on DROP TABLE");
+    LOG_TRACE(log, "Removing pending blocks for async INSERT from filesystem on DROP TABLE");
    auto disks = data_volume->getDisks();
    for (const auto & disk : disks)
        disk->removeRecursive(relative_data_path);
-    LOG_DEBUG(log, "Removed");
+    LOG_TRACE(log, "Removed");
 }
 Strings StorageDistributed::getDataPaths() const
@ -845,7 +845,7 @@ void StorageDistributed::truncate(const ASTPtr &, const StorageMetadataPtr &, Co
 {
    std::lock_guard lock(cluster_nodes_mutex);
-    LOG_DEBUG(log, "Removing pending blocks for async INSERT from filesystem on TRUNCATE TABLE");
+    LOG_TRACE(log, "Removing pending blocks for async INSERT from filesystem on TRUNCATE TABLE");
    for (auto it = cluster_nodes_data.begin(); it != cluster_nodes_data.end();)
    {
@ -853,7 +853,7 @@ void StorageDistributed::truncate(const ASTPtr &, const StorageMetadataPtr &, Co
        it = cluster_nodes_data.erase(it);
    }
-    LOG_DEBUG(log, "Removed");
+    LOG_TRACE(log, "Removed");
 }
 StoragePolicyPtr StorageDistributed::getStoragePolicy() const
@ -881,7 +881,7 @@ void StorageDistributed::createDirectoryMonitors(const DiskPtr & disk)
            if (std::filesystem::is_empty(dir_path))
            {
-                LOG_DEBUG(log, "Removing {} (used for async INSERT into Distributed)", dir_path.string());
+                LOG_TRACE(log, "Removing {} (used for async INSERT into Distributed)", dir_path.string());
                /// Will be created by DistributedBlockOutputStream on demand.
                std::filesystem::remove(dir_path);
            }
@ -1138,7 +1138,7 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data)
        disk->moveDirectory(relative_data_path, new_path_to_table_data);
        auto new_path = disk->getPath() + new_path_to_table_data;
-        LOG_DEBUG(log, "Updating path to {}", new_path);
+        LOG_TRACE(log, "Updating path to {}", new_path);
        std::lock_guard lock(cluster_nodes_mutex);
        for (auto & node : cluster_nodes_data)
--- a/tests/integration/test_cluster_copier/configs_taxi/config-copier.xml
+++ b/tests/integration/test_cluster_copier/configs_taxi/config-copier.xml
@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <yandex>
    <logger>
-        <level>trace</level>
+        <level>fatal</level>
        <log>/var/log/clickhouse-server/copier/log.log</log>
        <errorlog>/var/log/clickhouse-server/copier/log.err.log</errorlog>
        <size>1000M</size>
--- a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml
+++ b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml
@ -0,0 +1,23 @@
 <?xml version="1.0"?>
 <yandex>
    <remote_servers>
        <source>
             <shard>
                 <internal_replication>false</internal_replication>
                 <replica>
                     <host>first</host>
                     <port>9000</port>
                 </replica>
             </shard>
        </source>
        <destination>
             <shard>
                 <internal_replication>false</internal_replication>
                 <replica>
                     <host>second</host>
                     <port>9000</port>
                 </replica>
             </shard>
        </destination>
    </remote_servers>
 </yandex>
--- a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml
+++ b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml
@ -0,0 +1,6 @@
 <?xml version="1.0"?>
 <yandex>
    <distributed_ddl>
        <path>/clickhouse/task_queue/ddl</path>
    </distributed_ddl>
 </yandex>
--- a/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml
+++ b/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml
@ -0,0 +1,20 @@
 <?xml version="1.0"?>
 <yandex>
    <logger>
        <level>fatal</level>
        <log>/var/log/clickhouse-server/copier/log.log</log>
        <errorlog>/var/log/clickhouse-server/copier/log.err.log</errorlog>
        <size>1000M</size>
        <count>10</count>
        <stderr>/var/log/clickhouse-server/copier/stderr.log</stderr>
        <stdout>/var/log/clickhouse-server/copier/stdout.log</stdout>
    </logger>
    <zookeeper>
        <node index="1">
            <host>zoo1</host>
            <port>2181</port>
        </node>
        <session_timeout_ms>2000</session_timeout_ms>
    </zookeeper>
 </yandex>
--- a/tests/integration/test_cluster_copier/configs_two_nodes/users.xml
+++ b/tests/integration/test_cluster_copier/configs_two_nodes/users.xml
@ -0,0 +1,32 @@
 <?xml version="1.0"?>
 <yandex>
    <profiles>
        <default>
            <log_queries>1</log_queries>
        </default>
    </profiles>
    <users>
        <default>
            <password></password>
            <networks incl="networks" replace="replace">
                <ip>::/0</ip>
            </networks>
            <profile>default</profile>
            <quota>default</quota>
        </default>
        <dbuser>
            <password>12345678</password>
            <networks incl="networks" replace="replace">
                <ip>::/0</ip>
            </networks>
            <profile>default</profile>
            <quota>default</quota>
        </dbuser>
    </users>
    <quotas>
        <default>
        </default>
    </quotas>
 </yandex>
--- a/tests/integration/test_cluster_copier/task_taxi_data.xml
+++ b/tests/integration/test_cluster_copier/task_taxi_data.xml
@ -32,12 +32,12 @@
     <table_events>
         <cluster_pull>events</cluster_pull>
         <database_pull>dailyhistory</database_pull>
-         <table_pull>yellow_tripdata</table_pull>
+         <table_pull>yellow_tripdata_staging</table_pull>
         <cluster_push>events</cluster_push>
         <database_push>monthlyhistory</database_push>
-         <table_push>yellow_tripdata</table_push>
+         <table_push>yellow_tripdata_staging</table_push>
         <engine>Engine=ReplacingMergeTree() PRIMARY KEY (tpep_pickup_datetime, id) ORDER BY (tpep_pickup_datetime, id) PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime))</engine>
-         <sharding_key>rand()</sharding_key>
+         <sharding_key>sipHash64(id) % 3</sharding_key>
     </table_events>
   </tables>
 </yandex>
--- a/tests/integration/test_cluster_copier/task_with_different_schema.xml
+++ b/tests/integration/test_cluster_copier/task_with_different_schema.xml
@ -0,0 +1,40 @@
 <?xml version="1.0"?>
 <yandex>
 <remote_servers>
        <source>
             <shard>
                 <internal_replication>false</internal_replication>
                 <replica>
                     <host>first</host>
                     <port>9000</port>
                 </replica>
             </shard>
        </source>
        <destination>
             <shard>
                 <internal_replication>false</internal_replication>
                 <replica>
                     <host>second</host>
                     <port>9000</port>
                 </replica>
             </shard>
        </destination>
    </remote_servers>
   <max_workers>2</max_workers>
   <tables>
     <table_events>
         <cluster_pull>source</cluster_pull>
         <database_pull>db_different_schema</database_pull>
         <table_pull>source</table_pull>
         <cluster_push>destination</cluster_push>
         <database_push>db_different_schema</database_push>
         <table_push>destination</table_push>
         <engine>ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column9, Column1, Column2, Column3, Column4)</engine>
         <sharding_key>rand()</sharding_key>
     </table_events>
   </tables>
 </yandex>
--- a/tests/integration/test_cluster_copier/test_tables_with_different_schema.py
+++ b/tests/integration/test_cluster_copier/test_tables_with_different_schema.py
@ -0,0 +1,175 @@
 import os
 import sys
 import time
 import logging
 import subprocess
 import pytest
 from helpers.cluster import ClickHouseCluster
 from helpers.test_tools import TSV
 import docker
 CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR))
 COPYING_FAIL_PROBABILITY = 0.33
 MOVING_FAIL_PROBABILITY = 0.1
 cluster = None
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture providing a running ClickHouse test cluster.

    Builds a cluster with three instances (`first`, `second`, `third`), each
    configured with the `configs_two_nodes` server/user configs, ZooKeeper and
    the `./data` directory as external data, starts it and yields it to the
    tests. The cluster is also stored in the module-level `cluster` global,
    which the helpers in this module read.

    Yields:
        The started ClickHouseCluster.
    """
    global cluster
    try:
        cluster = ClickHouseCluster(__file__)
        for name in ["first", "second", "third"]:
            cluster.add_instance(
                name,
                main_configs=["configs_two_nodes/conf.d/clusters.xml", "configs_two_nodes/conf.d/ddl.xml"],
                user_configs=["configs_two_nodes/users.xml"],
                with_zookeeper=True,
                external_data_path=os.path.join(CURRENT_TEST_DIR, "./data"))
        cluster.start()
        yield cluster
    finally:
        # `cluster` is still None when ClickHouseCluster() itself raised; calling
        # shutdown() on it would only add an AttributeError on top of the real error.
        if cluster is not None:
            cluster.shutdown()
 # Will copy table from `first` node to `second`
 class TaskWithDifferentSchema:
    """Copier task between two tables whose schemas differ.

    The source table lives on instance `first`, the destination table on
    instance `second`. The destination uses LowCardinality types, explicit
    codecs and a different PARTITION BY / ORDER BY than the source.

    Attributes set by the constructor:
        cluster: the cluster object the task operates on.
        zk_task_path: ZooKeeper path of the copier task.
        container_task_file: path of the task description inside each container.
    """

    def __init__(self, cluster):
        """Remember `cluster` and copy the task description file into every instance's container."""
        self.cluster = cluster
        self.zk_task_path = '/clickhouse-copier/task_with_different_schema'
        self.container_task_file = "/task_with_different_schema.xml"
        for instance_name, instance in cluster.instances.items():
            instance.copy_file_to_container(os.path.join(CURRENT_TEST_DIR, './task_with_different_schema.xml'), self.container_task_file)
            print("Copied task file to container of '{}' instance. Path {}".format(instance_name, self.container_task_file))

    def start(self):
        """Create the source table on `first` (filled with 100 random rows) and the empty destination table on `second`."""
        # Use the cluster given to the constructor rather than the module global.
        first = self.cluster.instances["first"]
        first.query("CREATE DATABASE db_different_schema;")
        first.query("""CREATE TABLE db_different_schema.source
        (
            Column1 String,
            Column2 UInt32,
            Column3 Date,
            Column4 DateTime,
            Column5 UInt16,
            Column6 String,
            Column7 String,
            Column8 String,
            Column9 String,
            Column10 String,
            Column11 String,
            Column12 Decimal(3, 1),
            Column13 DateTime,
            Column14 UInt16
        )
        ENGINE = MergeTree()
        PARTITION BY (toYYYYMMDD(Column3), Column3)
        PRIMARY KEY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9)
        ORDER BY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9)
        SETTINGS index_granularity = 8192""")
        first.query("""INSERT INTO db_different_schema.source SELECT * FROM generateRandom(
            'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16,
            Column6 String, Column7 String, Column8 String, Column9 String, Column10 String,
            Column11 String, Column12 Decimal(3, 1), Column13 DateTime, Column14 UInt16', 1, 10, 2) LIMIT 100;""")
        second = self.cluster.instances["second"]
        second.query("CREATE DATABASE db_different_schema;")
        second.query("""CREATE TABLE db_different_schema.destination
        (
            Column1 LowCardinality(String) CODEC(LZ4),
            Column2 UInt32 CODEC(LZ4),
            Column3 Date CODEC(DoubleDelta, LZ4),
            Column4 DateTime CODEC(DoubleDelta, LZ4),
            Column5 UInt16 CODEC(LZ4),
            Column6 LowCardinality(String) CODEC(ZSTD),
            Column7 LowCardinality(String) CODEC(ZSTD),
            Column8 LowCardinality(String) CODEC(ZSTD),
            Column9 LowCardinality(String) CODEC(ZSTD),
            Column10 String CODEC(ZSTD(6)),
            Column11 LowCardinality(String) CODEC(LZ4),
            Column12 Decimal(3,1) CODEC(LZ4),
            Column13 DateTime CODEC(DoubleDelta, LZ4),
            Column14 UInt16 CODEC(LZ4)
        ) ENGINE = MergeTree()
        PARTITION BY toYYYYMMDD(Column3)
        ORDER BY (Column9, Column1, Column2, Column3, Column4);""")
        print("Preparation completed")

    def check(self):
        """Assert that source and destination hold the same number of rows and the same row hashes.

        Raises:
            AssertionError: with message "Count" or "Data" when the tables differ.
        """
        first = self.cluster.instances["first"]
        second = self.cluster.instances["second"]
        a = first.query("SELECT count() from db_different_schema.source")
        b = second.query("SELECT count() from db_different_schema.destination")
        assert a == b, "Count"
        # Rows are ordered by every column so both sides produce hashes in the same order.
        a = TSV(first.query("""SELECT sipHash64(*) from db_different_schema.source
            ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)"""))
        b = TSV(second.query("""SELECT sipHash64(*) from db_different_schema.destination
            ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)"""))
        assert a == b, "Data"
 def execute_task(task, cmd_options):
    """Prepare `task`, run clickhouse-copier on every instance and verify the result.

    Args:
        task: object providing `start()`, `check()`, `zk_task_path` and
            `container_task_file`.
        cmd_options: list of extra command-line arguments appended to the
            copier command.

    Raises:
        AssertionError: when a copier exits with a non-zero code, or when
            `task.check()` fails.
    """
    task.start()
    zk = cluster.get_kazoo_client('zoo1')
    print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1]))
    # Run cluster-copier processes on each node
    docker_api = docker.from_env().api
    cmd = ['/usr/bin/clickhouse', 'copier',
           '--config', '/etc/clickhouse-server/config-copier.xml',
           '--task-path', task.zk_task_path,
           '--task-file', task.container_task_file,
           '--task-upload-force', 'true',
           '--base-dir', '/var/log/clickhouse-server/copier']
    cmd += cmd_options
    print(cmd)
    # Keep every exec id together with the instance it belongs to, so the
    # wait loop below does not depend on a second, parallel iteration order.
    copiers = []
    for instance in cluster.instances.values():
        container = instance.get_docker_handle()
        instance.copy_file_to_container(os.path.join(CURRENT_TEST_DIR, "configs_taxi/config-copier.xml"), "/etc/clickhouse-server/config-copier.xml")
        logging.info("Copied copier config to {}".format(instance.name))
        exec_id = docker_api.exec_create(container.id, cmd, stderr=True)
        # exec_start is not detached here: its return value is the copier output, which is logged.
        output = docker_api.exec_start(exec_id).decode('utf8')
        logging.info(output)
        copiers.append((exec_id, instance))
        logging.info("Copier for {} ({}) has started".format(instance.name, instance.ip_address))
    # Wait for copiers stopping and check their return codes
    for exec_id, instance in copiers:
        while True:
            res = docker_api.exec_inspect(exec_id)
            if not res['Running']:
                break
            time.sleep(1)
        assert res['ExitCode'] == 0, "Instance: {} ({}). Info: {}".format(instance.name, instance.ip_address, repr(res))
    # The ZooKeeper task node is removed whether or not the data check passes.
    try:
        task.check()
    finally:
        zk.delete(task.zk_task_path, recursive=True)
 # Tests
@pytest.mark.timeout(1200)
def test1(started_cluster):
    """Run the different-schema copier task with no extra copier options."""
    task = TaskWithDifferentSchema(started_cluster)
    extra_options = []
    execute_task(task, extra_options)
--- a/tests/integration/test_cluster_copier/test_taxi_data.py
+++ b/tests/integration/test_cluster_copier/test_taxi_data.py
@ -114,7 +114,7 @@ class Task:
            junk1 String,  junk2 String
        )
        Engine = ReplacingMergeTree() PRIMARY KEY (tpep_pickup_datetime, id) ORDER BY (tpep_pickup_datetime, id) PARTITION BY (toYYYYMMDD(tpep_pickup_datetime))""")
-        instance.query("CREATE TABLE dailyhistory.yellow_tripdata ON CLUSTER events AS dailyhistory.yellow_tripdata_staging ENGINE = Distributed('events', 'dailyhistory', yellow_tripdata_staging, rand());")
+        instance.query("CREATE TABLE dailyhistory.yellow_tripdata ON CLUSTER events AS dailyhistory.yellow_tripdata_staging ENGINE = Distributed('events', 'dailyhistory', yellow_tripdata_staging, sipHash64(id) % 3);")
        # monthly partition database
        instance.query("create database monthlyhistory on cluster events;")
@ -127,25 +127,38 @@ class Task:
            pickup_location_id String,  dropoff_location_id String,  congestion_surcharge String,  junk1 String,  junk2 String
        ) 
        Engine = ReplacingMergeTree() PRIMARY KEY (tpep_pickup_datetime, id) ORDER BY (tpep_pickup_datetime, id) PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime))""")
-        instance.query("CREATE TABLE monthlyhistory.yellow_tripdata ON CLUSTER events AS monthlyhistory.yellow_tripdata_staging ENGINE = Distributed('events', 'monthlyhistory', yellow_tripdata_staging, rand());")
+        instance.query("CREATE TABLE monthlyhistory.yellow_tripdata ON CLUSTER events AS monthlyhistory.yellow_tripdata_staging ENGINE = Distributed('events', 'monthlyhistory', yellow_tripdata_staging, sipHash64(id) % 3);")
-        logging.info("Inserting in container")
+        print("Inserting in container")
        first_query = """INSERT INTO dailyhistory.yellow_tripdata(
            vendor_id,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,
            rate_code_id,store_and_fwd_flag,pickup_location_id,dropoff_location_id,payment_type,
            fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge) FORMAT CSV"""
        instance.exec_in_container(['bash', '-c', 'cat /usr/share/clickhouse-external-data/first.csv | /usr/bin/clickhouse client --query="{}"'.format(first_query)], privileged=True)
-        logging.info("Insert completed")
+        
        print("Insert completed")
    def check(self):
        instance = cluster.instances["first"]
        a = TSV(instance.query("SELECT count() from dailyhistory.yellow_tripdata"))
        b = TSV(instance.query("SELECT count() from monthlyhistory.yellow_tripdata"))
        assert a == b, "Distributed tables"
        for instance_name, instance in cluster.instances.items():
            instance = cluster.instances[instance_name]
            a = instance.query("SELECT count() from dailyhistory.yellow_tripdata_staging")
            b = instance.query("SELECT count() from monthlyhistory.yellow_tripdata_staging")
-            print(a, b)
+            assert a == b, "MergeTree tables on each shard"
-            assert a == b
+
            a = TSV(instance.query("SELECT sipHash64(*) from dailyhistory.yellow_tripdata_staging ORDER BY id"))
            b = TSV(instance.query("SELECT sipHash64(*) from monthlyhistory.yellow_tripdata_staging ORDER BY id"))
            assert a == b, "Data on each shard"
 def execute_task(task, cmd_options):