Merge branch 'master' of github.com:ClickHouse/ClickHouse

2024-09-20 16:50:48 +00:00 · 2020-02-20 16:35:37 +03:00 · 2020-02-20 16:35:37 +03:00 · 3efcc5e817
commit 3efcc5e817
parent f3423b1835 0c686baf4f
48 changed files with 3438 additions and 2555 deletions
--- a/dbms/programs/copier/Aliases.h
+++ b/dbms/programs/copier/Aliases.h
@ -0,0 +1,26 @@
+#pragma once
+
+#include <Interpreters/Cluster.h>
+
+namespace DB
+
+{
+    using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
+
+    using DatabaseAndTableName = std::pair<String, String>;
+
+    /// Hierarchical description of the tasks
+    struct ShardPartition;
+    struct TaskShard;
+    struct TaskTable;
+    struct TaskCluster;
+    struct ClusterPartition;
+
+    using TasksPartition = std::map<String, ShardPartition, std::greater<>>;
+    using ShardInfo = Cluster::ShardInfo;
+    using TaskShardPtr = std::shared_ptr<TaskShard>;
+    using TasksShard = std::vector<TaskShardPtr>;
+    using TasksTable = std::list<TaskTable>;
+    using ClusterPartitions = std::map<String, ClusterPartition, std::greater<>>;
+}
+
--- a/dbms/programs/copier/CMakeLists.txt
+++ b/dbms/programs/copier/CMakeLists.txt
@ -1,5 +1,17 @@
-set(CLICKHOUSE_COPIER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp)
-set(CLICKHOUSE_COPIER_LINK PRIVATE clickhouse_common_zookeeper clickhouse_parsers clickhouse_functions clickhouse_table_functions clickhouse_aggregate_functions clickhouse_dictionaries string_utils ${Poco_XML_LIBRARY} PUBLIC daemon)
+set(CLICKHOUSE_COPIER_SOURCES
+        ${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp)
+
+set(CLICKHOUSE_COPIER_LINK PRIVATE
+        clickhouse_common_zookeeper
+        clickhouse_parsers
+        clickhouse_functions
+        clickhouse_table_functions
+        clickhouse_aggregate_functions
+        clickhouse_dictionaries
+        string_utils ${Poco_XML_LIBRARY} PUBLIC daemon)
+
 set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR})

 clickhouse_program_add(copier)
--- a/dbms/programs/copier/ClusterCopier.cpp
+++ b/dbms/programs/copier/ClusterCopier.cpp
--- a/dbms/programs/copier/ClusterCopier.h
+++ b/dbms/programs/copier/ClusterCopier.h
@ -1,89 +1,175 @@
 #pragma once
-#include <Poco/Util/ServerApplication.h>
-#include <daemon/BaseDaemon.h>

-/* clickhouse cluster copier util
- * Copies tables data from one cluster to new tables of other (possibly the same) cluster in distributed fault-tolerant manner.
- *
- * See overview in the docs: docs/en/utils/clickhouse-copier.md
- *
- * Implementation details:
- *
- * cluster-copier workers pull each partition of each shard of the source cluster and push it to the destination cluster through
- * Distributed table (to preform data resharding). So, worker job is a partition of a source shard.
- * A job has three states: Active, Finished and Abandoned. Abandoned means that worker died and did not finish the job.
- *
- * If an error occurred during the copying (a worker failed or a worker did not finish the INSERT), then the whole partition (on
- * all destination servers) should be dropped and refilled. So, copying entity is a partition of all destination shards.
- * If a failure is detected a special /is_dirty node is created in ZooKeeper signalling that other workers copying the same partition
- * should stop, after a refilling procedure should start.
- *
- * ZooKeeper task node has the following structure:
- *  /task/path_root                     - path passed in --task-path parameter
- *      /description                    - contains user-defined XML config of the task
- *      /task_active_workers            - contains ephemeral nodes of all currently active workers, used to implement max_workers limitation
- *          /server_fqdn#PID_timestamp  - cluster-copier worker ID
- *          ...
- *      /tables             - directory with table tasks
- *      /cluster.db.table1  - directory of table_hits task
- *          /partition1     - directory for partition1
- *              /shards     - directory for source cluster shards
- *                  /1      - worker job for the first shard of partition1 of table test.hits
- *                            Contains info about current status (Active or Finished) and worker ID.
- *                  /2
- *                  ...
- *              /partition_active_workers
- *                  /1      - for each job in /shards a corresponding ephemeral node created in /partition_active_workers
- *                            It is used to detect Abandoned jobs (if there is Active node in /shards and there is no node in
- *                            /partition_active_workers).
- *                            Also, it is used to track active workers in the partition (when we need to refill the partition we do
- *                            not DROP PARTITION while there are active workers)
- *                  /2
- *                  ...
- *              /is_dirty   - the node is set if some worker detected that an error occurred (the INSERT is failed or an Abandoned node is
- *                            detected). If the node appeared workers in this partition should stop and start cleaning and refilling
- *                            partition procedure.
- *                            During this procedure a single 'cleaner' worker is selected. The worker waits for stopping all partition
- *                            workers, removes /shards node, executes DROP PARTITION on each destination node and removes /is_dirty node.
- *                  /cleaner- An ephemeral node used to select 'cleaner' worker. Contains ID of the worker.
- *      /cluster.db.table2
- *          ...
- */
+#include "Aliases.h"
+#include "Internals.h"
+#include "TaskCluster.h"
+#include "TaskTableAndShard.h"
+#include "ShardPartition.h"
+#include "ZooKeeperStaff.h"

 namespace DB
 {

-class ClusterCopierApp : public BaseDaemon
+class ClusterCopier
 {
 public:

-    void initialize(Poco::Util::Application & self) override;
+    ClusterCopier(const String & task_path_,
+                  const String & host_id_,
+                  const String & proxy_database_name_,
+                  Context & context_)
+            :
+            task_zookeeper_path(task_path_),
+            host_id(host_id_),
+            working_database_name(proxy_database_name_),
+            context(context_),
+            log(&Poco::Logger::get("ClusterCopier")) {}

-    void handleHelp(const std::string &, const std::string &);
+    void init();

-    void defineOptions(Poco::Util::OptionSet & options) override;
+    template<typename T>
+    decltype(auto) retry(T && func, UInt64 max_tries = 100);

-    int main(const std::vector<std::string> &) override;
+    void discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard) ;
+
+    /// Compute set of partitions, assume set of partitions aren't changed during the processing
+    void discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads = 0);
+
+    void uploadTaskDescription(const std::string & task_path, const std::string & task_file, const bool force);
+
+    void reloadTaskDescription();
+
+    void updateConfigIfNeeded();
+
+    void process(const ConnectionTimeouts & timeouts);
+
+    /// Disables DROP PARTITION commands that used to clear data after errors
+    void setSafeMode(bool is_safe_mode_ = true)
+    {
+        is_safe_mode = is_safe_mode_;
+    }
+
+    void setCopyFaultProbability(double copy_fault_probability_)
+    {
+        copy_fault_probability = copy_fault_probability_;
+    }
+
+
+protected:
+
+    String getWorkersPath() const
+    {
+        return task_cluster->task_zookeeper_path + "/task_active_workers";
+    }
+
+    String getWorkersPathVersion() const
+    {
+        return getWorkersPath() + "_version";
+    }
+
+    String getCurrentWorkerNodePath() const
+    {
+        return getWorkersPath() + "/" + host_id;
+    }
+
+    zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed(
+            const zkutil::ZooKeeperPtr &zookeeper,
+            const String &description,
+            bool unprioritized);
+
+    /** Checks that the whole partition of a table was copied. We should do it carefully due to dirty lock.
+     * State of some task could change during the processing.
+     * We have to ensure that all shards have the finished state and there is no dirty flag.
+     * Moreover, we have to check status twice and check zxid, because state can change during the checking.
+     */
+    bool checkPartitionIsDone(const TaskTable & task_table, const String & partition_name,
+                              const TasksShard & shards_with_partition);
+
+    /// Removes MATERIALIZED and ALIAS columns from create table query
+    static ASTPtr removeAliasColumnsFromCreateQuery(const ASTPtr &query_ast);
+
+    /// Replaces ENGINE and table name in a create query
+    std::shared_ptr<ASTCreateQuery>
+    rewriteCreateQueryStorage(const ASTPtr & create_query_ast, const DatabaseAndTableName & new_table,
+                              const ASTPtr & new_storage_ast);
+
+    bool tryDropPartition(ShardPartition & task_partition,
+                          const zkutil::ZooKeeperPtr & zookeeper,
+                          const CleanStateClock & clean_state_clock);
+
+
+    static constexpr UInt64 max_table_tries = 1000;
+    static constexpr UInt64 max_shard_partition_tries = 600;
+
+    bool tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table);
+
+    PartitionTaskStatus tryProcessPartitionTask(const ConnectionTimeouts & timeouts,
+                                                ShardPartition & task_partition,
+                                                bool is_unprioritized_task);
+
+    PartitionTaskStatus processPartitionTaskImpl(const ConnectionTimeouts & timeouts,
+                                                 ShardPartition & task_partition,
+                                                 bool is_unprioritized_task);
+
+    void dropAndCreateLocalTable(const ASTPtr & create_ast);
+
+    void dropLocalTableIfExists (const DatabaseAndTableName & table_name) const;
+
+    String getRemoteCreateTable(const DatabaseAndTableName & table,
+                                Connection & connection,
+                                const Settings * settings = nullptr);
+
+    ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts,
+                                      TaskShard & task_shard);
+
+    void createShardInternalTables(const ConnectionTimeouts & timeouts,
+                                   TaskShard & task_shard,
+                                   bool create_split = true);
+
+    std::set<String> getShardPartitions(const ConnectionTimeouts & timeouts,
+                                        TaskShard & task_shard);
+
+    bool checkShardHasPartition(const ConnectionTimeouts & timeouts,
+                                TaskShard & task_shard,
+                                const String & partition_quoted_name);
+
+    /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster
+      * Returns number of shards for which at least one replica executed query successfully
+      */
+    UInt64 executeQueryOnCluster(
+            const ClusterPtr & cluster,
+            const String & query,
+            const ASTPtr & query_ast_ = nullptr,
+            const Settings * settings = nullptr,
+            PoolMode pool_mode = PoolMode::GET_ALL,
+            UInt64 max_successful_executions_per_shard = 0) const;

 private:
+    String task_zookeeper_path;
+    String task_description_path;
+    String host_id;
+    String working_database_name;

-    using Base = BaseDaemon;
+    /// Auto update config stuff
+    UInt64 task_descprtion_current_version = 1;
+    std::atomic<UInt64> task_descprtion_version{1};
+    Coordination::WatchCallback task_description_watch_callback;
+    /// ZooKeeper session used to set the callback
+    zkutil::ZooKeeperPtr task_description_watch_zookeeper;

-    void mainImpl();
+    ConfigurationPtr task_cluster_initial_config;
+    ConfigurationPtr task_cluster_current_config;
+    Coordination::Stat task_descprtion_current_stat{};

-    void setupLogging();
+    std::unique_ptr<TaskCluster> task_cluster;

-    std::string config_xml_path;
-    std::string task_path;
-    std::string log_level = "debug";
    bool is_safe_mode = false;
-    double copy_fault_probability = 0;
-    bool is_help = false;
+    double copy_fault_probability = 0.0;

-    std::string base_dir;
-    std::string process_path;
-    std::string process_id;
-    std::string host_id;
+    Context & context;
+    Poco::Logger * log;
+
+    std::chrono::milliseconds default_sleep_time{1000};
 };

 }
--- a/dbms/programs/copier/ClusterCopierApp.cpp
+++ b/dbms/programs/copier/ClusterCopierApp.cpp
@ -0,0 +1,172 @@
+#include "ClusterCopierApp.h"
+
+namespace DB
+{
+
+/// ClusterCopierApp
+
+void ClusterCopierApp::initialize(Poco::Util::Application & self)
+{
+    is_help = config().has("help");
+    if (is_help)
+        return;
+
+    config_xml_path = config().getString("config-file");
+    task_path = config().getString("task-path");
+    log_level = config().getString("log-level", "trace");
+    is_safe_mode = config().has("safe-mode");
+    if (config().has("copy-fault-probability"))
+        copy_fault_probability = std::max(std::min(config().getDouble("copy-fault-probability"), 1.0), 0.0);
+    base_dir = (config().has("base-dir")) ? config().getString("base-dir") : Poco::Path::current();
+    // process_id is '<hostname>#<start_timestamp>_<pid>'
+    time_t timestamp = Poco::Timestamp().epochTime();
+    auto curr_pid = Poco::Process::id();
+
+    process_id = std::to_string(DateLUT::instance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid);
+    host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id;
+    process_path = Poco::Path(base_dir + "/clickhouse-copier_" + process_id).absolute().toString();
+    Poco::File(process_path).createDirectories();
+
+    /// Override variables for BaseDaemon
+    if (config().has("log-level"))
+        config().setString("logger.level", config().getString("log-level"));
+
+    if (config().has("base-dir") || !config().has("logger.log"))
+        config().setString("logger.log", process_path + "/log.log");
+
+    if (config().has("base-dir") || !config().has("logger.errorlog"))
+        config().setString("logger.errorlog", process_path + "/log.err.log");
+
+    Base::initialize(self);
+}
+
+
+void ClusterCopierApp::handleHelp(const std::string &, const std::string &)
+{
+    Poco::Util::HelpFormatter helpFormatter(options());
+    helpFormatter.setCommand(commandName());
+    helpFormatter.setHeader("Copies tables from one cluster to another");
+    helpFormatter.setUsage("--config-file <config-file> --task-path <task-path>");
+    helpFormatter.format(std::cerr);
+
+    stopOptionsProcessing();
+}
+
+
+void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
+{
+    Base::defineOptions(options);
+
+    options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper")
+                              .argument("task-path").binding("task-path"));
+    options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path")
+                              .argument("task-file").binding("task-file"));
+    options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists")
+                              .argument("task-upload-force").binding("task-upload-force"));
+    options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors")
+                              .binding("safe-mode"));
+    options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)")
+                              .argument("copy-fault-probability").binding("copy-fault-probability"));
+    options.addOption(Poco::Util::Option("log-level", "", "sets log level")
+                              .argument("log-level").binding("log-level"));
+    options.addOption(Poco::Util::Option("base-dir", "", "base directory for copiers, consecutive copier launches will populate /base-dir/launch_id/* directories")
+                              .argument("base-dir").binding("base-dir"));
+
+    using Me = std::decay_t<decltype(*this)>;
+    options.addOption(Poco::Util::Option("help", "", "produce this help message").binding("help")
+                              .callback(Poco::Util::OptionCallback<Me>(this, &Me::handleHelp)));
+}
+
+
+void ClusterCopierApp::mainImpl()
+{
+    StatusFile status_file(process_path + "/status");
+    ThreadStatus thread_status;
+
+    auto log = &logger();
+    LOG_INFO(log, "Starting clickhouse-copier ("
+            << "id " << process_id << ", "
+            << "host_id " << host_id << ", "
+            << "path " << process_path << ", "
+            << "revision " << ClickHouseRevision::get() << ")");
+
+    auto context = std::make_unique<Context>(Context::createGlobal());
+    context->makeGlobalContext();
+    SCOPE_EXIT(context->shutdown());
+
+    context->setConfig(loaded_config.configuration);
+    context->setApplicationType(Context::ApplicationType::LOCAL);
+    context->setPath(process_path);
+
+    registerFunctions();
+    registerAggregateFunctions();
+    registerTableFunctions();
+    registerStorages();
+    registerDictionaries();
+    registerDisks();
+
+    static const std::string default_database = "_local";
+    context->addDatabase(default_database, std::make_shared<DatabaseMemory>(default_database));
+    context->setCurrentDatabase(default_database);
+
+    /// Initialize query scope just in case.
+    CurrentThread::QueryScope query_scope(*context);
+
+    auto copier = std::make_unique<ClusterCopier>(task_path, host_id, default_database, *context);
+    copier->setSafeMode(is_safe_mode);
+    copier->setCopyFaultProbability(copy_fault_probability);
+
+    auto task_file = config().getString("task-file", "");
+    if (!task_file.empty())
+        copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false));
+
+    copier->init();
+    copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef()));
+
+    /// Reset ZooKeeper before removing ClusterCopier.
+    /// Otherwise zookeeper watch can call callback which use already removed ClusterCopier object.
+    context->resetZooKeeper();
+}
+
+
+int ClusterCopierApp::main(const std::vector<std::string> &)
+{
+    if (is_help)
+        return 0;
+
+    try
+    {
+        mainImpl();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(&Poco::Logger::root(), __PRETTY_FUNCTION__);
+        auto code = getCurrentExceptionCode();
+
+        return (code) ? code : -1;
+    }
+
+    return 0;
+}
+
+
+}
+
+#pragma GCC diagnostic ignored "-Wunused-function"
+#pragma GCC diagnostic ignored "-Wmissing-declarations"
+
+int mainEntryClickHouseClusterCopier(int argc, char ** argv)
+{
+    try
+    {
+        DB::ClusterCopierApp app;
+        return app.run(argc, argv);
+    }
+    catch (...)
+    {
+        std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
+        auto code = DB::getCurrentExceptionCode();
+
+        return (code) ? code : -1;
+    }
+}
--- a/dbms/programs/copier/ClusterCopierApp.h
+++ b/dbms/programs/copier/ClusterCopierApp.h
@ -0,0 +1,90 @@
+#pragma once
+
+#include <Poco/Util/ServerApplication.h>
+#include <daemon/BaseDaemon.h>
+
+#include "ClusterCopier.h"
+
+/* clickhouse cluster copier util
+ * Copies tables data from one cluster to new tables of other (possibly the same) cluster in distributed fault-tolerant manner.
+ *
+ * See overview in the docs: docs/en/utils/clickhouse-copier.md
+ *
+ * Implementation details:
+ *
+ * cluster-copier workers pull each partition of each shard of the source cluster and push it to the destination cluster through
+ * Distributed table (to preform data resharding). So, worker job is a partition of a source shard.
+ * A job has three states: Active, Finished and Abandoned. Abandoned means that worker died and did not finish the job.
+ *
+ * If an error occurred during the copying (a worker failed or a worker did not finish the INSERT), then the whole partition (on
+ * all destination servers) should be dropped and refilled. So, copying entity is a partition of all destination shards.
+ * If a failure is detected a special /is_dirty node is created in ZooKeeper signalling that other workers copying the same partition
+ * should stop, after a refilling procedure should start.
+ *
+ * ZooKeeper task node has the following structure:
+ *  /task/path_root                     - path passed in --task-path parameter
+ *      /description                    - contains user-defined XML config of the task
+ *      /task_active_workers            - contains ephemeral nodes of all currently active workers, used to implement max_workers limitation
+ *          /server_fqdn#PID_timestamp  - cluster-copier worker ID
+ *          ...
+ *      /tables             - directory with table tasks
+ *      /cluster.db.table1  - directory of table_hits task
+ *          /partition1     - directory for partition1
+ *              /shards     - directory for source cluster shards
+ *                  /1      - worker job for the first shard of partition1 of table test.hits
+ *                            Contains info about current status (Active or Finished) and worker ID.
+ *                  /2
+ *                  ...
+ *              /partition_active_workers
+ *                  /1      - for each job in /shards a corresponding ephemeral node created in /partition_active_workers
+ *                            It is used to detect Abandoned jobs (if there is Active node in /shards and there is no node in
+ *                            /partition_active_workers).
+ *                            Also, it is used to track active workers in the partition (when we need to refill the partition we do
+ *                            not DROP PARTITION while there are active workers)
+ *                  /2
+ *                  ...
+ *              /is_dirty   - the node is set if some worker detected that an error occurred (the INSERT is failed or an Abandoned node is
+ *                            detected). If the node appeared workers in this partition should stop and start cleaning and refilling
+ *                            partition procedure.
+ *                            During this procedure a single 'cleaner' worker is selected. The worker waits for stopping all partition
+ *                            workers, removes /shards node, executes DROP PARTITION on each destination node and removes /is_dirty node.
+ *                  /cleaner- An ephemeral node used to select 'cleaner' worker. Contains ID of the worker.
+ *      /cluster.db.table2
+ *          ...
+ */
+
+namespace DB
+{
+
+class ClusterCopierApp : public BaseDaemon
+{
+public:
+
+    void initialize(Poco::Util::Application & self) override;
+
+    void handleHelp(const std::string &, const std::string &);
+
+    void defineOptions(Poco::Util::OptionSet & options) override;
+
+    int main(const std::vector<std::string> &) override;
+
+private:
+
+    using Base = BaseDaemon;
+
+    void mainImpl();
+
+    std::string config_xml_path;
+    std::string task_path;
+    std::string log_level = "trace";
+    bool is_safe_mode = false;
+    double copy_fault_probability = 0;
+    bool is_help = false;
+
+    std::string base_dir;
+    std::string process_path;
+    std::string process_id;
+    std::string host_id;
+};
+
+}
--- a/dbms/programs/copier/ClusterPartition.h
+++ b/dbms/programs/copier/ClusterPartition.h
@ -0,0 +1,17 @@
+#pragma once
+
+#include "Aliases.h"
+
+namespace DB
+{
+    /// Contains info about all shards that contain a partition
+    struct ClusterPartition
+    {
+        double elapsed_time_seconds = 0;
+        UInt64 bytes_copied = 0;
+        UInt64 rows_copied = 0;
+        UInt64 blocks_copied = 0;
+
+        UInt64 total_tries = 0;
+    };
+}
--- a/dbms/programs/copier/Internals.cpp
+++ b/dbms/programs/copier/Internals.cpp
@ -0,0 +1,141 @@
+#include "Internals.h"
+
+namespace DB
+{
+
+ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data)
+{
+    std::stringstream ss(xml_data);
+    Poco::XML::InputSource input_source{ss};
+    return {new Poco::Util::XMLConfiguration{&input_source}};
+}
+
+
+String getQuotedTable(const String & database, const String & table)
+{
+    if (database.empty())
+        return backQuoteIfNeed(table);
+
+    return backQuoteIfNeed(database) + "." + backQuoteIfNeed(table);
+}
+
+String getQuotedTable(const DatabaseAndTableName & db_and_table)
+{
+    return getQuotedTable(db_and_table.first, db_and_table.second);
+}
+
+
+// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key])
+std::shared_ptr<ASTStorage> createASTStorageDistributed(
+        const String & cluster_name, const String & database, const String & table,
+        const ASTPtr & sharding_key_ast)
+{
+    auto args = std::make_shared<ASTExpressionList>();
+    args->children.emplace_back(std::make_shared<ASTLiteral>(cluster_name));
+    args->children.emplace_back(std::make_shared<ASTIdentifier>(database));
+    args->children.emplace_back(std::make_shared<ASTIdentifier>(table));
+    if (sharding_key_ast)
+        args->children.emplace_back(sharding_key_ast);
+
+    auto engine = std::make_shared<ASTFunction>();
+    engine->name = "Distributed";
+    engine->arguments = args;
+
+    auto storage = std::make_shared<ASTStorage>();
+    storage->set(storage->engine, engine);
+
+    return storage;
+}
+
+
+BlockInputStreamPtr squashStreamIntoOneBlock(const BlockInputStreamPtr & stream)
+{
+    return std::make_shared<SquashingBlockInputStream>(
+            stream,
+            std::numeric_limits<size_t>::max(),
+            std::numeric_limits<size_t>::max());
+}
+
+Block getBlockWithAllStreamData(const BlockInputStreamPtr & stream)
+{
+    return squashStreamIntoOneBlock(stream)->read();
+}
+
+
+bool isExtendedDefinitionStorage(const ASTPtr & storage_ast)
+{
+    const auto & storage = storage_ast->as<ASTStorage &>();
+    return storage.partition_by || storage.order_by || storage.sample_by;
+}
+
+ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
+{
+    String storage_str = queryToString(storage_ast);
+
+    const auto & storage = storage_ast->as<ASTStorage &>();
+    const auto & engine = storage.engine->as<ASTFunction &>();
+
+    if (!endsWith(engine.name, "MergeTree"))
+    {
+        throw Exception(
+                "Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported",
+                ErrorCodes::BAD_ARGUMENTS);
+    }
+
+    if (isExtendedDefinitionStorage(storage_ast))
+    {
+        if (storage.partition_by)
+            return storage.partition_by->clone();
+
+        static const char * all = "all";
+        return std::make_shared<ASTLiteral>(Field(all, strlen(all)));
+    }
+    else
+    {
+        bool is_replicated = startsWith(engine.name, "Replicated");
+        size_t min_args = is_replicated ? 3 : 1;
+
+        if (!engine.arguments)
+            throw Exception("Expected arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);
+
+        ASTPtr arguments_ast = engine.arguments->clone();
+        ASTs & arguments = arguments_ast->children;
+
+        if (arguments.size() < min_args)
+            throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str,
+                            ErrorCodes::BAD_ARGUMENTS);
+
+        ASTPtr & month_arg = is_replicated ? arguments[2] : arguments[1];
+        return makeASTFunction("toYYYYMM", month_arg->clone());
+    }
+}
+
+ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random)
+{
+    ShardPriority res;
+
+    if (replicas.empty())
+        return res;
+
+    res.is_remote = 1;
+    for (auto & replica : replicas)
+    {
+        if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name)))
+        {
+            res.is_remote = 0;
+            break;
+        }
+    }
+
+    res.hostname_difference = std::numeric_limits<size_t>::max();
+    for (auto & replica : replicas)
+    {
+        size_t difference = getHostNameDifference(local_hostname, replica.host_name);
+        res.hostname_difference = std::min(difference, res.hostname_difference);
+    }
+
+    res.random = random;
+    return res;
+}
+
+}
--- a/dbms/programs/copier/Internals.h
+++ b/dbms/programs/copier/Internals.h
@ -0,0 +1,184 @@
+#pragma once
+
+#include <chrono>
+#include <optional>
+#include <Poco/Util/XMLConfiguration.h>
+#include <Poco/Logger.h>
+#include <Poco/ConsoleChannel.h>
+#include <Poco/FormattingChannel.h>
+#include <Poco/PatternFormatter.h>
+#include <Poco/UUIDGenerator.h>
+#include <Poco/File.h>
+#include <Poco/Process.h>
+#include <Poco/FileChannel.h>
+#include <Poco/SplitterChannel.h>
+#include <Poco/Util/HelpFormatter.h>
+#include <boost/algorithm/string.hpp>
+#include <common/logger_useful.h>
+#include <Common/ThreadPool.h>
+#include <Common/Exception.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
+#include <Common/ZooKeeper/KeeperException.h>
+#include <Common/getFQDNOrHostName.h>
+#include <Common/isLocalAddress.h>
+#include <Common/typeid_cast.h>
+#include <Common/ClickHouseRevision.h>
+#include <Common/formatReadable.h>
+#include <Common/DNSResolver.h>
+#include <Common/CurrentThread.h>
+#include <Common/escapeForFileName.h>
+#include <Common/getNumberOfPhysicalCPUCores.h>
+#include <Common/ThreadStatus.h>
+#include <Client/Connection.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/InterpreterFactory.h>
+#include <Interpreters/InterpreterExistsQuery.h>
+#include <Interpreters/InterpreterShowCreateQuery.h>
+#include <Interpreters/InterpreterDropQuery.h>
+#include <Interpreters/InterpreterCreateQuery.h>
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnsNumber.h>
+#include <DataTypes/DataTypeString.h>
+#include <Parsers/ParserCreateQuery.h>
+#include <Parsers/parseQuery.h>
+#include <Parsers/ParserQuery.h>
+#include <Parsers/ASTCreateQuery.h>
+#include <Parsers/queryToString.h>
+#include <Parsers/ASTDropQuery.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTExpressionList.h>
+#include <Formats/FormatSettings.h>
+#include <DataStreams/RemoteBlockInputStream.h>
+#include <DataStreams/SquashingBlockInputStream.h>
+#include <DataStreams/AsynchronousBlockInputStream.h>
+#include <DataStreams/copyData.h>
+#include <DataStreams/NullBlockOutputStream.h>
+#include <IO/ConnectionTimeouts.h>
+#include <IO/Operators.h>
+#include <IO/ReadBufferFromString.h>
+#include <IO/ReadBufferFromFile.h>
+#include <Functions/registerFunctions.h>
+#include <TableFunctions/registerTableFunctions.h>
+#include <AggregateFunctions/registerAggregateFunctions.h>
+#include <Storages/registerStorages.h>
+#include <Storages/StorageDistributed.h>
+#include <Dictionaries/registerDictionaries.h>
+#include <Disks/registerDisks.h>
+#include <Databases/DatabaseMemory.h>
+#include <Common/StatusFile.h>
+
+#include "Aliases.h"
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NO_ZOOKEEPER;
+    extern const int BAD_ARGUMENTS;
+    extern const int UNKNOWN_TABLE;
+    extern const int UNFINISHED;
+    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
+}
+
+
+ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data);
+
+String getQuotedTable(const String & database, const String & table);
+
+String getQuotedTable(const DatabaseAndTableName & db_and_table);
+
+
+enum class TaskState
+{
+    Started = 0,
+    Finished,
+    Unknown
+};
+
+/// Used to mark status of shard partition tasks
+struct TaskStateWithOwner
+{
+    TaskStateWithOwner() = default;
+
+    TaskStateWithOwner(TaskState state_, const String & owner_) : state(state_), owner(owner_) {}
+
+    TaskState state{TaskState::Unknown};
+    String owner;
+
+    static String getData(TaskState state, const String &owner)
+    {
+        return TaskStateWithOwner(state, owner).toString();
+    }
+
+    String toString()
+    {
+        WriteBufferFromOwnString wb;
+        wb << static_cast<UInt32>(state) << "\n" << escape << owner;
+        return wb.str();
+    }
+
+    static TaskStateWithOwner fromString(const String & data)
+    {
+        ReadBufferFromString rb(data);
+        TaskStateWithOwner res;
+        UInt32 state;
+
+        rb >> state >> "\n" >> escape >> res.owner;
+
+        if (state >= static_cast<int>(TaskState::Unknown))
+            throw Exception("Unknown state " + data, ErrorCodes::LOGICAL_ERROR);
+
+        res.state = static_cast<TaskState>(state);
+        return res;
+    }
+};
+
+
+
+struct ShardPriority
+{
+    UInt8 is_remote = 1;
+    size_t hostname_difference = 0;
+    UInt8 random = 0;
+
+    static bool greaterPriority(const ShardPriority & current, const ShardPriority & other)
+    {
+        return std::forward_as_tuple(current.is_remote, current.hostname_difference, current.random)
+               < std::forward_as_tuple(other.is_remote, other.hostname_difference, other.random);
+    }
+};
+
+/// Execution status of a task
+enum class PartitionTaskStatus
+{
+    Active,
+    Finished,
+    Error,
+};
+
+
+struct MultiTransactionInfo
+{
+    int32_t code;
+    Coordination::Requests requests;
+    Coordination::Responses responses;
+};
+
+// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key])
+std::shared_ptr<ASTStorage> createASTStorageDistributed(
+        const String & cluster_name, const String & database, const String & table,
+        const ASTPtr & sharding_key_ast = nullptr);
+
+
+BlockInputStreamPtr squashStreamIntoOneBlock(const BlockInputStreamPtr & stream);
+
+Block getBlockWithAllStreamData(const BlockInputStreamPtr & stream);
+
+bool isExtendedDefinitionStorage(const ASTPtr & storage_ast);
+
+ASTPtr extractPartitionKey(const ASTPtr & storage_ast);
+
+ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random);
+
+}
--- a/dbms/programs/copier/ShardPartition.h
+++ b/dbms/programs/copier/ShardPartition.h
@ -0,0 +1,68 @@
+#pragma once
+
+#include "Aliases.h"
+
+namespace DB
+{
+
+/// Just destination partition of a shard
+struct ShardPartition
+{
+    ShardPartition(TaskShard & parent, const String & name_quoted_) : task_shard(parent), name(name_quoted_) {}
+
+    String getPartitionPath() const;
+    String getPartitionCleanStartPath() const;
+    String getCommonPartitionIsDirtyPath() const;
+    String getCommonPartitionIsCleanedPath() const;
+    String getPartitionActiveWorkersPath() const;
+    String getActiveWorkerPath() const;
+    String getPartitionShardsPath() const;
+    String getShardStatusPath() const;
+
+    TaskShard & task_shard;
+    String name;
+};
+
+inline String ShardPartition::getPartitionCleanStartPath() const
+{
+    return getPartitionPath() + "/clean_start";
+}
+
+inline String ShardPartition::getPartitionPath() const
+{
+    return task_shard.task_table.getPartitionPath(name);
+}
+
+inline String ShardPartition::getShardStatusPath() const
+{
+    // schema: /<root...>/tables/<table>/<partition>/shards/<shard>
+    // e.g. /root/table_test.hits/201701/shards/1
+    return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster());
+}
+
+inline String ShardPartition::getPartitionShardsPath() const
+{
+    return getPartitionPath() + "/shards";
+}
+
+inline String ShardPartition::getPartitionActiveWorkersPath() const
+{
+    return getPartitionPath() + "/partition_active_workers";
+}
+
+inline String ShardPartition::getActiveWorkerPath() const
+{
+    return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster());
+}
+
+inline String ShardPartition::getCommonPartitionIsDirtyPath() const
+{
+    return getPartitionPath() + "/is_dirty";
+}
+
+inline String ShardPartition::getCommonPartitionIsCleanedPath() const
+{
+    return getCommonPartitionIsDirtyPath() + "/cleaned";
+}
+
+}
--- a/dbms/programs/copier/TaskCluster.h
+++ b/dbms/programs/copier/TaskCluster.h
@ -0,0 +1,96 @@
+#pragma once
+
+#include "Aliases.h"
+
+namespace DB
+{
+
+struct TaskCluster
+{
+    TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_)
+            : task_zookeeper_path(task_zookeeper_path_), default_local_database(default_local_database_) {}
+
+    void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");
+
+    /// Set (or update) settings and max_workers param
+    void reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");
+
+    /// Base node for all tasks. Its structure:
+    ///  workers/ - directory with active workers (amount of them is less or equal max_workers)
+    ///  description - node with task configuration
+    ///  table_table1/ - directories with per-partition copying status
+    String task_zookeeper_path;
+
+    /// Database used to create temporary Distributed tables
+    String default_local_database;
+
+    /// Limits number of simultaneous workers
+    UInt64 max_workers = 0;
+
+    /// Base settings for pull and push
+    Settings settings_common;
+    /// Settings used to fetch data
+    Settings settings_pull;
+    /// Settings used to insert data
+    Settings settings_push;
+
+    String clusters_prefix;
+
+    /// Subtasks
+    TasksTable table_tasks;
+
+    std::random_device random_device;
+    pcg64 random_engine;
+};
+
+inline void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key)
+{
+    String prefix = base_key.empty() ? "" : base_key + ".";
+
+    clusters_prefix = prefix + "remote_servers";
+    if (!config.has(clusters_prefix))
+        throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS);
+
+    Poco::Util::AbstractConfiguration::Keys tables_keys;
+    config.keys(prefix + "tables", tables_keys);
+
+    for (const auto & table_key : tables_keys)
+    {
+        table_tasks.emplace_back(*this, config, prefix + "tables", table_key);
+    }
+}
+
+inline void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key)
+{
+    String prefix = base_key.empty() ? "" : base_key + ".";
+
+    max_workers = config.getUInt64(prefix + "max_workers");
+
+    settings_common = Settings();
+    if (config.has(prefix + "settings"))
+        settings_common.loadSettingsFromConfig(prefix + "settings", config);
+
+    settings_pull = settings_common;
+    if (config.has(prefix + "settings_pull"))
+        settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config);
+
+    settings_push = settings_common;
+    if (config.has(prefix + "settings_push"))
+        settings_push.loadSettingsFromConfig(prefix + "settings_push", config);
+
+    auto set_default_value = [] (auto && setting, auto && default_value)
+    {
+        setting = setting.changed ? setting.value : default_value;
+    };
+
+    /// Override important settings
+    settings_pull.readonly = 1;
+    settings_push.insert_distributed_sync = 1;
+    set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME);
+    set_default_value(settings_pull.max_threads, 1);
+    set_default_value(settings_pull.max_block_size, 8192UL);
+    set_default_value(settings_pull.preferred_block_size_bytes, 0);
+    set_default_value(settings_push.insert_distributed_timeout, 0);
+}
+
+}
--- a/dbms/programs/copier/TaskTableAndShard.h
+++ b/dbms/programs/copier/TaskTableAndShard.h
@ -0,0 +1,274 @@
+#pragma once
+
+#include "Aliases.h"
+#include "Internals.h"
+#include "ClusterPartition.h"
+
+namespace DB
+{
+
+struct TaskShard;
+
+struct TaskTable
+{
+    TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix,
+              const String & table_key);
+
+    TaskCluster & task_cluster;
+
+    String getPartitionPath(const String & partition_name) const;
+    String getPartitionIsDirtyPath(const String & partition_name) const;
+    String getPartitionIsCleanedPath(const String & partition_name) const;
+    String getPartitionTaskStatusPath(const String & partition_name) const;
+
+    String name_in_config;
+
+    /// Used as task ID
+    String table_id;
+
+    /// Source cluster and table
+    String cluster_pull_name;
+    DatabaseAndTableName table_pull;
+
+    /// Destination cluster and table
+    String cluster_push_name;
+    DatabaseAndTableName table_push;
+
+    /// Storage of destination table
+    String engine_push_str;
+    ASTPtr engine_push_ast;
+    ASTPtr engine_push_partition_key_ast;
+
+    /// A Distributed table definition used to split data
+    String sharding_key_str;
+    ASTPtr sharding_key_ast;
+    ASTPtr engine_split_ast;
+
+    /// Additional WHERE expression to filter input data
+    String where_condition_str;
+    ASTPtr where_condition_ast;
+
+    /// Resolved clusters
+    ClusterPtr cluster_pull;
+    ClusterPtr cluster_push;
+
+    /// Filter partitions that should be copied
+    bool has_enabled_partitions = false;
+    Strings enabled_partitions;
+    NameSet enabled_partitions_set;
+
+    /// Prioritized list of shards
+    TasksShard all_shards;
+    TasksShard local_shards;
+
+    ClusterPartitions cluster_partitions;
+    NameSet finished_cluster_partitions;
+
+    /// Parition names to process in user-specified order
+    Strings ordered_partition_names;
+
+    ClusterPartition & getClusterPartition(const String & partition_name)
+    {
+        auto it = cluster_partitions.find(partition_name);
+        if (it == cluster_partitions.end())
+            throw Exception("There are no cluster partition " + partition_name + " in " + table_id, ErrorCodes::LOGICAL_ERROR);
+        return it->second;
+    }
+
+    Stopwatch watch;
+    UInt64 bytes_copied = 0;
+    UInt64 rows_copied = 0;
+
+    template <typename RandomEngine>
+    void initShards(RandomEngine && random_engine);
+};
+
+
+struct TaskShard
+{
+    TaskShard(TaskTable &parent, const ShardInfo &info_) : task_table(parent), info(info_) {}
+
+    TaskTable & task_table;
+
+    ShardInfo info;
+
+    UInt32 numberInCluster() const { return info.shard_num; }
+
+    UInt32 indexInCluster() const { return info.shard_num - 1; }
+
+    String getDescription() const;
+
+    String getHostNameExample() const;
+
+    /// Used to sort clusters by their proximity
+    ShardPriority priority;
+
+    /// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard
+    ColumnWithTypeAndName partition_key_column;
+
+    /// There is a task for each destination partition
+    TasksPartition partition_tasks;
+
+    /// Which partitions have been checked for existence
+    /// If some partition from this lists is exists, it is in partition_tasks
+    std::set<String> checked_partitions;
+
+    /// Last CREATE TABLE query of the table of the shard
+    ASTPtr current_pull_table_create_query;
+
+    /// Internal distributed tables
+    DatabaseAndTableName table_read_shard;
+    DatabaseAndTableName table_split_shard;
+};
+
+
+inline String TaskTable::getPartitionPath(const String & partition_name) const
+{
+    return task_cluster.task_zookeeper_path             // root
+           + "/tables/" + table_id                      // tables/dst_cluster.merge.hits
+           + "/" + escapeForFileName(partition_name);   // 201701
+}
+
+inline String TaskTable::getPartitionIsDirtyPath(const String & partition_name) const
+{
+    return getPartitionPath(partition_name) + "/is_dirty";
+}
+
+inline String TaskTable::getPartitionIsCleanedPath(const String & partition_name) const
+{
+    return getPartitionIsDirtyPath(partition_name) + "/cleaned";
+}
+
+inline String TaskTable::getPartitionTaskStatusPath(const String & partition_name) const
+{
+    return getPartitionPath(partition_name) + "/shards";
+}
+
+inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix_,
+                     const String & table_key)
+        : task_cluster(parent)
+{
+    String table_prefix = prefix_ + "." + table_key + ".";
+
+    name_in_config = table_key;
+
+    cluster_pull_name = config.getString(table_prefix + "cluster_pull");
+    cluster_push_name = config.getString(table_prefix + "cluster_push");
+
+    table_pull.first = config.getString(table_prefix + "database_pull");
+    table_pull.second = config.getString(table_prefix + "table_pull");
+
+    table_push.first = config.getString(table_prefix + "database_push");
+    table_push.second = config.getString(table_prefix + "table_push");
+
+    /// Used as node name in ZooKeeper
+    table_id = escapeForFileName(cluster_push_name)
+               + "." + escapeForFileName(table_push.first)
+               + "." + escapeForFileName(table_push.second);
+
+    engine_push_str = config.getString(table_prefix + "engine");
+    {
+        ParserStorage parser_storage;
+        engine_push_ast = parseQuery(parser_storage, engine_push_str, 0);
+        engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
+    }
+
+    sharding_key_str = config.getString(table_prefix + "sharding_key");
+    {
+        ParserExpressionWithOptionalAlias parser_expression(false);
+        sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0);
+        engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second, sharding_key_ast);
+    }
+
+    where_condition_str = config.getString(table_prefix + "where_condition", "");
+    if (!where_condition_str.empty())
+    {
+        ParserExpressionWithOptionalAlias parser_expression(false);
+        where_condition_ast = parseQuery(parser_expression, where_condition_str, 0);
+
+        // Will use canonical expression form
+        where_condition_str = queryToString(where_condition_ast);
+    }
+
+    String enabled_partitions_prefix = table_prefix + "enabled_partitions";
+    has_enabled_partitions = config.has(enabled_partitions_prefix);
+
+    if (has_enabled_partitions)
+    {
+        Strings keys;
+        config.keys(enabled_partitions_prefix, keys);
+
+        if (keys.empty())
+        {
+            /// Parse list of partition from space-separated string
+            String partitions_str = config.getString(table_prefix + "enabled_partitions");
+            boost::trim_if(partitions_str, isWhitespaceASCII);
+            boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
+        }
+        else
+        {
+            /// Parse sequence of <partition>...</partition>
+            for (const String & key : keys)
+            {
+                if (!startsWith(key, "partition"))
+                    throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
+
+                enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key));
+            }
+        }
+
+        std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin()));
+    }
+}
+
+template<typename RandomEngine>
+inline void TaskTable::initShards(RandomEngine && random_engine)
+{
+    const String & fqdn_name = getFQDNOrHostName();
+    std::uniform_int_distribution<UInt8> get_urand(0, std::numeric_limits<UInt8>::max());
+
+    // Compute the priority
+    for (auto & shard_info : cluster_pull->getShardsInfo())
+    {
+        TaskShardPtr task_shard = std::make_shared<TaskShard>(*this, shard_info);
+        const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster());
+        task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine));
+
+        all_shards.emplace_back(task_shard);
+    }
+
+    // Sort by priority
+    std::sort(all_shards.begin(), all_shards.end(),
+              [](const TaskShardPtr & lhs, const TaskShardPtr & rhs)
+              {
+                  return ShardPriority::greaterPriority(lhs->priority, rhs->priority);
+              });
+
+    // Cut local shards
+    auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1,
+                                            [](const TaskShardPtr & lhs, UInt8 is_remote)
+                                            {
+                                                return lhs->priority.is_remote < is_remote;
+                                            });
+
+    local_shards.assign(all_shards.begin(), it_first_remote);
+}
+
+
+inline String DB::TaskShard::getDescription() const
+{
+    std::stringstream ss;
+    ss << "N" << numberInCluster()
+       << " (having a replica " << getHostNameExample()
+       << ", pull table " + getQuotedTable(task_table.table_pull)
+       << " of cluster " + task_table.cluster_pull_name << ")";
+    return ss.str();
+}
+
+inline String DB::TaskShard::getHostNameExample() const
+{
+    auto &replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster());
+    return replicas.at(0).readableString();
+}
+
+}
--- a/dbms/programs/copier/ZooKeeperStaff.h
+++ b/dbms/programs/copier/ZooKeeperStaff.h
@ -0,0 +1,224 @@
+#pragma once
+
+/** Allows to compare two incremental counters of type UInt32 in presence of possible overflow.
+  * We assume that we compare values that are not too far away.
+  * For example, when we increment 0xFFFFFFFF, we get 0. So, 0xFFFFFFFF is less than 0.
+  */
+class WrappingUInt32
+{
+public:
+    UInt32 value;
+
+    explicit WrappingUInt32(UInt32 _value)
+            : value(_value)
+    {}
+
+    bool operator<(const WrappingUInt32 & other) const
+    {
+        return value != other.value && *this <= other;
+    }
+
+    bool operator<=(const WrappingUInt32 & other) const
+    {
+        const UInt32 HALF = 1 << 31;
+        return (value <= other.value && other.value - value < HALF)
+               || (value > other.value && value - other.value > HALF);
+    }
+
+    bool operator==(const WrappingUInt32 & other) const
+    {
+        return value == other.value;
+    }
+};
+
+/** Conforming Zxid definition.
+  * cf. https://github.com/apache/zookeeper/blob/631d1b284f0edb1c4f6b0fb221bf2428aec71aaa/zookeeper-docs/src/main/resources/markdown/zookeeperInternals.md#guarantees-properties-and-definitions
+  *
+  * But it is better to read this: https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html
+  *
+  * Actually here is the definition of Zxid.
+  * Every change to the ZooKeeper state receives a stamp in the form of a zxid (ZooKeeper Transaction Id).
+  * This exposes the total ordering of all changes to ZooKeeper. Each change will have a unique zxid
+  * and if zxid1 is smaller than zxid2 then zxid1 happened before zxid2.
+  */
+class Zxid
+{
+public:
+    WrappingUInt32 epoch;
+    WrappingUInt32 counter;
+    explicit Zxid(UInt64 _zxid)
+            : epoch(_zxid >> 32)
+            , counter(_zxid)
+    {}
+
+    bool operator<=(const Zxid & other) const
+    {
+        return (epoch < other.epoch)
+               || (epoch == other.epoch && counter <= other.counter);
+    }
+
+    bool operator==(const Zxid & other) const
+    {
+        return epoch == other.epoch && counter == other.counter;
+    }
+};
+
+/* When multiple ClusterCopiers discover that the target partition is not empty,
+ * they will attempt to clean up this partition before proceeding to copying.
+ *
+ * Instead of purging is_dirty, the history of cleaning work is preserved and partition hygiene is established
+ * based on a happens-before relation between the events.
+ * This relation is encoded by LogicalClock based on the mzxid of the is_dirty ZNode and is_dirty/cleaned.
+ * The fact of the partition hygiene is encoded by CleanStateClock.
+ *
+ * For you to know what mzxid means:
+ *
+ * ZooKeeper Stat Structure:
+ * The Stat structure for each znode in ZooKeeper is made up of the following fields:
+ *
+ * -- czxid
+ * The zxid of the change that caused this znode to be created.
+ *
+ * -- mzxid
+ * The zxid of the change that last modified this znode.
+ *
+ * -- ctime
+ * The time in milliseconds from epoch when this znode was created.
+ *
+ * -- mtime
+ * The time in milliseconds from epoch when this znode was last modified.
+ *
+ * -- version
+ * The number of changes to the data of this znode.
+ *
+ * -- cversion
+ * The number of changes to the children of this znode.
+ *
+ * -- aversion
+ * The number of changes to the ACL of this znode.
+ *
+ * -- ephemeralOwner
+ * The session id of the owner of this znode if the znode is an ephemeral node.
+ * If it is not an ephemeral node, it will be zero.
+ *
+ * -- dataLength
+ * The length of the data field of this znode.
+ *
+ * -- numChildren
+ * The number of children of this znode.
+ * */
+
+class LogicalClock
+{
+public:
+    std::optional<Zxid> zxid;
+
+    LogicalClock() = default;
+
+    explicit LogicalClock(UInt64 _zxid)
+            : zxid(_zxid)
+    {}
+
+    bool hasHappened() const
+    {
+        return bool(zxid);
+    }
+
+    /// happens-before relation with a reasonable time bound
+    bool happensBefore(const LogicalClock & other) const
+    {
+        return !zxid
+               || (other.zxid && *zxid <= *other.zxid);
+    }
+
+    bool operator<=(const LogicalClock & other) const
+    {
+        return happensBefore(other);
+    }
+
+    /// strict equality check
+    bool operator==(const LogicalClock & other) const
+    {
+        return zxid == other.zxid;
+    }
+};
+
+
+class CleanStateClock
+{
+public:
+    LogicalClock discovery_zxid;
+    std::optional<UInt32> discovery_version;
+
+    LogicalClock clean_state_zxid;
+    std::optional<UInt32> clean_state_version;
+
+    std::shared_ptr<std::atomic_bool> stale;
+
+    bool is_clean() const
+    {
+        return
+                !is_stale()
+                && (
+                        !discovery_zxid.hasHappened()
+                        || (clean_state_zxid.hasHappened() && discovery_zxid <= clean_state_zxid));
+    }
+
+    bool is_stale() const
+    {
+        return stale->load();
+    }
+
+    CleanStateClock(
+            const zkutil::ZooKeeperPtr & zookeeper,
+            const String & discovery_path,
+            const String & clean_state_path)
+            : stale(std::make_shared<std::atomic_bool>(false))
+    {
+        Coordination::Stat stat{};
+        String _some_data;
+        auto watch_callback =
+                [stale = stale] (const Coordination::WatchResponse & rsp)
+                {
+                    auto logger = &Poco::Logger::get("ClusterCopier");
+                    if (rsp.error == Coordination::ZOK)
+                    {
+                        switch (rsp.type)
+                        {
+                            case Coordination::CREATED:
+                                LOG_DEBUG(logger, "CleanStateClock change: CREATED, at " << rsp.path);
+                                stale->store(true);
+                                break;
+                            case Coordination::CHANGED:
+                                LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at" << rsp.path);
+                                stale->store(true);
+                        }
+                    }
+                };
+        if (zookeeper->tryGetWatch(discovery_path, _some_data, &stat, watch_callback))
+        {
+            discovery_zxid = LogicalClock(stat.mzxid);
+            discovery_version = stat.version;
+        }
+        if (zookeeper->tryGetWatch(clean_state_path, _some_data, &stat, watch_callback))
+        {
+            clean_state_zxid = LogicalClock(stat.mzxid);
+            clean_state_version = stat.version;
+        }
+    }
+
+    bool operator==(const CleanStateClock & other) const
+    {
+        return !is_stale()
+               && !other.is_stale()
+               && discovery_zxid == other.discovery_zxid
+               && discovery_version == other.discovery_version
+               && clean_state_zxid == other.clean_state_zxid
+               && clean_state_version == other.clean_state_version;
+    }
+
+    bool operator!=(const CleanStateClock & other) const
+    {
+        return !(*this == other);
+    }
+};
--- a/dbms/programs/server/config.xml
+++ b/dbms/programs/server/config.xml
@ -403,15 +403,13 @@
    </text_log>
    -->

-    <!-- Uncomment to write metric log into table.
-         Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected with "collect_interval_milliseconds" interval.
+    <!-- Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected with "collect_interval_milliseconds" interval. -->
    <metric_log>
        <database>system</database>
        <table>metric_log</table>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
    </metric_log>
-    -->

    <!-- Parameters for embedded dictionaries, used in Yandex.Metrica.
         See https://clickhouse.yandex/docs/en/dicts/internal_dicts/
--- a/dbms/src/Common/Exception.cpp
+++ b/dbms/src/Common/Exception.cpp
@ -193,7 +193,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
        {
            stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
                << ", e.displayText() = " << e.displayText()
-                << (with_stacktrace ? getExceptionStackTraceString(e) : "")
+                << (with_stacktrace ? ", Stack trace (when copying this message, always include the lines below):\n\n" + getExceptionStackTraceString(e) : "")
                << (with_extra_info ? getExtraExceptionInfo(e) : "")
                << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
        }
@ -210,9 +210,9 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
                name += " (demangling status: " + toString(status) + ")";

            stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what()
-                << (with_stacktrace ? getExceptionStackTraceString(e) : "")
+                << (with_stacktrace ? ", Stack trace (when copying this message, always include the lines below):\n\n" + getExceptionStackTraceString(e) : "")
                << (with_extra_info ? getExtraExceptionInfo(e) : "")
-                << ", version = " << VERSION_STRING << VERSION_OFFICIAL;
+                << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
        }
        catch (...) {}
    }
--- a/dbms/src/Compression/ICompressionCodec.cpp
+++ b/dbms/src/Compression/ICompressionCodec.cpp
@ -9,23 +9,12 @@
 #include <Compression/CompressionFactory.h>


-namespace ProfileEvents
-{
-    extern const Event ReadCompressedBytes;
-    extern const Event CompressedReadBufferBlocks;
-    extern const Event CompressedReadBufferBytes;
-}
-
 namespace DB
 {

 namespace ErrorCodes
 {
-    extern const int CHECKSUM_DOESNT_MATCH;
-    extern const int TOO_LARGE_SIZE_COMPRESSED;
-    extern const int UNKNOWN_COMPRESSION_METHOD;
    extern const int CANNOT_DECOMPRESS;
-    extern const int SEEK_POSITION_OUT_OF_BOUND;
    extern const int CORRUPTED_DATA;
 }

--- a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp
+++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp
@ -124,6 +124,10 @@ public:
        {
            /// leave other comparisons as is
        }
+        else if (functionIsLikeOperator(node.name)) /// LIKE, NOT LIKE
+        {
+            /// leave as is
+        }
        else if (functionIsInOperator(node.name)) /// IN, NOT IN
        {
            if (auto ident = node.arguments->children.at(0)->as<ASTIdentifier>())
--- a/dbms/src/Interpreters/misc.h
+++ b/dbms/src/Interpreters/misc.h
@ -13,4 +13,9 @@ inline bool functionIsInOrGlobalInOperator(const std::string & name)
    return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn";
 }

+inline bool functionIsLikeOperator(const std::string & name)
+{
+    return name == "like" || name == "notLike";
+}
+
 }
--- a/dbms/src/Storages/MergeTree/KeyCondition.cpp
+++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp
@ -264,6 +264,78 @@ const KeyCondition::AtomMap KeyCondition::atom_map
 };


+static const std::map<std::string, std::string> inverse_relations = {
+        {"equals", "notEquals"},
+        {"notEquals", "equals"},
+        {"less", "greaterOrEquals"},
+        {"greaterOrEquals", "less"},
+        {"greater", "lessOrEquals"},
+        {"lessOrEquals", "greater"},
+        {"in", "notIn"},
+        {"notIn", "in"},
+        {"like", "notLike"},
+        {"notLike", "like"},
+        {"empty", "notEmpty"},
+        {"notEmpty", "empty"},
+};
+
+
+bool isLogicalOperator(const String & func_name)
+{
+    return (func_name == "and" || func_name == "or" || func_name == "not" || func_name == "indexHint");
+}
+
+/// The node can be one of:
+///   - Logical operator (AND, OR, NOT and indexHint() - logical NOOP)
+///   - An "atom" (relational operator, constant, expression)
+///   - A logical constant expression
+///   - Any other function
+ASTPtr cloneASTWithInversionPushDown(const ASTPtr node, const bool need_inversion = false)
+{
+    const ASTFunction * func = node->as<ASTFunction>();
+
+    if (func && isLogicalOperator(func->name))
+    {
+        if (func->name == "not")
+        {
+            return cloneASTWithInversionPushDown(func->arguments->children.front(), !need_inversion);
+        }
+
+        const auto result_node = makeASTFunction(func->name);
+
+        /// indexHint() is a special case - logical NOOP function
+        if (result_node->name != "indexHint" && need_inversion)
+        {
+            result_node->name = (result_node->name == "and") ? "or" : "and";
+        }
+
+        if (func->arguments)
+        {
+            for (const auto & child : func->arguments->children)
+            {
+                result_node->arguments->children.push_back(cloneASTWithInversionPushDown(child, need_inversion));
+            }
+        }
+
+        return result_node;
+    }
+
+    const auto cloned_node = node->clone();
+
+    if (func && inverse_relations.find(func->name) != inverse_relations.cend())
+    {
+        if (need_inversion)
+        {
+            cloned_node->as<ASTFunction>()->name = inverse_relations.at(func->name);
+        }
+
+        return cloned_node;
+    }
+
+    return need_inversion ? makeASTFunction("not", cloned_node) : cloned_node;
+}
+
+
 inline bool Range::equals(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs); }
 inline bool Range::less(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); }

@ -345,21 +417,23 @@ KeyCondition::KeyCondition(
      */
    Block block_with_constants = getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context);

-    /// Trasform WHERE section to Reverse Polish notation
-    const auto & select = query_info.query->as<ASTSelectQuery &>();
-    if (select.where())
+    const ASTSelectQuery & select = query_info.query->as<ASTSelectQuery &>();
+    if (select.where() || select.prewhere())
    {
-        traverseAST(select.where(), context, block_with_constants);
+        ASTPtr filter_query;
+        if (select.where() && select.prewhere())
+            filter_query = makeASTFunction("and", select.where(), select.prewhere());
+        else
+            filter_query = select.where() ? select.where() : select.prewhere();

-        if (select.prewhere())
-        {
-            traverseAST(select.prewhere(), context, block_with_constants);
-            rpn.emplace_back(RPNElement::FUNCTION_AND);
-        }
-    }
-    else if (select.prewhere())
-    {
-        traverseAST(select.prewhere(), context, block_with_constants);
+        /** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)),
+          * the use of NOT operator in predicate will result in the indexing algorithm leave out some data.
+          * This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict
+          * when parsing the AST into internal RPN representation.
+          * To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's
+          * are pushed down and applied (when possible) to leaf nodes.
+          */
+        traverseAST(cloneASTWithInversionPushDown(filter_query), context, block_with_constants);
    }
    else
    {
@ -432,9 +506,9 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo
 {
    RPNElement element;

-    if (auto * func = node->as<ASTFunction>())
+    if (const auto * func = node->as<ASTFunction>())
    {
-        if (operatorFromAST(func, element))
+        if (tryParseLogicalOperatorFromAST(func, element))
        {
            auto & args = func->arguments->children;
            for (size_t i = 0, size = args.size(); i < size; ++i)
@ -452,7 +526,7 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo
        }
    }

-    if (!atomFromAST(node, context, block_with_constants, element))
+    if (!tryParseAtomFromAST(node, context, block_with_constants, element))
    {
        element.function = RPNElement::FUNCTION_UNKNOWN;
    }
@ -680,7 +754,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
 }


-bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
+bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
 {
    /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
      *  or itself, wrapped in a chain of possibly-monotonic functions,
@ -768,7 +842,9 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
                    func_name = "lessOrEquals";
                else if (func_name == "lessOrEquals")
                    func_name = "greaterOrEquals";
-                else if (func_name == "in" || func_name == "notIn" || func_name == "like")
+                else if (func_name == "in" || func_name == "notIn" ||
+                         func_name == "like" || func_name == "notLike" ||
+                         func_name == "startsWith")
                {
                    /// "const IN data_column" doesn't make sense (unlike "data_column IN const")
                    return false;
@ -809,7 +885,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
    return false;
 }

-bool KeyCondition::operatorFromAST(const ASTFunction * func, RPNElement & out)
+bool KeyCondition::tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out)
 {
    /// Functions AND, OR, NOT.
    /** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
--- a/dbms/src/Storages/MergeTree/KeyCondition.h
+++ b/dbms/src/Storages/MergeTree/KeyCondition.h
@ -369,8 +369,8 @@ private:
        BoolMask initial_mask) const;

    void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants);
-    bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
-    bool operatorFromAST(const ASTFunction * func, RPNElement & out);
+    bool tryParseAtomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
+    bool tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out);

    /** Is node the key column
      *  or expression in which column of key is wrapped by chain of functions,
--- a/dbms/src/Storages/StorageLog.cpp
+++ b/dbms/src/Storages/StorageLog.cpp
@ -540,8 +540,9 @@ void StorageLog::truncate(const ASTPtr &, const Context &, TableStructureWriteLo

 const StorageLog::Marks & StorageLog::getMarksWithRealRowCount() const
 {
-    const String & column_name = getColumns().begin()->name;
-    const IDataType & column_type = *getColumns().begin()->type;
+    /// There should be at least one physical column
+    const String & column_name = getColumns().getAllPhysical().begin()->name;
+    const IDataType & column_type = *getColumns().getAllPhysical().begin()->type;
    String filename;

    /** We take marks from first column.
--- a/dbms/tests/queries/0_stateless/01083_cross_to_inner_with_like.reference
+++ b/dbms/tests/queries/0_stateless/01083_cross_to_inner_with_like.reference
@ -0,0 +1,3 @@
+SELECT \n    k, \n    r.k, \n    name\nFROM n\nALL INNER JOIN r ON k = r.k\nWHERE (k = r.k) AND (name = \'A\')
+SELECT \n    k, \n    r.k, \n    name\nFROM n\nALL INNER JOIN r ON k = r.k\nWHERE (k = r.k) AND (name LIKE \'A%\')
+SELECT \n    k, \n    r.k, \n    name\nFROM n\nALL INNER JOIN r ON k = r.k\nWHERE (k = r.k) AND (name NOT LIKE \'A%\')
--- a/dbms/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql
+++ b/dbms/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql
@ -0,0 +1,15 @@
+DROP TABLE IF EXISTS n;
+DROP TABLE IF EXISTS r;
+
+CREATE TABLE n (k UInt32) ENGINE = Memory;
+CREATE TABLE r (k UInt32, name String) ENGINE = Memory;
+
+SET enable_debug_queries = 1;
+SET enable_optimize_predicate_expression = 0;
+
+ANALYZE SELECT * FROM n, r WHERE n.k = r.k AND r.name = 'A';
+ANALYZE SELECT * FROM n, r WHERE n.k = r.k AND r.name LIKE 'A%';
+ANALYZE SELECT * FROM n, r WHERE n.k = r.k AND r.name NOT LIKE 'A%';
+
+DROP TABLE n;
+DROP TABLE r;
--- a/dbms/tests/queries/0_stateless/01083_functional_index_in_mergetree.reference
+++ b/dbms/tests/queries/0_stateless/01083_functional_index_in_mergetree.reference
@ -0,0 +1,33 @@
+TP1
+7.51
+7.42
+7.41
+7.42
+7.41
+7.42
+7.41
+7.42
+7.41
+7.51
+TP2
+7.42
+7.41
+7.42
+7.51
+7.42
+7.41
+7.51
+7.51
+TP3
+7.42
+7.41
+7.51
+TP4
+7.42
+7.41
+7.42
+7.42
+7.41
+TP5
+7.41
+7.51
--- a/dbms/tests/queries/0_stateless/01083_functional_index_in_mergetree.sql
+++ b/dbms/tests/queries/0_stateless/01083_functional_index_in_mergetree.sql
@ -0,0 +1,33 @@
+SET max_threads = 1;
+
+CREATE TABLE IF NOT EXISTS functional_index_mergetree (x Float64) ENGINE = MergeTree ORDER BY round(x);
+INSERT INTO functional_index_mergetree VALUES (7.42)(7.41)(7.51);
+
+SELECT 'TP1';
+SELECT * FROM functional_index_mergetree WHERE x > 7.42;
+SELECT * FROM functional_index_mergetree WHERE x < 7.49;
+SELECT * FROM functional_index_mergetree WHERE x < 7.5;
+
+SELECT * FROM functional_index_mergetree WHERE NOT (NOT x < 7.49);
+SELECT * FROM functional_index_mergetree WHERE NOT (NOT x < 7.5);
+SELECT * FROM functional_index_mergetree WHERE NOT (NOT x > 7.42);
+
+SELECT 'TP2';
+SELECT * FROM functional_index_mergetree WHERE NOT x > 7.49;
+SELECT * FROM functional_index_mergetree WHERE NOT x < 7.42;
+SELECT * FROM functional_index_mergetree WHERE NOT x < 7.41;
+SELECT * FROM functional_index_mergetree WHERE NOT x < 7.5;
+
+SELECT 'TP3';
+SELECT * FROM functional_index_mergetree WHERE x > 7.41 AND x < 7.51;
+SELECT * FROM functional_index_mergetree WHERE NOT (x > 7.41 AND x < 7.51);
+
+SELECT 'TP4';
+SELECT * FROM functional_index_mergetree WHERE NOT x < 7.41 AND NOT x > 7.49;
+SELECT * FROM functional_index_mergetree WHERE NOT x < 7.42 AND NOT x > 7.42;
+SELECT * FROM functional_index_mergetree WHERE (NOT x < 7.4) AND (NOT x > 7.49);
+
+SELECT 'TP5';
+SELECT * FROM functional_index_mergetree WHERE NOT or(NOT x, toUInt64(x) AND NOT floor(x) > 6, x >= 7.42 AND round(x) <= 7);
+
+DROP TABLE functional_index_mergetree;
--- a/dbms/tests/queries/0_stateless/01083_log_first_column_alias.reference
+++ b/dbms/tests/queries/0_stateless/01083_log_first_column_alias.reference
@ -0,0 +1 @@
+0
--- a/dbms/tests/queries/0_stateless/01083_log_first_column_alias.sql
+++ b/dbms/tests/queries/0_stateless/01083_log_first_column_alias.sql
@ -0,0 +1,7 @@
+DROP TABLE IF EXISTS test_alias;
+
+CREATE TABLE test_alias (a UInt8 ALIAS b, b UInt8) ENGINE Log;
+
+SELECT count() FROM test_alias;
+
+DROP TABLE test_alias;
--- a/docs/en/interfaces/third-party/gui.md
+++ b/docs/en/interfaces/third-party/gui.md
@ -101,6 +101,22 @@ Features:
 - Refactorings.
 - Search and Navigation.

+### Yandex DataLens {#yandex-datalens}
+
+[Yandex DataLens](https://cloud.yandex.ru/services/datalens) is a service of data visualization and analytics.
+
+Features:
+
+- Wide range of available visualizations, from simple bar charts to complex dashboards.
+- Dashboards could be made publicly available.
+- Support for multiple data sources including ClickHouse.
+- Storage for materialized data based on ClickHouse.
+
+DataLens is [available for free](https://cloud.yandex.com/docs/datalens/pricing) for low-load projects, even for commercial use.
+
+- [DataLens documentation](https://cloud.yandex.com/docs/datalens/).
+- [Tutorial](https://cloud.yandex.com/docs/datalens/solutions/data-from-db-visualization) on visualizing data from a ClickHouse database.
+
 ### Holistics Software

 [Holistics](https://www.holistics.io/) is a full-stack data platform and business intelligence tool. 
--- a/docs/en/operations/table_engines/replication.md
+++ b/docs/en/operations/table_engines/replication.md
@ -27,7 +27,7 @@ ClickHouse uses [Apache ZooKeeper](https://zookeeper.apache.org) for storing rep
 To use replication, set parameters in the [zookeeper](../server_settings/settings.md#server-settings_zookeeper) server configuration section.

 !!! attention "Attention"
-    Don't neglect the securiry setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem.
+    Don't neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem.

 Example of setting the addresses of the ZooKeeper cluster:

--- a/docs/en/query_language/create.md
+++ b/docs/en/query_language/create.md
@ -275,8 +275,6 @@ Views look the same as normal tables. For example, they are listed in the result

 There isn't a separate query for deleting views. To delete a view, use `DROP TABLE`.

-[Original article](https://clickhouse.tech/docs/en/query_language/create/) <!--hide-->
-
 ## CREATE DICTIONARY {#create-dictionary-query}

 ```sql
@ -300,3 +298,5 @@ External dictionary structure consists of attributes. Dictionary attributes are
 Depending on dictionary [layout](dicts/external_dicts_dict_layout.md) one or more attributes can be specified as dictionary keys.

 For more information, see [External Dictionaries](dicts/external_dicts.md) section.
+
+[Original article](https://clickhouse.tech/docs/en/query_language/create/) <!--hide-->
--- a/docs/en/query_language/dicts/external_dicts.md
+++ b/docs/en/query_language/dicts/external_dicts.md
@ -6,7 +6,7 @@ ClickHouse:

 - Fully or partially stores dictionaries in RAM.
 - Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically.
- Allows to create external dictionaries with xml-files or [DDL queries](../create.md#create-dictionary-query).
+- Allows to create external dictionaries with xml files or [DDL queries](../create.md#create-dictionary-query).

 The configuration of external dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../operations/server_settings/settings.md#server_settings-dictionaries_config) parameter.

@ -34,12 +34,16 @@ You can [configure](external_dicts_dict.md) any number of dictionaries in the sa

 [DDL queries for dictionaries](../create.md#create-dictionary-query) doesn't require any additional records in server configuration. They allow to work with dictionaries as first-class entities, like tables or views.

-!!! attention
+!!! attention "Attention"
    You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../functions/other_functions.md) function). This functionality is not related to external dictionaries.

-**See also**
+## See also {#ext-dicts-see-also}

+- [Configuring an External Dictionary](external_dicts_dict.md)
+- [Storing Dictionaries in Memory](external_dicts_dict_layout.md)
+- [Dictionary Updates](external_dicts_dict_lifetime.md)
+- [Sources of External Dictionaries](external_dicts_dict_sources.md)
+- [Dictionary Key and Fields](external_dicts_dict_structure.md)
 - [Functions for Working with External Dictionaries](../functions/ext_dict_functions.md)

-
 [Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts/) <!--hide-->
--- a/docs/en/query_language/dicts/external_dicts_dict.md
+++ b/docs/en/query_language/dicts/external_dicts_dict.md
@ -1,6 +1,6 @@
 # Configuring an External Dictionary {#dicts-external_dicts_dict}

-If dictionary is configured using xml-file, than dictionary configuration has the following structure:
+If dictionary is configured using xml file, than dictionary configuration has the following structure:

 ```xml
 <dictionary>
@ -37,7 +37,7 @@ LAYOUT(...) -- Memory layout configuration
 LIFETIME(...) -- Lifetime of dictionary in memory
 ```

- name – The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`.
+- `name` – The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`.
 - [source](external_dicts_dict_sources.md) — Source of the dictionary.
 - [layout](external_dicts_dict_layout.md) — Dictionary layout in memory.
 - [structure](external_dicts_dict_structure.md) — Structure of the dictionary . A key and attributes that can be retrieved by this key.
--- a/docs/en/query_language/dicts/external_dicts_dict_layout.md
+++ b/docs/en/query_language/dicts/external_dicts_dict_layout.md
@ -34,7 +34,7 @@ The configuration looks like this:
 </yandex>
 ```

-in case of [DDL-query](../create.md#create-dictionary-query), equal configuration will looks like
+Corresponding [DDL-query](../create.md#create-dictionary-query):

 ```sql
 CREATE DICTIONARY (...)
--- a/docs/en/query_language/dicts/external_dicts_dict_structure.md
+++ b/docs/en/query_language/dicts/external_dicts_dict_structure.md
@ -47,10 +47,14 @@ Attributes are described in the query body:

 ClickHouse supports the following types of keys:

- Numeric key. UInt64. Defined in the `<id>` tag or using `PRIMARY KEY` keyword.
+- Numeric key. `UInt64`. Defined in the `<id>` tag or using `PRIMARY KEY` keyword.
 - Composite key. Set of values of different types. Defined in the tag `<key>` or `PRIMARY KEY` keyword.

-A xml structure can contain either `<id>` or `<key>`. DDL-query must contain single `PRIMARY KEY`.
+An xml structure can contain either `<id>` or `<key>`. DDL-query must contain single `PRIMARY KEY`.
+
+!!! warning "Warning"
+    You must not describe key as an attribute.
+

 ### Numeric Key {#ext_dict-numeric-key}

--- a/docs/en/query_language/show.md
+++ b/docs/en/query_language/show.md
@ -81,7 +81,7 @@ SELECT name FROM system.dictionaries WHERE database = <db> [AND name LIKE <patte

 **Example**

-The following query selects the first two rows from the list of tables in the `system` database, whose names contain `co`.
+The following query selects the first two rows from the list of tables in the `system` database, whose names contain `reg`.

 ```sql
 SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2
@ -92,3 +92,5 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2
 │ region_names │
 └──────────────┘
 ```
+
+[Original article](https://clickhouse.tech/docs/en/query_language/show/) <!--hide-->
--- a/docs/ja/introduction/performance.md
+++ b/docs/ja/introduction/performance.md
@ -1 +0,0 @@
-../../en/introduction/performance.md
--- a/docs/ja/introduction/performance.md
+++ b/docs/ja/introduction/performance.md
@ -0,0 +1,25 @@
+# パフォーマンス
+
+Yandexの内部テスト結果によると、ClickHouseは、テスト可能なクラスのシステム間で同等の動作シナリオで最高のパフォーマンス(長時間のクエリで最も高いスループットと、短時間のクエリで最小のレイテンシの両方)を示します。 [別のページで](https://clickhouse.yandex/benchmark.html)テスト結果を表示できます 。
+
+これは、多数の独立したベンチマークでも確認されています。インターネット検索で見つけることは難しくありませんし、 [ 私達がまとめた関連リンク集 ](https://clickhouse.yandex/#independent-benchmarks) から見つけることもできます。
+
+## 単一の巨大なクエリのスループット
+
+スループットは、1秒あたりの行数または1秒あたりのメガバイト数で測定できます。データがページキャッシュに配置される場合、モダンなハードウェアで実行される、それほど複雑でないクエリは、単一サーバ上の非圧縮データに対し 約2〜10GB/秒 の速度で処理されます (最も単純な場合、速度は30GB/秒)。データがページキャッシュに配置されない場合、速度はディスクサブシステムとデータ圧縮率に依存します。たとえば、ディスクサブシステムが400 MB /秒でのデータの読み取りを許可し、データ圧縮率が3の場合、速度は約1.2 GB /秒になります。 1秒あたりの行数で速度を計算するには、1秒あたりのバイト数での速度をクエリで使用される列の合計サイズで除算します。たとえば、10バイトの列が抽出される場合、速度は1秒あたり約1億から2億行になります。
+
+分散処理の場合、処理速度はほぼ線形に向上しますが、これは集約または並べ替えの結果として生じる行の数があまり大きくない場合のみです。
+
+## 短いクエリを処理するときのレイテンシ
+
+クエリが主キーを使用し、処理する行数がそれほど多くなく(数十万)、列数も多くない場合、データがページキャッシュにあれば50ミリ秒未満のレイテンシ(最良の場合は1桁のミリ秒)が期待できます。それ以外の場合、レイテンシはシーク数から計算されます。ディスクドライブを使用する場合、過負荷になっていないシステムの場合、レイテンシは次の式で計算されます: シーク時間(10ミリ秒) * クエリされる列の数 * データ部分の数
+
+## 大量の短いクエリを処理するときのスループット
+
+同じ条件下で、ClickHouseは1台のサーバーで毎秒数百のクエリを処理できます(最良の場合は数千まで)。このシナリオは分析DBMSでは一般的ではないため、1秒あたり最大100クエリを想定することをお勧めします。
+
+## データ挿入時のパフォーマンス
+
+少なくとも1000行のパケットにデータを挿入することをお勧めします。または、1秒あたり1回のリクエストを超えないでください。タブ区切りのダンプデータをMergeTreeテーブルに挿入する場合、挿入速度は50〜200MB/sになります。挿入された行のサイズが約1Kbの場合、速度は毎秒50,000〜200,000行になります。行が小さい場合、パフォーマンスは1秒あたりの行数で高くなります(Banner System データ- `>` 500,000行/秒、Graphite データ- `>` 1,000,000行/秒)。パフォーマンスを向上させるために、複数のINSERTクエリを並行して作成することで、パフォーマンスを線形に向上できます。
+
+[Original article](https://clickhouse.yandex/docs/ja/introduction/performance/) <!--hide-->
--- a/docs/ru/interfaces/third-party/gui.md
+++ b/docs/ru/interfaces/third-party/gui.md
@ -104,6 +104,22 @@
 - Рефакторинги.
 - Поиск и навигация.

+### Yandex DataLens {#yandex-datalens}
+
+[Yandex DataLens](https://cloud.yandex.ru/services/datalens) — cервис визуализации и анализа данных.
+
+Основные возможности:
+
+- Широкий выбор инструментов визуализации, от простых столбчатых диаграмм до сложных дашбордов.
+- Возможность опубликовать дашборды на широкую аудиторию.
+- Поддержка множества источников данных, включая ClickHouse.
+- Хранение материализованных данных в кластере ClickHouse DataLens.
+
+Для небольших проектов DataLens [доступен бесплатно](https://cloud.yandex.ru/docs/datalens/pricing), в том числе и для коммерческого использования.
+
+- [Документация DataLens](https://cloud.yandex.ru/docs/datalens/).
+- [Пособие по визуализации данных из ClickHouse](https://cloud.yandex.ru/docs/solutions/datalens/data-from-ch-visualization).
+
 ### Holistics Software

 [Holistics](https://www.holistics.io/) — full-stack платформа для обработки данных и бизнес-аналитики.
--- a/docs/ru/query_language/create.md
+++ b/docs/ru/query_language/create.md
@ -274,4 +274,28 @@ SELECT a, b, c FROM (SELECT ...)

 Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`.

+## CREATE DICTIONARY {#create-dictionary-query}
+
+```sql
+CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name
+(
+    key1 type1  [DEFAULT|EXPRESSION expr1] [HIERARCHICAL|INJECTIVE|IS_OBJECT_ID],
+    key2 type2  [DEFAULT|EXPRESSION expr2] [HIERARCHICAL|INJECTIVE|IS_OBJECT_ID],
+    attr1 type2 [DEFAULT|EXPRESSION expr3],
+    attr2 type2 [DEFAULT|EXPRESSION expr4]
+)
+PRIMARY KEY key1, key2
+SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
+LAYOUT(LAYOUT_NAME([param_name param_value]))
+LIFETIME([MIN val1] MAX val2)
+```
+
+Создаёт [внешний словарь](dicts/external_dicts.md) с заданной [структурой](dicts/external_dicts_dict_structure.md), [источником](dicts/external_dicts_dict_sources.md), [способом размещения в памяти](dicts/external_dicts_dict_layout.md) и [периодом обновления](dicts/external_dicts_dict_lifetime.md).
+
+Структура внешнего словаря состоит из атрибутов. Атрибуты словаря задаются как столбцы таблицы. Единственным обязательным свойством атрибута является его тип, все остальные свойства могут иметь значения по умолчанию.
+
+В зависимости от [способа размещения словаря в памяти](dicts/external_dicts_dict_layout.md), ключами словаря могут быть один и более атрибутов.
+
+Смотрите [Внешние словари](dicts/external_dicts.md).
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/create/) <!--hide-->
--- a/docs/ru/query_language/dicts/external_dicts.md
+++ b/docs/ru/query_language/dicts/external_dicts.md
@ -2,9 +2,12 @@

 Существует возможность подключать собственные словари из различных источников данных. Источником данных для словаря может быть локальный текстовый/исполняемый файл, HTTP(s) ресурс или другая СУБД. Подробнее смотрите в разделе "[Источники внешних словарей](external_dicts_dict_sources.md)".

-ClickHouse полностью или частично хранит словари в оперативной памяти. Словари можно подгружать динамически, ClickHouse периодически обновляет их и динамически подгружает отсутствующие значения.
+ClickHouse:
+- Полностью или частично хранит словари в оперативной памяти.
+- Периодически обновляет их и динамически подгружает отсутствующие значения.
+- Позволяет создавать внешние словари с помощью xml-файлов или [DDL-запросов](../create.md#create-dictionary-query).

-Конфигурация внешних словарей находится в одном или нескольких файлах. Путь к конфигурации указывается в параметре [dictionaries_config](../../operations/server_settings/settings.md).
+Конфигурация внешних словарей может находится в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries_config](../../operations/server_settings/settings.md).

 Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки [dictionaries_lazy_load](../../operations/server_settings/settings.md).

@ -30,12 +33,15 @@ ClickHouse полностью или частично хранит словар
 </yandex>
 ```

-В одном файле можно [сконфигурировать](external_dicts_dict.md) произвольное количество словарей. Формат файла сохраняется даже если словарь один (т.е. `<yandex><dictionary> <!--configuration--> </dictionary></yandex>`).
+В одном файле можно [сконфигурировать](external_dicts_dict.md) произвольное количество словарей.

->можете преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../functions/other_functions.md)). Эта функциональность не связана с внешними словарями.
+Если вы создаёте внешние словари [DDL-запросами](../create.md#create-dictionary-query), то не задавайте конфигурацию словаря в конфигурации сервера.
+
+!!! attention "Внимание"
+    Можно преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../functions/other_functions.md)). Эта функциональность не связана с внешними словарями.


-Смотрите также:
+## Смотрите также {#ext-dicts-see-also}

 - [Настройка внешнего словаря](external_dicts_dict.md)
 - [Хранение словарей в памяти](external_dicts_dict_layout.md)
--- a/docs/ru/query_language/dicts/external_dicts_dict.md
+++ b/docs/ru/query_language/dicts/external_dicts_dict.md
@ -1,11 +1,15 @@
 # Настройка внешнего словаря {#dicts-external_dicts_dict}

-Конфигурация словаря имеет следующую структуру:
+XML-конфигурация словаря имеет следующую структуру:

 ```xml
 <dictionary>
    <name>dict_name</name>

+    <structure>
+      <!-- Complex key configuration -->
+    </structure>
+
    <source>
      <!-- Source configuration -->
    </source>
@ -14,20 +18,29 @@
      <!-- Memory layout configuration -->
    </layout>

-    <structure>
-      <!-- Complex key configuration -->
-    </structure>
-
    <lifetime>
      <!-- Lifetime of dictionary in memory -->
    </lifetime>
 </dictionary>
 ```

- name - Идентификатор, под которым словарь будет доступен для использования. Используйте символы `[a-zA-Z0-9_\-]`.
- [source](external_dicts_dict_sources.md) - Источник словаря.
- [layout](external_dicts_dict_layout.md) - Размещение словаря в памяти.
- [structure](external_dicts_dict_structure.md) - Структура словаря. Ключ и атрибуты, которые можно получить по ключу.
- [lifetime](external_dicts_dict_lifetime.md) - Периодичность обновления словарей.
+Соответствующий [DDL-запрос](../create.md#create-dictionary-query) имеет следующий вид:
+
+```sql
+CREATE DICTIONARY dict_name
+(
+    ... -- attributes
+)
+PRIMARY KEY ... -- complex or single key configuration
+SOURCE(...) -- Source configuration
+LAYOUT(...) -- Memory layout configuration
+LIFETIME(...) -- Lifetime of dictionary in memory
+```
+
+- `name` — Идентификатор, под которым словарь будет доступен для использования. Используйте символы `[a-zA-Z0-9_\-]`.
+- [source](external_dicts_dict_sources.md) — Источник словаря.
+- [layout](external_dicts_dict_layout.md) — Размещение словаря в памяти.
+- [structure](external_dicts_dict_structure.md) — Структура словаря. Ключ и атрибуты, которые можно получить по ключу.
+- [lifetime](external_dicts_dict_lifetime.md) — Периодичность обновления словарей.

 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/dicts/external_dicts_dict/) <!--hide-->
--- a/docs/ru/query_language/dicts/external_dicts_dict_layout.md
+++ b/docs/ru/query_language/dicts/external_dicts_dict_layout.md
@ -35,15 +35,25 @@
 ```


+Соответствущий [DDL-запрос](../create.md#create-dictionary-query):
+
+```sql
+CREATE DICTIONARY (...)
+...
+LAYOUT(LAYOUT_TYPE(param value)) -- layout settings
+...
+```
+
 ## Способы размещения словарей в памяти

-   [flat](#flat)
-   [hashed](#hashed)
-   [cache](#cache)
-   [range_hashed](#range-hashed)
-   [complex_key_hashed](#complex-key-hashed)
-   [complex_key_cache](#complex-key-cache)
-   [ip_trie](#ip-trie)
+- [flat](#flat)
+- [hashed](#hashed)
+- [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed)
+- [cache](#cache)
+- [range_hashed](#range-hashed)
+- [complex_key_hashed](#complex-key-hashed)
+- [complex_key_cache](#complex-key-cache)
+- [ip_trie](#ip-trie)

 ### flat

@ -63,6 +73,12 @@
 </layout>
 ```

+или
+
+```sql
+LAYOUT(FLAT())
+```
+
 ### hashed

 Словарь полностью хранится в оперативной памяти в виде хэш-таблиц. Словарь может содержать произвольное количество элементов с произвольными идентификаторами. На практике, количество ключей может достигать десятков миллионов элементов.
@ -77,6 +93,29 @@
 </layout>
 ```

+или
+
+```sql
+LAYOUT(HASHED())
+```
+
+### sparse_hashed {#dicts-external_dicts_dict_layout-sparse_hashed}
+
+Аналогичен `hashed`, но при этом занимает меньше места в памяти и генерирует более высокую загрузку CPU.
+
+Пример конфигурации:
+
+```xml
+<layout>
+  <sparse_hashed />
+</layout>
+```
+
+или
+
+```sql
+LAYOUT(SPARSE_HASHED())
+```

 ### complex_key_hashed 

@ -90,6 +129,12 @@
 </layout>
 ```

+или
+
+```sql
+LAYOUT(COMPLEX_KEY_HASHED())
+```
+

 ### range_hashed

@ -131,6 +176,19 @@
    ...
 ```

+или
+
+```sql
+CREATE DICTIONARY somedict (
+    id UInt64,
+    first Date,
+    last Date
+)
+PRIMARY KEY id
+LAYOUT(RANGE_HASHED())
+RANGE(MIN first MAX last)
+```
+
 Для работы с такими словарями в функцию `dictGetT` необходимо передавать дополнительный аргумент, для которого подбирается диапазон:

    dictGetT('dict_name', 'attr_name', id, date)
@ -178,6 +236,18 @@
 </yandex>
 ```

+или
+
+```sql
+CREATE DICTIONARY somedict(
+    Abcdef UInt64,
+    StartTimeStamp UInt64,
+    EndTimeStamp UInt64,
+    XXXType String DEFAULT ''
+)
+PRIMARY KEY Abcdef
+RANGE(MIN StartTimeStamp MAX EndTimeStamp)
+```

 ### cache

@ -204,6 +274,12 @@
 </layout>
 ```

+или
+
+```sql
+LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
+```
+
 Укажите достаточно большой размер кэша. Количество ячеек следует подобрать экспериментальным путём:

 1. Выставить некоторое значение.
@ -265,6 +341,17 @@
    ...
 ```

+или
+
+```sql
+CREATE DICTIONARY somedict (
+    prefix String,
+    asn UInt32,
+    cca2 String DEFAULT '??'
+)
+PRIMARY KEY prefix
+```
+
 Этот ключ должен иметь только один атрибут типа `String`, содержащий допустимый префикс IP. Другие типы еще не поддерживаются.

 Для запросов необходимо использовать те же функции (`dictGetT` с кортежем), что и для словарей с составными ключами:
--- a/docs/ru/query_language/dicts/external_dicts_dict_lifetime.md
+++ b/docs/ru/query_language/dicts/external_dicts_dict_lifetime.md
@ -14,6 +14,15 @@ ClickHouse периодически обновляет словари. Инте
    ...
 </dictionary>
 ```
+или
+
+```sql
+CREATE DICTIONARY (...)
+...
+LIFETIME(300)
+...
+```
+

 Настройка `<lifetime>0</lifetime>` запрещает обновление словарей.

@ -32,6 +41,12 @@ ClickHouse периодически обновляет словари. Инте
 </dictionary>
 ```

+или
+
+```sql
+LIFETIME(MIN 300 MAX 360)
+```
+
 При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external_dicts_dict_sources.md):

 > -   У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется.
@ -56,4 +71,12 @@ ClickHouse периодически обновляет словари. Инте
 </dictionary>
 ```

+или
+
+```sql
+...
+SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source where id = 1'))
+...
+```
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/dicts/external_dicts_dict_lifetime/) <!--hide-->
--- a/docs/ru/query_language/dicts/external_dicts_dict_sources.md
+++ b/docs/ru/query_language/dicts/external_dicts_dict_sources.md
@ -3,7 +3,7 @@

 Внешний словарь можно подключить из множества источников.

-Общий вид конфигурации:
+Общий вид XML-конфигурации:

 ```xml
 <yandex>
@ -20,6 +20,16 @@
 </yandex>
 ```

+Аналогичный [DDL-запрос](../create.md#create-dictionary-query):
+
+```sql
+CREATE DICTIONARY dict_name (...)
+...
+SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration
+...
+```
+
+
 Источник настраивается в разделе `source`.

 Типы источников (`source_type`):
@ -48,10 +58,16 @@
 </source>
 ```

+или
+
+```sql
+SOURCE(FILE(path '/opt/dictionaries/os.tsv' format 'TabSeparated'))
+```
+
 Поля настройки:

-   `path` - Абсолютный путь к файлу.
-   `format` - Формат файла. Поддерживаются все форматы, описанные в разделе "[Форматы](../../interfaces/formats.md#formats)".
+- `path` — Абсолютный путь к файлу.
+- `format` — Формат файла. Поддерживаются все форматы, описанные в разделе "[Форматы](../../interfaces/formats.md#formats)".


 ## Исполняемый файл {#dicts-external_dicts_dict_sources-executable}
@ -69,10 +85,16 @@
 </source>
 ```

+или
+
+```sql
+SOURCE(EXECUTABLE(command 'cat /opt/dictionaries/os.tsv' format 'TabSeparated'))
+```
+
 Поля настройки:

-   `command` - Абсолютный путь к исполняемому файлу или имя файла (если каталог программы прописан в `PATH`).
-   `format` - Формат файла. Поддерживаются все форматы, описанные в разделе "[Форматы](../../interfaces/formats.md#formats)".
+-   `command` — Абсолютный путь к исполняемому файлу или имя файла (если каталог программы прописан в `PATH`).
+-   `format` — Формат файла. Поддерживаются все форматы, описанные в разделе "[Форматы](../../interfaces/formats.md#formats)".


 ## HTTP(s) {#dicts-external_dicts_dict_sources-http}
@ -86,16 +108,37 @@
    <http>
        <url>http://[::1]/os.tsv</url>
        <format>TabSeparated</format>
+        <credentials>
+            <user>user</user>
+            <password>password</password>
+        </credentials>
+        <headers>
+            <header>
+                <name>API-KEY</name>
+                <value>key</value>
+            </header>
+        </headers>
    </http>
 </source>
 ```

+или
+
+```sql
+SOURCE(HTTP(
+    url 'http://[::1]/os.tsv'
+    format 'TabSeparated'
+    credentials(user 'user' password 'password')
+    headers(header(name 'API-KEY' value 'key'))
+))
+```
+
 Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо [настроить openSSL](../../operations/server_settings/settings.md) в конфигурации сервера.

 Поля настройки:

-   `url` - URL источника.
-   `format` - Формат файла. Поддерживаются все форматы, описанные в разделе "[Форматы](../../interfaces/formats.md#formats)".
+-   `url` — URL источника.
+-   `format` — Формат файла. Поддерживаются все форматы, описанные в разделе "[Форматы](../../interfaces/formats.md#formats)".


 ## ODBC {#dicts-external_dicts_dict_sources-odbc}
@ -105,20 +148,33 @@
 Пример настройки:

 ```xml
-<odbc>
-    <db>DatabaseName</db>
-    <table>ShemaName.TableName</table>
-    <connection_string>DSN=some_parameters</connection_string>
-    <invalidate_query>SQL_QUERY</invalidate_query>
-</odbc>
+<source>
+    <odbc>
+        <db>DatabaseName</db>
+        <table>ShemaName.TableName</table>
+        <connection_string>DSN=some_parameters</connection_string>
+        <invalidate_query>SQL_QUERY</invalidate_query>
+    </odbc>
+</source>
+```
+
+или
+
+```sql
+SOURCE(ODBC(
+    db 'DatabaseName'
+    table 'SchemaName.TableName'
+    connection_string 'DSN=some_parameters'
+    invalidate_query 'SQL_QUERY'
+))
 ```

 Поля настройки:

-   `db` - имя базы данных. Не указывать, если имя базы задано в параметрах. `<connection_string>`.
-   `table` - имя таблицы и схемы, если она есть.
-   `connection_string` - строка соединения.
-   `invalidate_query` - запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе [Обновление словарей](external_dicts_dict_lifetime.md).
+-   `db` — имя базы данных. Не указывать, если имя базы задано в параметрах. `<connection_string>`.
+-   `table` — имя таблицы и схемы, если она есть.
+-   `connection_string` — строка соединения.
+-   `invalidate_query` — запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе [Обновление словарей](external_dicts_dict_lifetime.md).

 ClickHouse получает от ODBC-драйвера информацию о квотировании и квотирует настройки в запросах к драйверу, поэтому имя таблицы нужно указывать в соответствии с регистром имени таблицы в базе данных.

@ -216,6 +272,18 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
 </yandex>
 ```

+или
+
+```sql
+CREATE DICTIONARY table_name (
+    id UInt64,
+    some_column UInt64 DEFAULT 0
+)
+PRIMARY KEY id
+SOURCE(ODBC(connection_string 'DSN=myconnection' table 'postgresql_table'))
+LAYOUT(HASHED())
+LIFETIME(MIN 300 MAX 360)
+
 Может понадобиться в `odbc.ini` указать полный путь до библиотеки с драйвером `DRIVER=/usr/local/lib/psqlodbcw.so`.

 ### Пример подключения MS SQL Server
@ -299,6 +367,20 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
 </yandex>
 ```

+или
+
+```sql
+CREATE DICTIONARY test (
+    k UInt64,
+    s String DEFAULT ''
+)
+PRIMARY KEY k
+SOURCE(ODBC(table 'dict' connection_string 'DSN=MSSQL;UID=test;PWD=test'))
+LAYOUT(FLAT())
+LIFETIME(MIN 300 MAX 360)
+```
+
+
 ## СУБД


@ -328,6 +410,22 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
 </source>
 ```

+или
+
+```sql
+SOURCE(MYSQL(
+    port 3306
+    user 'clickhouse'
+    password 'qwerty'
+    replica(host 'example01-1' priority 1)
+    replica(host 'example01-2' priority 1)
+    db 'db_name'
+    table 'table_name'
+    where 'id=10'
+    invalidate_query 'SQL_QUERY'
+))
+```
+
 Поля настройки:

 - `port` — порт сервера MySQL. Можно указать для всех реплик или для каждой в отдельности (внутри `<replica>`).
@ -362,6 +460,21 @@ MySQL можно подключить на локальном хосте чер
 </source>
 ```

+или
+
+```sql
+SOURCE(MYSQL(
+    host 'localhost'
+    socket '/path/to/socket/file.sock'
+    user 'clickhouse'
+    password 'qwerty'
+    db 'db_name'
+    table 'table_name'
+    where 'id=10'
+    invalidate_query 'SQL_QUERY'
+))
+```
+

 ### ClickHouse {#dicts-external_dicts_dict_sources-clickhouse}

@ -381,16 +494,30 @@ MySQL можно подключить на локальном хосте чер
 </source>
 ```

+или
+
+```sql
+SOURCE(CLICKHOUSE(
+    host 'example01-01-1'
+    port 9000
+    user 'default'
+    password ''
+    db 'default'
+    table 'ids'
+    where 'id=10'
+))
+```
+
 Поля настройки:

-   `host` - хост ClickHouse. Если host локальный, то запрос выполняется без сетевого взаимодействия. Чтобы повысить отказоустойчивость решения, можно создать таблицу типа [Distributed](../../operations/table_engines/distributed.md) и прописать её в дальнейших настройках.
-   `port` - порт сервера ClickHouse.
-   `user` - имя пользователя ClickHouse.
-   `password` - пароль пользователя ClickHouse.
-   `db` - имя базы данных.
-   `table` - имя таблицы.
-   `where` - условие выбора. Может отсутствовать.
-   `invalidate_query` - запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе [Обновление словарей](external_dicts_dict_lifetime.md).
+-   `host` — хост ClickHouse. Если host локальный, то запрос выполняется без сетевого взаимодействия. Чтобы повысить отказоустойчивость решения, можно создать таблицу типа [Distributed](../../operations/table_engines/distributed.md) и прописать её в дальнейших настройках.
+-   `port` — порт сервера ClickHouse.
+-   `user` — имя пользователя ClickHouse.
+-   `password` — пароль пользователя ClickHouse.
+-   `db` — имя базы данных.
+-   `table` — имя таблицы.
+-   `where` — условие выбора. Может отсутствовать.
+-   `invalidate_query` — запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе [Обновление словарей](external_dicts_dict_lifetime.md).


 ### MongoDB {#dicts-external_dicts_dict_sources-mongodb}
@ -410,14 +537,27 @@ MySQL можно подключить на локальном хосте чер
 </source>
 ```

+или
+
+```sql
+SOURCE(MONGO(
+    host 'localhost'
+    port 27017
+    user ''
+    password ''
+    db 'test'
+    collection 'dictionary_source'
+))
+```
+
 Поля настройки:

-   `host` - хост MongoDB.
-   `port` - порт сервера MongoDB.
-   `user` - имя пользователя MongoDB.
-   `password` - пароль пользователя MongoDB.
-   `db` - имя базы данных.
-   `collection` - имя коллекции.
+-   `host` — хост MongoDB.
+-   `port` — порт сервера MongoDB.
+-   `user` — имя пользователя MongoDB.
+-   `password` — пароль пользователя MongoDB.
+-   `db` — имя базы данных.
+-   `collection` — имя коллекции.

 ### Redis {#dicts-external_dicts_dict_sources-redis}

@ -434,6 +574,17 @@ MySQL можно подключить на локальном хосте чер
 </source>
 ```

+или
+
+```sql
+SOURCE(REDIS(
+    host 'localhost'
+    port 6379
+    storage_type 'simple'
+    db_index 0
+))
+```
+
 Поля настройки:

 - `host` – хост Redis.
--- a/docs/ru/query_language/dicts/external_dicts_dict_structure.md
+++ b/docs/ru/query_language/dicts/external_dicts_dict_structure.md
@ -24,10 +24,10 @@
 Атрибуты описываются элементами:

 - `<id>` — [столбец с ключом](external_dicts_dict_structure.md#ext_dict_structure-key).
- `<attribute>` — [столбец данных](external_dicts_dict_structure.md#ext_dict_structure-attributes). Можно задать несколько столбцов.
+- `<attribute>` — [столбец данных](external_dicts_dict_structure.md#ext_dict_structure-attributes). Можно задать несколько атрибутов.


-Запрос создания словаря:
+Создание словаря запросом:

 ```sql
 CREATE DICTIONARY dict_name (
@ -48,10 +48,10 @@ PRIMARY KEY Id

 ClickHouse поддерживает следующие виды ключей:

- Числовой ключ. `UInt64`. Описывается в теге `<id>`.
- Составной ключ. Набор значений разного типа. Описывается в теге `<key>`.
+- Числовой ключ. `UInt64`. Описывается в теге `<id>` или ключевым словом `PRIMARY KEY`.
+- Составной ключ. Набор значений разного типа. Описывается в теге `<key>` или ключевым словом `PRIMARY KEY`.

-Структура может содержать либо `<id>` либо `<key>`.
+Структура может содержать либо `<id>` либо `<key>`. DDL-запрос может содержать только `PRIMARY KEY`.

 !!! warning "Обратите внимание"
    Ключ не надо дополнительно описывать в атрибутах.
@ -72,6 +72,20 @@ ClickHouse поддерживает следующие виды ключей:

 - `name` — имя столбца с ключами.

+Для DDL-запроса:
+
+```sql
+CREATE DICTIONARY (
+    Id UInt64,
+    ...
+)
+PRIMARY KEY Id
+...
+```
+
+- `PRIMARY KEY` – имя столбца с ключами.
+
+
 ### Составной ключ

 Ключом может быть кортеж (`tuple`) из полей произвольных типов. В этом случае [layout](external_dicts_dict_layout.md) должен быть `complex_key_hashed` или `complex_key_cache`.
@ -97,6 +111,18 @@ ClickHouse поддерживает следующие виды ключей:
 ...
 ```

+или
+
+```sql
+CREATE DICTIONARY (
+    field1 String,
+    field2 String
+    ...
+)
+PRIMARY KEY field1, field2
+...
+```
+
 При запросе в функции `dictGet*` в качестве ключа передаётся кортеж. Пример: `dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))`.


@ -119,6 +145,15 @@ ClickHouse поддерживает следующие виды ключей:
 </structure>
 ```

+или
+
+```sql
+CREATE DICTIONARY somename (
+    Name ClickHouseDataType DEFAULT '' EXPRESSION rand64() HIERARCHICAL INJECTIVE IS_OBJECT_ID
+)
+```
+
+
 Поля конфигурации:

 | Тег | Описание | Обязательный |
--- a/docs/ru/query_language/show.md
+++ b/docs/ru/query_language/show.md
@ -3,10 +3,10 @@
 ## SHOW CREATE TABLE

 ```sql
-SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
+SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY] [db.]table [INTO OUTFILE filename] [FORMAT format]
 ```

-Возвращает один столбец типа `String` с именем statement, содержащий одно значение — запрос `CREATE TABLE`, с помощью которого была создана указанная таблица.
+Возвращает один столбец типа `String` с именем statement, содержащий одно значение — запрос `CREATE TABLE`, с помощью которого был создан указанный объект.

 ## SHOW DATABASES {#show-databases}

@ -62,3 +62,35 @@ SHOW TABLES FROM system LIKE '%co%' LIMIT 2
 │ collations                     │
 └────────────────────────────────┘
 ```
+
+## SHOW DICTIONARIES
+
+Выводит список [внешних словарей](dicts/external_dicts.md).
+
+```sql
+SHOW DICTIONARIES [FROM <db>] [LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
+```
+
+Если секция `FROM` не указана, запрос возвращает список словарей из текущей базы данных.
+
+Аналогичный результат можно получить следующим запросом:
+
+```sql
+SELECT name FROM system.dictionaries WHERE database = <db> [AND name LIKE <pattern>] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
+```
+
+**Example**
+
+Запрос выводит первые две стоки из списка таблиц в базе данных `system`, имена которых содержат `reg`.
+
+```sql
+SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2
+```
+```text
+┌─name─────────┐
+│ regions      │
+│ region_names │
+└──────────────┘
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/show/) <!--hide-->
--- a/docs/tools/requirements.txt
+++ b/docs/tools/requirements.txt
@ -2,35 +2,35 @@ alabaster==0.7.12
 Babel==2.5.1
 backports-abc==0.5
 beautifulsoup4==4.8.2
-certifi==2017.11.5
+certifi==2019.11.28
 chardet==3.0.4
 click==6.7
-CommonMark==0.5.4
+CommonMark==0.9.1
 cssmin==0.2.0
 docutils==0.16
 futures==3.1.1
 htmlmin==0.1.12
-idna==2.6
+idna==2.9
 imagesize==1.2.0
 Jinja2==2.11.1
 jsmin==2.2.2
 livereload==2.5.1
 Markdown==2.6.11
-MarkupSafe==1.0
+MarkupSafe==1.1.1
 mkdocs==1.0.4
 Pygments==2.5.2
 python-slugify==1.2.6
 pytz==2017.3
 PyYAML==5.3
 recommonmark==0.4.0
-requests==2.21.0
+requests==2.23.0
 singledispatch==3.4.0.3
-six==1.11.0
+six==1.14.0
 snowballstemmer==1.2.1
 Sphinx==1.6.5
 sphinxcontrib-websupport==1.0.1
 tornado==5.1
 typing==3.7.4.1
-Unidecode==1.0.23
+Unidecode==1.1.1
 urllib3==1.25.8
 gitpython==2.1.14
--- a/website/workers/events.js
+++ b/website/workers/events.js
@ -0,0 +1,34 @@
+addEventListener('fetch', event => {
+  event.respondWith(handleRequest(event.request))
+})
+
+async function handleRequest(request) {
+  let raw = await fetch('https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/README.md');
+  let text = await raw.text();
+  let lines = text.split('\n');
+  let skip = true;
+  let events = [];
+  for (let idx in lines) {
+      let line = lines[idx];
+      if (skip) {
+          if (line.includes('Upcoming Events')) {
+              skip = false;
+          }
+      } else {
+          if (!line) { continue; };
+          line = line.split('](');
+          var tail = line[1].split(') ');
+          events.push({
+            'signup_link': tail[0],
+            'event_name': line[0].replace('* [', ''),
+            'event_date': tail[1].slice(0, -1).replace('on ', '')
+          });
+      }
+  }
+           
+  let response = new Response(JSON.stringify({
+    'events': events
+  }));
+  response.headers.set('Content-Type', 'application/json');
+  return response;
+}
--- a/website/workers/repo.js
+++ b/website/workers/repo.js
@ -0,0 +1,10 @@
+addEventListener('fetch', event => {
+  event.respondWith(handleRequest(event.request))
+})
+
+async function handleRequest(request) {
+  let url = new URL(request.url);
+  url.hostname = 'repo.yandex.ru';
+  url.pathname = '/clickhouse' + url.pathname;
+  return fetch(url)
+}