Fixed extra squashing that led to overly long timeouts. [#CLICKHOUSE-3346]

2024-11-22 15:42:02 +00:00 · 2018-03-11 21:36:09 +03:00 · 2018-03-11 21:36:09 +03:00 · 32b617e1d6
commit 32b617e1d6
parent 96d4e59dab
9 changed files with 208 additions and 26 deletions
--- a/dbms/src/DataStreams/copyData.cpp
+++ b/dbms/src/DataStreams/copyData.cpp
@ -16,8 +16,8 @@ bool isAtomicSet(std::atomic<bool> * val)

 }

-template <typename Pred>
-void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_cancelled)
+template <typename TCancelCallback, typename TProgressCallback>
+void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCallback && is_cancelled, TProgressCallback && progress)
 {
    from.readPrefix();
    to.writePrefix();
@ -28,6 +28,7 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_
            break;

        to.write(block);
+        progress(block);
    }

    if (is_cancelled())
@ -51,6 +52,8 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_
 }


+/// No-op progress callback: passed to copyDataImpl by the copyData overloads that take no progress callback.
+inline void doNothing(const Block &) {}
+
 void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<bool> * is_cancelled)
 {
    auto is_cancelled_pred = [is_cancelled] ()
@ -58,13 +61,19 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<boo
        return isAtomicSet(is_cancelled);
    };

-    copyDataImpl(from, to, is_cancelled_pred);
+    copyDataImpl(from, to, is_cancelled_pred, doNothing);
 }


 void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled)
 {
-    copyDataImpl(from, to, is_cancelled);
+    /// No progress reporting is requested here, so pass the no-op callback.
+    copyDataImpl(from, to, is_cancelled, doNothing);
+}
+
+/// Same as above, but additionally invokes `progress` for every block, right after it has been written to `to`.
+/// `progress` is called unconditionally, so it must not be an empty std::function.
+void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,
+              const std::function<void(const Block & block)> & progress)
+{
+    copyDataImpl(from, to, is_cancelled, progress);
 }

 }
--- a/dbms/src/DataStreams/copyData.h
+++ b/dbms/src/DataStreams/copyData.h
@ -9,6 +9,7 @@ namespace DB

 class IBlockInputStream;
 class IBlockOutputStream;
+class Block;

 /** Copies data from the InputStream into the OutputStream
  * (for example, from the database to the console, etc.)
@ -17,4 +18,7 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<boo

 void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled);

+/// Same as the overload above; additionally calls `progress` with each block that is copied.
+void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,
+              const std::function<void(const Block & block)> & progress);
+
 }
--- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp
@ -106,8 +106,12 @@ BlockIO InterpreterInsertQuery::execute()
        out, getSampleBlock(query, table), required_columns, table->column_defaults, context,
        static_cast<bool>(context.getSettingsRef().strict_insert_defaults));

-    out = std::make_shared<SquashingBlockOutputStream>(
-        out, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
+    /// Do not squash blocks if it is a sync INSERT into Distributed
+    if (!(context.getSettingsRef().insert_distributed_sync && table->getName() == "Distributed"))
+    {
+        out = std::make_shared<SquashingBlockOutputStream>(
+            out, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
+    }

    auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
    out_wrapper->setProcessListElement(context.getProcessListElement());
--- a/dbms/src/Server/ClusterCopier.cpp
+++ b/dbms/src/Server/ClusterCopier.cpp
@ -235,6 +235,7 @@ struct ClusterPartition
    double elapsed_time_seconds = 0;
    UInt64 bytes_copied = 0;
    UInt64 rows_copied = 0;
+    UInt64 blocks_copied = 0;

    size_t total_tries = 0;
 };
@ -1334,9 +1335,9 @@ protected:
                double elapsed = cluster_partition.elapsed_time_seconds;

                LOG_INFO(log, "It took " << std::fixed << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name
-                                         << ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied)
-                                         << " uncompressed bytes and "
-                                         << formatReadableQuantity(cluster_partition.rows_copied) << " rows are copied");
+                         << ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied) << " uncompressed bytes"
+                         << ", " << formatReadableQuantity(cluster_partition.rows_copied) << " rows"
+                         << " and " << cluster_partition.blocks_copied << " source blocks are copied");

                if (cluster_partition.rows_copied)
                {
@ -1347,8 +1348,7 @@ protected:
                if (task_table.rows_copied)
                {
                    LOG_INFO(log, "Average table " << task_table.table_id << " speed: "
-                                                   << formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed)
-                                                   << " per second.");
+                        << formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed) << " per second.");
                }
            }
        }
@ -1430,6 +1430,7 @@ protected:
        {
            String query;
            query += "SELECT " + fields + " FROM " + getDatabaseDotTable(from_table);
+            /// TODO: concatenating the partition value into the query text is fragile; build the condition with ASTLiteral(partition_key_field) instead.
            query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = " + task_partition.name + ")";
            if (!task_table.where_condition_str.empty())
                query += " AND (" + task_table.where_condition_str + ")";
@ -1655,19 +1656,15 @@ protected:

                /// Update statistics
                /// It is quite rough: bytes_copied don't take into account DROP PARTITION.
-                if (auto in = dynamic_cast<IProfilingBlockInputStream *>(io_select.in.get()))
+                auto update_stats = [&cluster_partition] (const Block & block)
                {
-                    auto update_table_stats = [&] (const Progress & progress)
-                    {
-                        cluster_partition.bytes_copied += progress.bytes;
-                        cluster_partition.rows_copied += progress.rows;
-                    };
-
-                    in->setProgressCallback(update_table_stats);
-                }
+                    cluster_partition.bytes_copied += block.bytes();
+                    cluster_partition.rows_copied += block.rows();
+                    cluster_partition.blocks_copied += 1;
+                };

                /// Main work is here
-                copyData(*io_select.in, *io_insert.out, cancel_check);
+                copyData(*io_select.in, *io_insert.out, cancel_check, update_stats);

                // Just in case
                if (future_is_dirty_checker != nullptr)
@ -1844,8 +1841,7 @@ protected:

        Context local_context = context;
        local_context.setSettings(task_cluster->settings_pull);
-        InterpreterSelectQuery interp(query_ast, local_context);
-        return interp.execute().in->read().rows() != 0;
+        return InterpreterFactory::get(query_ast, local_context)->execute().in->read().rows() != 0;
    }

    /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@ -104,7 +104,18 @@ std::string DistributedBlockOutputStream::getCurrentStateDescription()
        {
            buffer << "Wrote " << job.blocks_written << " blocks and " << job.rows_written << " rows"
                   << " on shard " << job.shard_index << " replica " << job.replica_index
-                   << ", " << addresses[job.shard_index][job.replica_index].readableString() << "\n";
+                   << ", " << addresses[job.shard_index][job.replica_index].readableString();
+
+            /// Performance statistics
+            if (job.bloks_started > 0)
+            {
+                buffer << " (average " << job.elapsed_time_ms / job.bloks_started << " ms per block";
+                if (job.bloks_started > 1)
+                    buffer << ", the slowest block " << job.max_elapsed_time_for_block_ms << " ms";
+                buffer << ")";
+            }
+
+            buffer << "\n";
        }

    return buffer.str();
@ -178,8 +189,15 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
    auto memory_tracker = current_memory_tracker;
    return [this, memory_tracker, &job]()
    {
+        SCOPE_EXIT({++finished_jobs_count;});
+
+        Stopwatch watch;
+        ++job.bloks_started;
+
        SCOPE_EXIT({
-            ++finished_jobs_count;
+            UInt64 elapsed_time_for_block_ms = watch.elapsedMilliseconds();
+            job.elapsed_time_ms += elapsed_time_for_block_ms;
+            job.max_elapsed_time_for_block_ms = std::max(job.max_elapsed_time_for_block_ms, elapsed_time_for_block_ms);
        });

        if (!current_memory_tracker)
@ -250,7 +268,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
                job.stream->write(block);
        }

-        ++job.blocks_written;
+        job.blocks_written += 1;
        job.rows_written += block.rows();
    };
 }
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
@ -107,6 +107,10 @@ private:

        UInt64 blocks_written = 0;
        UInt64 rows_written = 0;
+
+        UInt64 bloks_started = 0;
+        UInt64 elapsed_time_ms = 0;
+        UInt64 max_elapsed_time_for_block_ms = 0;
    };

    std::vector<std::list<JobInfo>> per_shard_jobs;
--- a/dbms/tests/integration/test_cluster_copier/configs/config.d/clusters.xml
+++ b/dbms/tests/integration/test_cluster_copier/configs/config.d/clusters.xml
@ -43,5 +43,19 @@
        </shard>
    </cluster1>

+    <shard_0_0>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>s0_0_0</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>s0_0_1</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </shard_0_0>
+
 </remote_servers>
 </yandex>
--- a/dbms/tests/integration/test_cluster_copier/task_test_block_size.xml
+++ b/dbms/tests/integration/test_cluster_copier/task_test_block_size.xml
@ -0,0 +1,102 @@
+<?xml version="1.0"?>
+<yandex>
+    <!-- How many simultaneous workers are possible -->
+    <max_workers>1</max_workers>
+
+    <!-- Common settings for pull and push operations -->
+    <settings>
+        <connect_timeout>1</connect_timeout>
+    </settings>
+
+    <settings_pull>
+    </settings_pull>
+
+    <!-- Tasks -->
+    <tables>
+        <test_block_size>
+            <cluster_pull>shard_0_0</cluster_pull>
+            <database_pull>default</database_pull>
+            <table_pull>test_block_size</table_pull>
+
+            <cluster_push>cluster1</cluster_push>
+            <database_push>default</database_push>
+            <table_push>test_block_size</table_push>
+
+            <enabled_partitions>
+                <partition>'1970-01-01'</partition>
+            </enabled_partitions>
+
+            <!-- Engine of destination tables -->
+            <engine>ENGINE=
+                ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/test_block_size', '{replica}')
+                ORDER BY d PARTITION BY partition
+            </engine>
+
+            <!-- Which sharding key to use while copying -->
+            <sharding_key>jumpConsistentHash(intHash64(d), 2)</sharding_key>
+
+            <!-- Optional expression that filters the data being copied -->
+            <!-- <where_condition></where_condition> -->
+        </test_block_size>
+    </tables>
+
+    <!-- Configuration of clusters -->
+    <remote_servers>
+    <cluster0>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>s0_0_0</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>s0_0_1</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>s0_1_0</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </cluster0>
+
+    <cluster1>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>s1_0_0</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>s1_0_1</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>s1_1_0</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </cluster1>
+
+    <shard_0_0>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>s0_0_0</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>s0_0_1</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </shard_0_0>
+    </remote_servers>
+
+</yandex>
--- a/dbms/tests/integration/test_cluster_copier/test.py
+++ b/dbms/tests/integration/test_cluster_copier/test.py
@ -140,6 +140,34 @@ class Task2:
        ddl_check_query(instance, "DROP TABLE b ON CLUSTER cluster1")


+class Task_test_block_size:
+    """Copier test task described by task_test_block_size.xml.
+
+    start() creates and fills the source table `test_block_size` on cluster
+    `shard_0_0`; check() verifies the row count seen through `cluster1` and
+    then drops the table on both clusters.
+    """
+
+    def __init__(self, cluster):
+        self.cluster = cluster
+        self.zk_task_path = "/clickhouse-copier/task_test_block_size"
+        # Read the task description eagerly and close the file handle right away.
+        with open(os.path.join(CURRENT_TEST_DIR, 'task_test_block_size.xml'), 'r') as task_file:
+            self.copier_task_config = task_file.read()
+        # Number of rows inserted by start() and expected by check().
+        self.rows = 1000000
+
+
+    def start(self):
+        # Use the cluster handed to the constructor rather than a module-level global,
+        # consistently with check().
+        instance = self.cluster.instances['s0_0_0']
+
+        # NOTE(review): the ZooKeeper path below ends in '/a' although the table is
+        # named test_block_size -- confirm this is intentional.
+        ddl_check_query(instance, """
+            CREATE TABLE test_block_size ON CLUSTER shard_0_0 (partition Date, d UInt64)
+            ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/a', '{replica}')
+            ORDER BY d""", 2)
+
+        instance.query("INSERT INTO test_block_size SELECT toDate(0) AS partition, number as d FROM system.numbers LIMIT {}".format(self.rows))
+
+
+    def check(self):
+        # Every inserted row must be visible through the destination cluster.
+        assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count() FROM cluster(cluster1, default, test_block_size)")) == TSV("{}\n".format(self.rows))
+
+        # Clean up the source and destination tables.
+        instance = self.cluster.instances['s0_0_0']
+        ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER shard_0_0", 2)
+        ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER cluster1")
+
+
 def execute_task(task, cmd_options):
    task.start()

@ -198,6 +226,9 @@ def test_copy_month_to_week_partition(started_cluster):
 def test_copy_month_to_week_partition_with_recovering(started_cluster):
    execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.3)])

+def test_block_size(started_cluster):
+    """Runs Task_test_block_size through execute_task with no extra command options."""
+    execute_task(Task_test_block_size(started_cluster), [])
+

 if __name__ == '__main__':
    with contextmanager(started_cluster)() as cluster: