mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Fixed extra squashing leaded to too big timeouts. [#CLICKHOUSE-3346]
This commit is contained in:
parent
96d4e59dab
commit
32b617e1d6
@ -16,8 +16,8 @@ bool isAtomicSet(std::atomic<bool> * val)
|
||||
|
||||
}
|
||||
|
||||
template <typename Pred>
|
||||
void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_cancelled)
|
||||
template <typename TCancelCallback, typename TProgressCallback>
|
||||
void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCallback && is_cancelled, TProgressCallback && progress)
|
||||
{
|
||||
from.readPrefix();
|
||||
to.writePrefix();
|
||||
@ -28,6 +28,7 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_
|
||||
break;
|
||||
|
||||
to.write(block);
|
||||
progress(block);
|
||||
}
|
||||
|
||||
if (is_cancelled())
|
||||
@ -51,6 +52,8 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_
|
||||
}
|
||||
|
||||
|
||||
inline void doNothing(const Block &) {}
|
||||
|
||||
void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<bool> * is_cancelled)
|
||||
{
|
||||
auto is_cancelled_pred = [is_cancelled] ()
|
||||
@ -58,13 +61,19 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<boo
|
||||
return isAtomicSet(is_cancelled);
|
||||
};
|
||||
|
||||
copyDataImpl(from, to, is_cancelled_pred);
|
||||
copyDataImpl(from, to, is_cancelled_pred, doNothing);
|
||||
}
|
||||
|
||||
|
||||
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled)
|
||||
{
|
||||
copyDataImpl(from, to, is_cancelled);
|
||||
copyDataImpl(from, to, is_cancelled, doNothing);
|
||||
}
|
||||
|
||||
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,
|
||||
const std::function<void(const Block & block)> & progress)
|
||||
{
|
||||
copyDataImpl(from, to, is_cancelled, progress);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ namespace DB
|
||||
|
||||
class IBlockInputStream;
|
||||
class IBlockOutputStream;
|
||||
class Block;
|
||||
|
||||
/** Copies data from the InputStream into the OutputStream
|
||||
* (for example, from the database to the console, etc.)
|
||||
@ -17,4 +18,7 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<boo
|
||||
|
||||
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled);
|
||||
|
||||
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,
|
||||
const std::function<void(const Block & block)> & progress);
|
||||
|
||||
}
|
||||
|
@ -106,8 +106,12 @@ BlockIO InterpreterInsertQuery::execute()
|
||||
out, getSampleBlock(query, table), required_columns, table->column_defaults, context,
|
||||
static_cast<bool>(context.getSettingsRef().strict_insert_defaults));
|
||||
|
||||
out = std::make_shared<SquashingBlockOutputStream>(
|
||||
out, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
|
||||
/// Do not squash blocks if it is a sync INSERT into Distributed
|
||||
if (!(context.getSettingsRef().insert_distributed_sync && table->getName() == "Distributed"))
|
||||
{
|
||||
out = std::make_shared<SquashingBlockOutputStream>(
|
||||
out, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
|
||||
}
|
||||
|
||||
auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
|
||||
out_wrapper->setProcessListElement(context.getProcessListElement());
|
||||
|
@ -235,6 +235,7 @@ struct ClusterPartition
|
||||
double elapsed_time_seconds = 0;
|
||||
UInt64 bytes_copied = 0;
|
||||
UInt64 rows_copied = 0;
|
||||
UInt64 blocks_copied = 0;
|
||||
|
||||
size_t total_tries = 0;
|
||||
};
|
||||
@ -1334,9 +1335,9 @@ protected:
|
||||
double elapsed = cluster_partition.elapsed_time_seconds;
|
||||
|
||||
LOG_INFO(log, "It took " << std::fixed << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name
|
||||
<< ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied)
|
||||
<< " uncompressed bytes and "
|
||||
<< formatReadableQuantity(cluster_partition.rows_copied) << " rows are copied");
|
||||
<< ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied) << " uncompressed bytes"
|
||||
<< ", " << formatReadableQuantity(cluster_partition.rows_copied) << " rows"
|
||||
<< " and " << cluster_partition.blocks_copied << " source blocks are copied");
|
||||
|
||||
if (cluster_partition.rows_copied)
|
||||
{
|
||||
@ -1347,8 +1348,7 @@ protected:
|
||||
if (task_table.rows_copied)
|
||||
{
|
||||
LOG_INFO(log, "Average table " << task_table.table_id << " speed: "
|
||||
<< formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed)
|
||||
<< " per second.");
|
||||
<< formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed) << " per second.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1430,6 +1430,7 @@ protected:
|
||||
{
|
||||
String query;
|
||||
query += "SELECT " + fields + " FROM " + getDatabaseDotTable(from_table);
|
||||
/// TODO: Bad, it is better to rewrite with ASTLiteral(partition_key_field)
|
||||
query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = " + task_partition.name + ")";
|
||||
if (!task_table.where_condition_str.empty())
|
||||
query += " AND (" + task_table.where_condition_str + ")";
|
||||
@ -1655,19 +1656,15 @@ protected:
|
||||
|
||||
/// Update statistics
|
||||
/// It is quite rough: bytes_copied don't take into account DROP PARTITION.
|
||||
if (auto in = dynamic_cast<IProfilingBlockInputStream *>(io_select.in.get()))
|
||||
auto update_stats = [&cluster_partition] (const Block & block)
|
||||
{
|
||||
auto update_table_stats = [&] (const Progress & progress)
|
||||
{
|
||||
cluster_partition.bytes_copied += progress.bytes;
|
||||
cluster_partition.rows_copied += progress.rows;
|
||||
};
|
||||
|
||||
in->setProgressCallback(update_table_stats);
|
||||
}
|
||||
cluster_partition.bytes_copied += block.bytes();
|
||||
cluster_partition.rows_copied += block.rows();
|
||||
cluster_partition.blocks_copied += 1;
|
||||
};
|
||||
|
||||
/// Main work is here
|
||||
copyData(*io_select.in, *io_insert.out, cancel_check);
|
||||
copyData(*io_select.in, *io_insert.out, cancel_check, update_stats);
|
||||
|
||||
// Just in case
|
||||
if (future_is_dirty_checker != nullptr)
|
||||
@ -1844,8 +1841,7 @@ protected:
|
||||
|
||||
Context local_context = context;
|
||||
local_context.setSettings(task_cluster->settings_pull);
|
||||
InterpreterSelectQuery interp(query_ast, local_context);
|
||||
return interp.execute().in->read().rows() != 0;
|
||||
return InterpreterFactory::get(query_ast, local_context)->execute().in->read().rows() != 0;
|
||||
}
|
||||
|
||||
/** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster
|
||||
|
@ -104,7 +104,18 @@ std::string DistributedBlockOutputStream::getCurrentStateDescription()
|
||||
{
|
||||
buffer << "Wrote " << job.blocks_written << " blocks and " << job.rows_written << " rows"
|
||||
<< " on shard " << job.shard_index << " replica " << job.replica_index
|
||||
<< ", " << addresses[job.shard_index][job.replica_index].readableString() << "\n";
|
||||
<< ", " << addresses[job.shard_index][job.replica_index].readableString();
|
||||
|
||||
/// Performance statistics
|
||||
if (job.bloks_started > 0)
|
||||
{
|
||||
buffer << " (average " << job.elapsed_time_ms / job.bloks_started << " ms per block";
|
||||
if (job.bloks_started > 1)
|
||||
buffer << ", the slowest block " << job.max_elapsed_time_for_block_ms << " ms";
|
||||
buffer << ")";
|
||||
}
|
||||
|
||||
buffer << "\n";
|
||||
}
|
||||
|
||||
return buffer.str();
|
||||
@ -178,8 +189,15 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
|
||||
auto memory_tracker = current_memory_tracker;
|
||||
return [this, memory_tracker, &job]()
|
||||
{
|
||||
SCOPE_EXIT({++finished_jobs_count;});
|
||||
|
||||
Stopwatch watch;
|
||||
++job.bloks_started;
|
||||
|
||||
SCOPE_EXIT({
|
||||
++finished_jobs_count;
|
||||
UInt64 elapsed_time_for_block_ms = watch.elapsedMilliseconds();
|
||||
job.elapsed_time_ms += elapsed_time_for_block_ms;
|
||||
job.max_elapsed_time_for_block_ms = std::max(job.max_elapsed_time_for_block_ms, elapsed_time_for_block_ms);
|
||||
});
|
||||
|
||||
if (!current_memory_tracker)
|
||||
@ -250,7 +268,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
|
||||
job.stream->write(block);
|
||||
}
|
||||
|
||||
++job.blocks_written;
|
||||
job.blocks_written += 1;
|
||||
job.rows_written += block.rows();
|
||||
};
|
||||
}
|
||||
|
@ -107,6 +107,10 @@ private:
|
||||
|
||||
UInt64 blocks_written = 0;
|
||||
UInt64 rows_written = 0;
|
||||
|
||||
UInt64 bloks_started = 0;
|
||||
UInt64 elapsed_time_ms = 0;
|
||||
UInt64 max_elapsed_time_for_block_ms = 0;
|
||||
};
|
||||
|
||||
std::vector<std::list<JobInfo>> per_shard_jobs;
|
||||
|
@ -43,5 +43,19 @@
|
||||
</shard>
|
||||
</cluster1>
|
||||
|
||||
<shard_0_0>
|
||||
<shard>
|
||||
<internal_replication>true</internal_replication>
|
||||
<replica>
|
||||
<host>s0_0_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>s0_0_1</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</shard_0_0>
|
||||
|
||||
</remote_servers>
|
||||
</yandex>
|
@ -0,0 +1,102 @@
|
||||
<?xml version="1.0"?>
|
||||
<yandex>
|
||||
<!-- How many simualteneous workers are posssible -->
|
||||
<max_workers>1</max_workers>
|
||||
|
||||
<!-- Common setting for pull and push operations -->
|
||||
<settings>
|
||||
<connect_timeout>1</connect_timeout>
|
||||
</settings>
|
||||
|
||||
<settings_pull>
|
||||
</settings_pull>
|
||||
|
||||
<!-- Tasks -->
|
||||
<tables>
|
||||
<test_block_size>
|
||||
<cluster_pull>shard_0_0</cluster_pull>
|
||||
<database_pull>default</database_pull>
|
||||
<table_pull>test_block_size</table_pull>
|
||||
|
||||
<cluster_push>cluster1</cluster_push>
|
||||
<database_push>default</database_push>
|
||||
<table_push>test_block_size</table_push>
|
||||
|
||||
<enabled_partitions>
|
||||
<partition>'1970-01-01'</partition>
|
||||
</enabled_partitions>
|
||||
|
||||
<!-- Engine of destination tables -->
|
||||
<engine>ENGINE=
|
||||
ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/test_block_size', '{replica}')
|
||||
ORDER BY d PARTITION BY partition
|
||||
</engine>
|
||||
|
||||
<!-- Which sarding key to use while copying -->
|
||||
<sharding_key>jumpConsistentHash(intHash64(d), 2)</sharding_key>
|
||||
|
||||
<!-- Optional expression that filter copying data -->
|
||||
<!-- <where_condition></where_condition> -->
|
||||
</test_block_size>
|
||||
</tables>
|
||||
|
||||
<!-- Configuration of clusters -->
|
||||
<remote_servers>
|
||||
<cluster0>
|
||||
<shard>
|
||||
<internal_replication>true</internal_replication>
|
||||
<replica>
|
||||
<host>s0_0_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>s0_0_1</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<shard>
|
||||
<internal_replication>true</internal_replication>
|
||||
<replica>
|
||||
<host>s0_1_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</cluster0>
|
||||
|
||||
<cluster1>
|
||||
<shard>
|
||||
<internal_replication>true</internal_replication>
|
||||
<replica>
|
||||
<host>s1_0_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>s1_0_1</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<shard>
|
||||
<internal_replication>true</internal_replication>
|
||||
<replica>
|
||||
<host>s1_1_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</cluster1>
|
||||
|
||||
<shard_0_0>
|
||||
<shard>
|
||||
<internal_replication>true</internal_replication>
|
||||
<replica>
|
||||
<host>s0_0_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>s0_0_1</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</shard_0_0>
|
||||
</remote_servers>
|
||||
|
||||
</yandex>
|
@ -140,6 +140,34 @@ class Task2:
|
||||
ddl_check_query(instance, "DROP TABLE b ON CLUSTER cluster1")
|
||||
|
||||
|
||||
class Task_test_block_size:
|
||||
|
||||
def __init__(self, cluster):
|
||||
self.cluster = cluster
|
||||
self.zk_task_path="/clickhouse-copier/task_test_block_size"
|
||||
self.copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task_test_block_size.xml'), 'r').read()
|
||||
self.rows = 1000000
|
||||
|
||||
|
||||
def start(self):
|
||||
instance = cluster.instances['s0_0_0']
|
||||
|
||||
ddl_check_query(instance, """
|
||||
CREATE TABLE test_block_size ON CLUSTER shard_0_0 (partition Date, d UInt64)
|
||||
ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/a', '{replica}')
|
||||
ORDER BY d""", 2)
|
||||
|
||||
instance.query("INSERT INTO test_block_size SELECT toDate(0) AS partition, number as d FROM system.numbers LIMIT {}".format(self.rows))
|
||||
|
||||
|
||||
def check(self):
|
||||
assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count() FROM cluster(cluster1, default, test_block_size)")) == TSV("{}\n".format(self.rows))
|
||||
|
||||
instance = cluster.instances['s0_0_0']
|
||||
ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER shard_0_0", 2)
|
||||
ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER cluster1")
|
||||
|
||||
|
||||
def execute_task(task, cmd_options):
|
||||
task.start()
|
||||
|
||||
@ -198,6 +226,9 @@ def test_copy_month_to_week_partition(started_cluster):
|
||||
def test_copy_month_to_week_partition_with_recovering(started_cluster):
|
||||
execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.3)])
|
||||
|
||||
def test_block_size(started_cluster):
|
||||
execute_task(Task_test_block_size(started_cluster), [])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
with contextmanager(started_cluster)() as cluster:
|
||||
|
Loading…
Reference in New Issue
Block a user