Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-22 15:42:02 +00:00
Fixed extra squashing that led to overly long timeouts. [#CLICKHOUSE-3346]
Commit 32b617e1d6 (parent 96d4e59dab)

@@ -16,8 +16,8 @@ bool isAtomicSet(std::atomic<bool> * val)
 
 }
 
-template <typename Pred>
-void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_cancelled)
+template <typename TCancelCallback, typename TProgressCallback>
+void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCallback && is_cancelled, TProgressCallback && progress)
 {
     from.readPrefix();
     to.writePrefix();
@@ -28,6 +28,7 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_cancelled)
             break;
 
         to.write(block);
+        progress(block);
     }
 
     if (is_cancelled())
@@ -51,6 +52,8 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_cancelled)
 }
 
+
+inline void doNothing(const Block &) {}
 
 void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<bool> * is_cancelled)
 {
     auto is_cancelled_pred = [is_cancelled] ()
@@ -58,13 +61,19 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<bool> * is_cancelled)
         return isAtomicSet(is_cancelled);
     };
 
-    copyDataImpl(from, to, is_cancelled_pred);
+    copyDataImpl(from, to, is_cancelled_pred, doNothing);
 }
 
 
 void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled)
 {
-    copyDataImpl(from, to, is_cancelled);
+    copyDataImpl(from, to, is_cancelled, doNothing);
+}
+
+void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,
+              const std::function<void(const Block & block)> & progress)
+{
+    copyDataImpl(from, to, is_cancelled, progress);
 }
 
 }

@@ -9,6 +9,7 @@ namespace DB
 
 class IBlockInputStream;
 class IBlockOutputStream;
+class Block;
 
 /** Copies data from the InputStream into the OutputStream
  * (for example, from the database to the console, etc.)
@@ -17,4 +18,7 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<bool> * is_cancelled);
 
 void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled);
 
+void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,
+              const std::function<void(const Block & block)> & progress);
+
 }

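For illustration only, a minimal sketch of how a caller could use the new progress-callback overload declared above; the stream variables and the rows_copied counter are hypothetical, not part of the commit:

    std::atomic<bool> cancelled{false};
    size_t rows_copied = 0;

    copyData(*input_stream, *output_stream,
             /* is_cancelled */ [&] { return cancelled.load(); },
             /* progress */ [&] (const Block & block) { rows_copied += block.rows(); });
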
@@ -106,8 +106,12 @@ BlockIO InterpreterInsertQuery::execute()
             out, getSampleBlock(query, table), required_columns, table->column_defaults, context,
             static_cast<bool>(context.getSettingsRef().strict_insert_defaults));
 
-        out = std::make_shared<SquashingBlockOutputStream>(
-            out, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
+        /// Do not squash blocks if it is a sync INSERT into Distributed
+        if (!(context.getSettingsRef().insert_distributed_sync && table->getName() == "Distributed"))
+        {
+            out = std::make_shared<SquashingBlockOutputStream>(
+                out, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
+        }
 
         auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
         out_wrapper->setProcessListElement(context.getProcessListElement());

@@ -235,6 +235,7 @@ struct ClusterPartition
     double elapsed_time_seconds = 0;
     UInt64 bytes_copied = 0;
     UInt64 rows_copied = 0;
+    UInt64 blocks_copied = 0;
 
     size_t total_tries = 0;
 };
@@ -1334,9 +1335,9 @@ protected:
         double elapsed = cluster_partition.elapsed_time_seconds;
 
         LOG_INFO(log, "It took " << std::fixed << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name
-            << ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied)
-            << " uncompressed bytes and "
-            << formatReadableQuantity(cluster_partition.rows_copied) << " rows are copied");
+            << ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied) << " uncompressed bytes"
+            << ", " << formatReadableQuantity(cluster_partition.rows_copied) << " rows"
+            << " and " << cluster_partition.blocks_copied << " source blocks are copied");
 
         if (cluster_partition.rows_copied)
         {
@@ -1347,8 +1348,7 @@ protected:
             if (task_table.rows_copied)
             {
                 LOG_INFO(log, "Average table " << task_table.table_id << " speed: "
-                    << formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed)
-                    << " per second.");
+                    << formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed) << " per second.");
             }
         }
     }
@@ -1430,6 +1430,7 @@ protected:
     {
         String query;
         query += "SELECT " + fields + " FROM " + getDatabaseDotTable(from_table);
+        /// TODO: Bad, it is better to rewrite with ASTLiteral(partition_key_field)
        query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = " + task_partition.name + ")";
         if (!task_table.where_condition_str.empty())
             query += " AND (" + task_table.where_condition_str + ")";
@@ -1655,19 +1656,15 @@ protected:
 
         /// Update statistics
         /// It is quite rough: bytes_copied don't take into account DROP PARTITION.
-        if (auto in = dynamic_cast<IProfilingBlockInputStream *>(io_select.in.get()))
+        auto update_stats = [&cluster_partition] (const Block & block)
         {
-            auto update_table_stats = [&] (const Progress & progress)
-            {
-                cluster_partition.bytes_copied += progress.bytes;
-                cluster_partition.rows_copied += progress.rows;
-            };
-
-            in->setProgressCallback(update_table_stats);
-        }
-
+            cluster_partition.bytes_copied += block.bytes();
+            cluster_partition.rows_copied += block.rows();
+            cluster_partition.blocks_copied += 1;
+        };
 
         /// Main work is here
-        copyData(*io_select.in, *io_insert.out, cancel_check);
+        copyData(*io_select.in, *io_insert.out, cancel_check, update_stats);
 
         // Just in case
         if (future_is_dirty_checker != nullptr)
@@ -1844,8 +1841,7 @@ protected:
 
         Context local_context = context;
         local_context.setSettings(task_cluster->settings_pull);
-        InterpreterSelectQuery interp(query_ast, local_context);
-        return interp.execute().in->read().rows() != 0;
+        return InterpreterFactory::get(query_ast, local_context)->execute().in->read().rows() != 0;
     }
 
     /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster

@@ -104,7 +104,18 @@ std::string DistributedBlockOutputStream::getCurrentStateDescription()
     {
         buffer << "Wrote " << job.blocks_written << " blocks and " << job.rows_written << " rows"
             << " on shard " << job.shard_index << " replica " << job.replica_index
-            << ", " << addresses[job.shard_index][job.replica_index].readableString() << "\n";
+            << ", " << addresses[job.shard_index][job.replica_index].readableString();
+
+        /// Performance statistics
+        if (job.bloks_started > 0)
+        {
+            buffer << " (average " << job.elapsed_time_ms / job.bloks_started << " ms per block";
+            if (job.bloks_started > 1)
+                buffer << ", the slowest block " << job.max_elapsed_time_for_block_ms << " ms";
+            buffer << ")";
+        }
+
+        buffer << "\n";
     }
 
     return buffer.str();
@@ -178,8 +189,15 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
     auto memory_tracker = current_memory_tracker;
     return [this, memory_tracker, &job]()
     {
+        SCOPE_EXIT({++finished_jobs_count;});
+
+        Stopwatch watch;
+        ++job.bloks_started;
+
         SCOPE_EXIT({
-            ++finished_jobs_count;
+            UInt64 elapsed_time_for_block_ms = watch.elapsedMilliseconds();
+            job.elapsed_time_ms += elapsed_time_for_block_ms;
+            job.max_elapsed_time_for_block_ms = std::max(job.max_elapsed_time_for_block_ms, elapsed_time_for_block_ms);
         });
 
         if (!current_memory_tracker)
@@ -250,7 +268,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
             job.stream->write(block);
         }
 
-        ++job.blocks_written;
+        job.blocks_written += 1;
         job.rows_written += block.rows();
     };
 }

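As a side note, the per-block accounting added above can be restated as a small self-contained sketch using only the standard library; the struct and field names below mirror the diff but are illustrative only (Stopwatch and SCOPE_EXIT are ClickHouse-internal helpers):

    #include <algorithm>
    #include <chrono>
    #include <cstdint>

    struct JobTimingSketch
    {
        uint64_t blocks_started = 0;                  // how many blocks this job has begun writing
        uint64_t elapsed_time_ms = 0;                 // total time spent writing blocks
        uint64_t max_elapsed_time_for_block_ms = 0;   // slowest single block

        template <typename WriteBlock>
        void timeOneBlock(WriteBlock && write_block)
        {
            ++blocks_started;
            const auto start = std::chrono::steady_clock::now();
            write_block();  // stands in for job.stream->write(block)
            const auto elapsed_ms = static_cast<uint64_t>(
                std::chrono::duration_cast<std::chrono::milliseconds>(
                    std::chrono::steady_clock::now() - start).count());
            elapsed_time_ms += elapsed_ms;
            max_elapsed_time_for_block_ms = std::max(max_elapsed_time_for_block_ms, elapsed_ms);
        }
    };
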
@@ -107,6 +107,10 @@ private:
 
         UInt64 blocks_written = 0;
         UInt64 rows_written = 0;
+
+        UInt64 bloks_started = 0;
+        UInt64 elapsed_time_ms = 0;
+        UInt64 max_elapsed_time_for_block_ms = 0;
     };
 
     std::vector<std::list<JobInfo>> per_shard_jobs;

@@ -43,5 +43,19 @@
         </shard>
     </cluster1>
 
+    <shard_0_0>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>s0_0_0</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>s0_0_1</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </shard_0_0>
+
 </remote_servers>
 </yandex>

@@ -0,0 +1,102 @@
+<?xml version="1.0"?>
+<yandex>
+    <!-- How many simultaneous workers are possible -->
+    <max_workers>1</max_workers>
+
+    <!-- Common settings for pull and push operations -->
+    <settings>
+        <connect_timeout>1</connect_timeout>
+    </settings>
+
+    <settings_pull>
+    </settings_pull>
+
+    <!-- Tasks -->
+    <tables>
+        <test_block_size>
+            <cluster_pull>shard_0_0</cluster_pull>
+            <database_pull>default</database_pull>
+            <table_pull>test_block_size</table_pull>
+
+            <cluster_push>cluster1</cluster_push>
+            <database_push>default</database_push>
+            <table_push>test_block_size</table_push>
+
+            <enabled_partitions>
+                <partition>'1970-01-01'</partition>
+            </enabled_partitions>
+
+            <!-- Engine of destination tables -->
+            <engine>ENGINE=
+                ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/test_block_size', '{replica}')
+                ORDER BY d PARTITION BY partition
+            </engine>
+
+            <!-- Which sharding key to use while copying -->
+            <sharding_key>jumpConsistentHash(intHash64(d), 2)</sharding_key>
+
+            <!-- Optional expression that filters the data to copy -->
+            <!-- <where_condition></where_condition> -->
+        </test_block_size>
+    </tables>
+
+    <!-- Configuration of clusters -->
+    <remote_servers>
+        <cluster0>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>s0_0_0</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>s0_0_1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>s0_1_0</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </cluster0>
+
+        <cluster1>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>s1_0_0</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>s1_0_1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>s1_1_0</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </cluster1>
+
+        <shard_0_0>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>s0_0_0</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>s0_0_1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </shard_0_0>
+    </remote_servers>
+
+</yandex>

@@ -140,6 +140,34 @@ class Task2:
         ddl_check_query(instance, "DROP TABLE b ON CLUSTER cluster1")
 
 
+class Task_test_block_size:
+
+    def __init__(self, cluster):
+        self.cluster = cluster
+        self.zk_task_path = "/clickhouse-copier/task_test_block_size"
+        self.copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task_test_block_size.xml'), 'r').read()
+        self.rows = 1000000
+
+
+    def start(self):
+        instance = cluster.instances['s0_0_0']
+
+        ddl_check_query(instance, """
+            CREATE TABLE test_block_size ON CLUSTER shard_0_0 (partition Date, d UInt64)
+            ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/a', '{replica}')
+            ORDER BY d""", 2)
+
+        instance.query("INSERT INTO test_block_size SELECT toDate(0) AS partition, number as d FROM system.numbers LIMIT {}".format(self.rows))
+
+
+    def check(self):
+        assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count() FROM cluster(cluster1, default, test_block_size)")) == TSV("{}\n".format(self.rows))
+
+        instance = cluster.instances['s0_0_0']
+        ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER shard_0_0", 2)
+        ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER cluster1")
+
+
 def execute_task(task, cmd_options):
     task.start()
 

@@ -198,6 +226,9 @@ def test_copy_month_to_week_partition(started_cluster):
 def test_copy_month_to_week_partition_with_recovering(started_cluster):
     execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.3)])
 
+def test_block_size(started_cluster):
+    execute_task(Task_test_block_size(started_cluster), [])
+
 
 if __name__ == '__main__':
     with contextmanager(started_cluster)() as cluster: