Fixed extra squashing leaded to too big timeouts. [#CLICKHOUSE-3346]

This commit is contained in:
Vitaliy Lyudvichenko 2018-03-11 21:36:09 +03:00
parent 96d4e59dab
commit 32b617e1d6
9 changed files with 208 additions and 26 deletions

View File

@ -16,8 +16,8 @@ bool isAtomicSet(std::atomic<bool> * val)
}
template <typename Pred>
void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_cancelled)
template <typename TCancelCallback, typename TProgressCallback>
void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCallback && is_cancelled, TProgressCallback && progress)
{
from.readPrefix();
to.writePrefix();
@ -28,6 +28,7 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_
break;
to.write(block);
progress(block);
}
if (is_cancelled())
@ -51,6 +52,8 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, Pred && is_
}
inline void doNothing(const Block &) {}
void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<bool> * is_cancelled)
{
auto is_cancelled_pred = [is_cancelled] ()
@ -58,13 +61,19 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<boo
return isAtomicSet(is_cancelled);
};
copyDataImpl(from, to, is_cancelled_pred);
copyDataImpl(from, to, is_cancelled_pred, doNothing);
}
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled)
{
copyDataImpl(from, to, is_cancelled);
copyDataImpl(from, to, is_cancelled, doNothing);
}
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,
const std::function<void(const Block & block)> & progress)
{
copyDataImpl(from, to, is_cancelled, progress);
}
}

View File

@ -9,6 +9,7 @@ namespace DB
class IBlockInputStream;
class IBlockOutputStream;
class Block;
/** Copies data from the InputStream into the OutputStream
* (for example, from the database to the console, etc.)
@ -17,4 +18,7 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<boo
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled);
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,
const std::function<void(const Block & block)> & progress);
}

View File

@ -106,8 +106,12 @@ BlockIO InterpreterInsertQuery::execute()
out, getSampleBlock(query, table), required_columns, table->column_defaults, context,
static_cast<bool>(context.getSettingsRef().strict_insert_defaults));
out = std::make_shared<SquashingBlockOutputStream>(
out, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
/// Do not squash blocks if it is a sync INSERT into Distributed
if (!(context.getSettingsRef().insert_distributed_sync && table->getName() == "Distributed"))
{
out = std::make_shared<SquashingBlockOutputStream>(
out, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
}
auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
out_wrapper->setProcessListElement(context.getProcessListElement());

View File

@ -235,6 +235,7 @@ struct ClusterPartition
double elapsed_time_seconds = 0;
UInt64 bytes_copied = 0;
UInt64 rows_copied = 0;
UInt64 blocks_copied = 0;
size_t total_tries = 0;
};
@ -1334,9 +1335,9 @@ protected:
double elapsed = cluster_partition.elapsed_time_seconds;
LOG_INFO(log, "It took " << std::fixed << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name
<< ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied)
<< " uncompressed bytes and "
<< formatReadableQuantity(cluster_partition.rows_copied) << " rows are copied");
<< ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied) << " uncompressed bytes"
<< ", " << formatReadableQuantity(cluster_partition.rows_copied) << " rows"
<< " and " << cluster_partition.blocks_copied << " source blocks are copied");
if (cluster_partition.rows_copied)
{
@ -1347,8 +1348,7 @@ protected:
if (task_table.rows_copied)
{
LOG_INFO(log, "Average table " << task_table.table_id << " speed: "
<< formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed)
<< " per second.");
<< formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed) << " per second.");
}
}
}
@ -1430,6 +1430,7 @@ protected:
{
String query;
query += "SELECT " + fields + " FROM " + getDatabaseDotTable(from_table);
/// TODO: Bad, it is better to rewrite with ASTLiteral(partition_key_field)
query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = " + task_partition.name + ")";
if (!task_table.where_condition_str.empty())
query += " AND (" + task_table.where_condition_str + ")";
@ -1655,19 +1656,15 @@ protected:
/// Update statistics
/// It is quite rough: bytes_copied don't take into account DROP PARTITION.
if (auto in = dynamic_cast<IProfilingBlockInputStream *>(io_select.in.get()))
auto update_stats = [&cluster_partition] (const Block & block)
{
auto update_table_stats = [&] (const Progress & progress)
{
cluster_partition.bytes_copied += progress.bytes;
cluster_partition.rows_copied += progress.rows;
};
in->setProgressCallback(update_table_stats);
}
cluster_partition.bytes_copied += block.bytes();
cluster_partition.rows_copied += block.rows();
cluster_partition.blocks_copied += 1;
};
/// Main work is here
copyData(*io_select.in, *io_insert.out, cancel_check);
copyData(*io_select.in, *io_insert.out, cancel_check, update_stats);
// Just in case
if (future_is_dirty_checker != nullptr)
@ -1844,8 +1841,7 @@ protected:
Context local_context = context;
local_context.setSettings(task_cluster->settings_pull);
InterpreterSelectQuery interp(query_ast, local_context);
return interp.execute().in->read().rows() != 0;
return InterpreterFactory::get(query_ast, local_context)->execute().in->read().rows() != 0;
}
/** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster

View File

@ -104,7 +104,18 @@ std::string DistributedBlockOutputStream::getCurrentStateDescription()
{
buffer << "Wrote " << job.blocks_written << " blocks and " << job.rows_written << " rows"
<< " on shard " << job.shard_index << " replica " << job.replica_index
<< ", " << addresses[job.shard_index][job.replica_index].readableString() << "\n";
<< ", " << addresses[job.shard_index][job.replica_index].readableString();
/// Performance statistics
if (job.bloks_started > 0)
{
buffer << " (average " << job.elapsed_time_ms / job.bloks_started << " ms per block";
if (job.bloks_started > 1)
buffer << ", the slowest block " << job.max_elapsed_time_for_block_ms << " ms";
buffer << ")";
}
buffer << "\n";
}
return buffer.str();
@ -178,8 +189,15 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
auto memory_tracker = current_memory_tracker;
return [this, memory_tracker, &job]()
{
SCOPE_EXIT({++finished_jobs_count;});
Stopwatch watch;
++job.bloks_started;
SCOPE_EXIT({
++finished_jobs_count;
UInt64 elapsed_time_for_block_ms = watch.elapsedMilliseconds();
job.elapsed_time_ms += elapsed_time_for_block_ms;
job.max_elapsed_time_for_block_ms = std::max(job.max_elapsed_time_for_block_ms, elapsed_time_for_block_ms);
});
if (!current_memory_tracker)
@ -250,7 +268,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
job.stream->write(block);
}
++job.blocks_written;
job.blocks_written += 1;
job.rows_written += block.rows();
};
}

View File

@ -107,6 +107,10 @@ private:
UInt64 blocks_written = 0;
UInt64 rows_written = 0;
UInt64 bloks_started = 0;
UInt64 elapsed_time_ms = 0;
UInt64 max_elapsed_time_for_block_ms = 0;
};
std::vector<std::list<JobInfo>> per_shard_jobs;

View File

@ -43,5 +43,19 @@
</shard>
</cluster1>
<shard_0_0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
</shard_0_0>
</remote_servers>
</yandex>

View File

@ -0,0 +1,102 @@
<?xml version="1.0"?>
<yandex>
<!-- How many simualteneous workers are posssible -->
<max_workers>1</max_workers>
<!-- Common setting for pull and push operations -->
<settings>
<connect_timeout>1</connect_timeout>
</settings>
<settings_pull>
</settings_pull>
<!-- Tasks -->
<tables>
<test_block_size>
<cluster_pull>shard_0_0</cluster_pull>
<database_pull>default</database_pull>
<table_pull>test_block_size</table_pull>
<cluster_push>cluster1</cluster_push>
<database_push>default</database_push>
<table_push>test_block_size</table_push>
<enabled_partitions>
<partition>'1970-01-01'</partition>
</enabled_partitions>
<!-- Engine of destination tables -->
<engine>ENGINE=
ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/test_block_size', '{replica}')
ORDER BY d PARTITION BY partition
</engine>
<!-- Which sarding key to use while copying -->
<sharding_key>jumpConsistentHash(intHash64(d), 2)</sharding_key>
<!-- Optional expression that filter copying data -->
<!-- <where_condition></where_condition> -->
</test_block_size>
</tables>
<!-- Configuration of clusters -->
<remote_servers>
<cluster0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster0>
<cluster1>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s1_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster1>
<shard_0_0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
</shard_0_0>
</remote_servers>
</yandex>

View File

@ -140,6 +140,34 @@ class Task2:
ddl_check_query(instance, "DROP TABLE b ON CLUSTER cluster1")
class Task_test_block_size:
def __init__(self, cluster):
self.cluster = cluster
self.zk_task_path="/clickhouse-copier/task_test_block_size"
self.copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task_test_block_size.xml'), 'r').read()
self.rows = 1000000
def start(self):
instance = cluster.instances['s0_0_0']
ddl_check_query(instance, """
CREATE TABLE test_block_size ON CLUSTER shard_0_0 (partition Date, d UInt64)
ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/a', '{replica}')
ORDER BY d""", 2)
instance.query("INSERT INTO test_block_size SELECT toDate(0) AS partition, number as d FROM system.numbers LIMIT {}".format(self.rows))
def check(self):
assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count() FROM cluster(cluster1, default, test_block_size)")) == TSV("{}\n".format(self.rows))
instance = cluster.instances['s0_0_0']
ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER shard_0_0", 2)
ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER cluster1")
def execute_task(task, cmd_options):
task.start()
@ -198,6 +226,9 @@ def test_copy_month_to_week_partition(started_cluster):
def test_copy_month_to_week_partition_with_recovering(started_cluster):
execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.3)])
def test_block_size(started_cluster):
execute_task(Task_test_block_size(started_cluster), [])
if __name__ == '__main__':
with contextmanager(started_cluster)() as cluster: