Merge pull request #16728 from azat/enable-use_compact_format_in_distributed_parts_names-by-default

Enable use_compact_format_in_distributed_parts_names by default
This commit is contained in:
alexey-milovidov 2020-11-07 13:40:54 +03:00 committed by GitHub
commit 7acfa7d407
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 33 additions and 8 deletions

View File

@ -1765,6 +1765,23 @@ Default value: `0`.
- [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed)
- [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed)
## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names}
Uses a compact format for storing blocks for asynchronous (`insert_distributed_sync`) INSERT into tables with the `Distributed` engine.
Possible values:
- 0 — Uses `user[:password]@host:port#default_database` directory format.
- 1 — Uses `[shard{shard_index}[_replica{replica_index}]]` directory format.
Default value: `1`.
!!! note "Note"
- With `use_compact_format_in_distributed_parts_names=0`, changes to the cluster definition will not be applied for async INSERT.
- With `use_compact_format_in_distributed_parts_names=1`, changing the order of the nodes in the cluster definition will change the `shard_index`/`replica_index`, so be aware.
## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can't be changed in a user session.

View File

@ -375,7 +375,7 @@ class IColumn;
M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, false, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \

View File

@ -47,10 +47,12 @@ def test_single_file(started_cluster, cluster):
def test_two_files(started_cluster, cluster):
node.query(
"create table test.distr_2 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster))
node.query("insert into test.distr_2 values (0, '_'), (1, 'a')",
settings={"use_compact_format_in_distributed_parts_names": "1"})
node.query("insert into test.distr_2 values (2, 'bb'), (3, 'ccc')",
settings={"use_compact_format_in_distributed_parts_names": "1"})
node.query("insert into test.distr_2 values (0, '_'), (1, 'a')", settings={
"use_compact_format_in_distributed_parts_names": "1",
})
node.query("insert into test.distr_2 values (2, 'bb'), (3, 'ccc')", settings={
"use_compact_format_in_distributed_parts_names": "1",
})
query = "select * from file('/var/lib/clickhouse/data/test/distr_2/shard1_replica1/{1,2,3,4}.bin', 'Distributed') order by x"
out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query])
@ -70,7 +72,9 @@ def test_two_files(started_cluster, cluster):
def test_single_file_old(started_cluster, cluster):
node.query(
"create table test.distr_3 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster))
node.query("insert into test.distr_3 values (1, 'a'), (2, 'bb'), (3, 'ccc')")
node.query("insert into test.distr_3 values (1, 'a'), (2, 'bb'), (3, 'ccc')", settings={
"use_compact_format_in_distributed_parts_names": "0",
})
query = "select * from file('/var/lib/clickhouse/data/test/distr_3/default@not_existing:9000/1.bin', 'Distributed')"
out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query])

View File

@ -48,7 +48,9 @@ def test_insert(start_cluster):
# manual only (but only for remote node)
node.query('SYSTEM STOP DISTRIBUTED SENDS test.dist_foo')
node.query('INSERT INTO test.dist_foo SELECT * FROM numbers(100)')
node.query('INSERT INTO test.dist_foo SELECT * FROM numbers(100)', settings={
'use_compact_format_in_distributed_parts_names': '0',
})
assert _files_in_dist_mon(node, 'disk1', 'dist_foo') == 1
assert _files_in_dist_mon(node, 'disk2', 'dist_foo') == 0
@ -61,7 +63,9 @@ def test_insert(start_cluster):
#
node.query('RENAME TABLE test.dist_foo TO test.dist2_foo')
node.query('INSERT INTO test.dist2_foo SELECT * FROM numbers(100)')
node.query('INSERT INTO test.dist2_foo SELECT * FROM numbers(100)', settings={
'use_compact_format_in_distributed_parts_names': '0',
})
assert _files_in_dist_mon(node, 'disk1', 'dist2_foo') == 0
assert _files_in_dist_mon(node, 'disk2', 'dist2_foo') == 1