Merge pull request #42587 from ClickHouse/compress-marks-primary-key-by-default

Compress marks and primary key by default
Alexey Milovidov 2023-05-09 06:43:10 +03:00 committed by GitHub
commit 9a73a04fe3
7 changed files with 29 additions and 13 deletions


@@ -166,8 +166,8 @@ struct Settings;
M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for Zero-copy table-independet info.", 0) \
M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \
/** Compress marks and primary key. */ \
- M(Bool, compress_marks, false, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \
- M(Bool, compress_primary_key, false, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \
+ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \
+ M(Bool, compress_primary_key, true, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \
M(String, marks_compression_codec, "ZSTD(3)", "Compression encoding used by marks, marks are small enough and cached, so the default compression is ZSTD(3).", 0) \
M(String, primary_key_compression_codec, "ZSTD(3)", "Compression encoding used by primary, primary key is small enough and cached, so the default compression is ZSTD(3).", 0) \
M(UInt64, marks_compress_block_size, 65536, "Mark compress block size, the actual size of the block to compress.", 0) \
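
The hunk above is the core of the change: the MergeTree settings compress_marks and compress_primary_key flip from false to true, so newly created tables write compressed mark and primary key files by default, while the codecs and block size keep the defaults shown in the surrounding lines. As a minimal sketch of how a single table could opt back out (the table and column names are illustrative, not part of this commit), the same settings can be given per table in the CREATE TABLE statement:

CREATE TABLE example_events
(
    event_time DateTime,
    user_id UInt64
)
ENGINE = MergeTree
ORDER BY (user_id, event_time)
SETTINGS
    compress_marks = 0,        -- keep uncompressed mark files (the pre-change behaviour)
    compress_primary_key = 0;  -- keep the primary key file uncompressed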


@@ -535,7 +535,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
if (!args.storage_def->order_by)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"You must provide an ORDER BY or PRIMARY KEY expression in the table definition. "
"If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY tuple()");
"If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY ()");
/// Get sorting key from engine arguments.
///
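
This hunk only rewords the error message shown for tables declared without a sorting key. For reference, a minimal sketch of the intentionally unsorted case (the table name is illustrative); ORDER BY tuple() is the long-standing spelling of an empty sorting key:

-- A MergeTree table with no sorting key: parts are stored unsorted.
CREATE TABLE unsorted_log
(
    message String
)
ENGINE = MergeTree
ORDER BY tuple();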


@@ -0,0 +1,6 @@
+ <clickhouse>
+ <merge_tree>
+ <compress_marks>0</compress_marks>
+ <compress_primary_key>0</compress_primary_key>
+ </merge_tree>
+ </clickhouse>
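
The new config file above pins both settings back to 0 on the node that loads it, so that node keeps writing uncompressed marks and primary keys. One way to check which values are actually in effect on a running server, assuming the standard system.merge_tree_settings table, is:

SELECT name, value, changed
FROM system.merge_tree_settings
WHERE name IN ('compress_marks', 'compress_primary_key');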


@@ -1,7 +1,5 @@
<clickhouse>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
- <compress_marks>0</compress_marks>
- <compress_primary_key>0</compress_primary_key>
</merge_tree>
</clickhouse>


@@ -12,7 +12,9 @@ node1 = cluster.add_instance(
with_installed_binary=True,
)
node2 = cluster.add_instance(
"node2", main_configs=["configs/wide_parts_only.xml"], with_zookeeper=True
"node2",
main_configs=["configs/wide_parts_only.xml", "configs/no_compress_marks.xml"],
with_zookeeper=True,
)


@@ -14,6 +14,7 @@ node_old = cluster.add_instance(
)
node_new = cluster.add_instance(
"node2",
+ main_configs=["configs/no_compress_marks.xml"],
with_zookeeper=True,
stay_alive=True,
)
@@ -29,7 +30,7 @@ def start_cluster():
cluster.shutdown()
- def test_vertical_merges_from_comapact_parts(start_cluster):
+ def test_vertical_merges_from_compact_parts(start_cluster):
for i, node in enumerate([node_old, node_new]):
node.query(
"""
@@ -41,7 +42,7 @@ def test_vertical_merges_from_comapact_parts(start_cluster):
vertical_merge_algorithm_min_rows_to_activate = 1,
vertical_merge_algorithm_min_columns_to_activate = 1,
min_bytes_for_wide_part = 0,
- min_rows_for_wide_part = 100;
+ min_rows_for_wide_part = 100
""".format(
i
)
@@ -104,8 +105,16 @@ def test_vertical_merges_from_comapact_parts(start_cluster):
node_old.query("SYSTEM FLUSH LOGS")
assert not (
- node_old.contains_in_log("CHECKSUM_DOESNT_MATCH")
- or node_new.contains_in_log("CHECKSUM_DOESNT_MATCH")
+ # Now the old node has been restarted as a new one; its config allows compressed indices and it merged the data into compressed indices,
+ # so the error about a different number of compressed files is expected and OK.
+ (
+ node_old.contains_in_log("CHECKSUM_DOESNT_MATCH")
+ and not node_old.contains_in_log("Different number of files")
+ )
+ or (
+ node_new.contains_in_log("CHECKSUM_DOESNT_MATCH")
+ and not node_new.contains_in_log("Different number of files")
+ )
)
assert node_new.query(check_query.format("all_0_3_3")) == "Vertical\tWide\n"


@@ -227,7 +227,7 @@ def test_merge_tree_load_parts_filesystem_error(started_cluster):
# It can be a filesystem exception triggered at initialization of part storage but it hard
# to trigger it because it should be an exception on stat/listDirectory.
# The most easy way to trigger such exception is to use chmod but clickhouse server
- # is run with root user in integration test and this won't work. So let's do some
+ # is run with root user in integration test and this won't work. So let's do
# some stupid things: create a table without adaptive granularity and change mark
# extensions of data files in part to make clickhouse think that it's a compact part which
# cannot be created in such table. This will trigger a LOGICAL_ERROR on part creation.
@@ -240,7 +240,8 @@ def test_merge_tree_load_parts_filesystem_error(started_cluster):
).strip()
node3.exec_in_container(
["bash", "-c", f"mv {part_path}id.mrk {part_path}id.mrk3"], privileged=True
["bash", "-c", f"mv {part_path}id.cmrk {part_path}id.cmrk3"],
privileged=True,
)
corrupt_part("mt_load_parts", "all_1_1_0")
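
The rename reflects the new on-disk layout: with compression enabled, mark files get the .cmrk-style extensions seen above instead of the uncompressed .mrk variants, which is why the test now has to move id.cmrk rather than id.mrk to make the part look compact. As a rough way to see the effect on mark size for a populated table (the table name mt_load_parts is taken from this test and assumed to exist), one can read marks_bytes from system.parts:

SELECT name, marks_bytes, bytes_on_disk
FROM system.parts
WHERE table = 'mt_load_parts' AND active;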