Merge pull request #42587 from ClickHouse/compress-marks-primary-key-by-default

Compress marks and primary key by default
Alexey Milovidov 2023-05-09 06:43:10 +03:00 committed by GitHub
commit 9a73a04fe3
7 changed files with 29 additions and 13 deletions


@@ -166,8 +166,8 @@ struct Settings;
M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for Zero-copy table-independet info.", 0) \
M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \
/** Compress marks and primary key. */ \
- M(Bool, compress_marks, false, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \
- M(Bool, compress_primary_key, false, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \
+ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \
+ M(Bool, compress_primary_key, true, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \
M(String, marks_compression_codec, "ZSTD(3)", "Compression encoding used by marks, marks are small enough and cached, so the default compression is ZSTD(3).", 0) \
M(String, primary_key_compression_codec, "ZSTD(3)", "Compression encoding used by primary, primary key is small enough and cached, so the default compression is ZSTD(3).", 0) \
M(UInt64, marks_compress_block_size, 65536, "Mark compress block size, the actual size of the block to compress.", 0) \
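
The hunk above is the core of the change: the MergeTree settings compress_marks and compress_primary_key flip from false to true, so newly created tables write compressed mark and primary key files by default, while the codecs and block size keep the defaults shown in the surrounding lines. As a minimal sketch of how a single table could opt back out (the table and column names are illustrative, not part of this commit), the same settings can be given per table in the CREATE TABLE statement:

CREATE TABLE example_events
(
    event_time DateTime,
    user_id UInt64
)
ENGINE = MergeTree
ORDER BY (user_id, event_time)
SETTINGS
    compress_marks = 0,        -- keep uncompressed mark files (the pre-change behaviour)
    compress_primary_key = 0;  -- keep the primary key file uncompressed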


@@ -535,7 +535,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
if (!args.storage_def->order_by)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"You must provide an ORDER BY or PRIMARY KEY expression in the table definition. "
"If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY tuple()");
"If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY ()");
/// Get sorting key from engine arguments.
///
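
This hunk only rewords the error message shown for tables declared without a sorting key. For reference, a minimal sketch of the intentionally unsorted case (the table name is illustrative); ORDER BY tuple() is the long-standing spelling of an empty sorting key:

-- A MergeTree table with no sorting key: parts are stored unsorted.
CREATE TABLE unsorted_log
(
    message String
)
ENGINE = MergeTree
ORDER BY tuple();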


@@ -0,0 +1,6 @@
+ <clickhouse>
+ <merge_tree>
+ <compress_marks>0</compress_marks>
+ <compress_primary_key>0</compress_primary_key>
+ </merge_tree>
+ </clickhouse>
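
The new config file above pins both settings back to 0 on the node that loads it, so that node keeps writing uncompressed marks and primary keys. One way to check which values are actually in effect on a running server, assuming the standard system.merge_tree_settings table, is:

SELECT name, value, changed
FROM system.merge_tree_settings
WHERE name IN ('compress_marks', 'compress_primary_key');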


@@ -1,7 +1,5 @@
<clickhouse>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
- <compress_marks>0</compress_marks>
- <compress_primary_key>0</compress_primary_key>
</merge_tree>
</clickhouse>


@@ -12,7 +12,9 @@ node1 = cluster.add_instance(
with_installed_binary=True,
)
node2 = cluster.add_instance(
"node2", main_configs=["configs/wide_parts_only.xml"], with_zookeeper=True
"node2",
main_configs=["configs/wide_parts_only.xml", "configs/no_compress_marks.xml"],
with_zookeeper=True,
)


@@ -14,6 +14,7 @@ node_old = cluster.add_instance(
)
node_new = cluster.add_instance(
"node2",
+ main_configs=["configs/no_compress_marks.xml"],
with_zookeeper=True,
stay_alive=True,
)
@@ -29,7 +30,7 @@ def start_cluster():
cluster.shutdown()
- def test_vertical_merges_from_comapact_parts(start_cluster):
+ def test_vertical_merges_from_compact_parts(start_cluster):
for i, node in enumerate([node_old, node_new]):
node.query(
"""
@@ -41,7 +42,7 @@ def test_vertical_merges_from_comapact_parts(start_cluster):
vertical_merge_algorithm_min_rows_to_activate = 1,
vertical_merge_algorithm_min_columns_to_activate = 1,
min_bytes_for_wide_part = 0,
- min_rows_for_wide_part = 100;
+ min_rows_for_wide_part = 100
""".format(
i
)
@@ -104,8 +105,16 @@ def test_vertical_merges_from_comapact_parts(start_cluster):
node_old.query("SYSTEM FLUSH LOGS")
assert not (
- node_old.contains_in_log("CHECKSUM_DOESNT_MATCH")
- or node_new.contains_in_log("CHECKSUM_DOESNT_MATCH")
+ # Now the old node has been restarted as a new one; its config allows compressed indices and it merged the data into compressed indices,
+ # so the error about a different number of compressed files is expected and OK.
+ (
+ node_old.contains_in_log("CHECKSUM_DOESNT_MATCH")
+ and not node_old.contains_in_log("Different number of files")
+ )
+ or (
+ node_new.contains_in_log("CHECKSUM_DOESNT_MATCH")
+ and not node_new.contains_in_log("Different number of files")
+ )
)
assert node_new.query(check_query.format("all_0_3_3")) == "Vertical\tWide\n"


@@ -227,7 +227,7 @@ def test_merge_tree_load_parts_filesystem_error(started_cluster):
# It can be a filesystem exception triggered at initialization of part storage but it hard
# to trigger it because it should be an exception on stat/listDirectory.
# The most easy way to trigger such exception is to use chmod but clickhouse server
- # is run with root user in integration test and this won't work. So let's do some
+ # is run with root user in integration test and this won't work. So let's do
# some stupid things: create a table without adaptive granularity and change mark
# extensions of data files in part to make clickhouse think that it's a compact part which
# cannot be created in such table. This will trigger a LOGICAL_ERROR on part creation.
@@ -240,7 +240,8 @@ def test_merge_tree_load_parts_filesystem_error(started_cluster):
).strip()
node3.exec_in_container(
["bash", "-c", f"mv {part_path}id.mrk {part_path}id.mrk3"], privileged=True
["bash", "-c", f"mv {part_path}id.cmrk {part_path}id.cmrk3"],
privileged=True,
)
corrupt_part("mt_load_parts", "all_1_1_0")
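
The rename reflects the new on-disk layout: with compression enabled, mark files get the .cmrk-style extensions seen above instead of the uncompressed .mrk variants, which is why the test now has to move id.cmrk rather than id.mrk to make the part look compact. As a rough way to see the effect on mark size for a populated table (the table name mt_load_parts is taken from this test and assumed to exist), one can read marks_bytes from system.parts:

SELECT name, marks_bytes, bytes_on_disk
FROM system.parts
WHERE table = 'mt_load_parts' AND active;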