add docs and settings randomizations

Anton Popov 2023-08-15 00:38:07 +00:00
parent 5e90a6f88a
commit 2e22b17d57
3 changed files with 13 additions and 4 deletions


@@ -555,7 +555,7 @@ Merge reads rows from parts in blocks of `merge_max_block_size` rows, then merge
## number_of_free_entries_in_pool_to_lower_max_size_of_merge {#number-of-free-entries-in-pool-to-lower-max-size-of-merge}
When there are fewer than the specified number of free entries in the pool (or replicated queue), start to lower the maximum size of merges to process (or to put in the queue).
This allows small merges to be processed, rather than filling the pool with long-running merges.
Possible values:
@@ -566,7 +566,7 @@ Default value: 8
## number_of_free_entries_in_pool_to_execute_mutation {#number-of-free-entries-in-pool-to-execute-mutation}
When there are fewer than the specified number of free entries in the pool, do not execute part mutations.
This leaves free threads for regular merges and avoids "Too many parts" errors.
Possible values:
@@ -832,6 +832,13 @@ You can see which parts of `s` were stored using the sparse serialization:
└────────┴────────────────────┘
```
## replace_long_file_name_to_hash {#replace_long_file_name_to_hash}
If the file name for a column is too long (more than `max_file_name_length` bytes), replace it with its SipHash128 hash. Default value: `false`.
## max_file_name_length {#max_file_name_length}
The maximal length of a file name that is kept as is, without hashing. Takes effect only if the setting `replace_long_file_name_to_hash` is enabled. Default value: `128`.
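
To make the behaviour of these two settings concrete, here is a minimal Python sketch of the decision they control. The helper name is hypothetical, and `hashlib.blake2b` stands in for SipHash128 (which ClickHouse uses internally but the Python standard library lacks):

```python
import hashlib

# Assumed values, mirroring the settings described above.
REPLACE_LONG_FILE_NAME_TO_HASH = True
MAX_FILE_NAME_LENGTH = 128

def column_file_name(name: str) -> str:
    # Hypothetical helper: keep the name as is while it fits,
    # otherwise replace it with a 128-bit hash of the name.
    # blake2b is a stand-in for SipHash128 here.
    if not REPLACE_LONG_FILE_NAME_TO_HASH:
        return name
    if len(name.encode("utf-8")) <= MAX_FILE_NAME_LENGTH:
        return name
    return hashlib.blake2b(name.encode("utf-8"), digest_size=16).hexdigest()
```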
## clean_deleted_rows {#clean_deleted_rows}
Enables or disables automatic deletion of rows flagged as `is_deleted` when `OPTIMIZE ... FINAL` is performed on a table using the ReplacingMergeTree engine. When disabled, the `CLEANUP` keyword has to be added to `OPTIMIZE ... FINAL` to get the same behaviour.
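
A usage sketch with the `clickhouse-driver` Python client; the table name `events` is hypothetical:

```python
from clickhouse_driver import Client  # pip install clickhouse-driver

client = Client(host="localhost")

# With clean_deleted_rows disabled (the default), a plain FINAL merge
# keeps rows flagged as is_deleted; CLEANUP is needed to purge them.
client.execute("OPTIMIZE TABLE events FINAL CLEANUP")
```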


@@ -34,8 +34,8 @@ struct Settings;
M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \
M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \
M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
M(Bool, replace_long_file_name_to_hash, true, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \
M(UInt64, max_file_name_length, 0, "The maximal length of the file name to keep it as is without hashing", 0) \
M(Bool, replace_long_file_name_to_hash, false, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \
M(UInt64, max_file_name_length, 128, "The maximal length of the file name to keep it as is without hashing", 0) \
/** Merge settings. */ \
M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \
M(UInt64, merge_max_block_size_bytes, 10 * 1024 * 1024, "How many bytes in blocks should be formed for merge operations. By default has the same value as `index_granularity_bytes`.", 0) \
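
These declarations set per-table defaults, which a table definition can override in its `SETTINGS` clause. A sketch using the `clickhouse-driver` Python client; the table and its schema are hypothetical:

```python
from clickhouse_driver import Client  # pip install clickhouse-driver

client = Client(host="localhost")

# Hypothetical table; the SETTINGS clause overrides the MergeTree
# defaults declared in MergeTreeSettings.h above.
client.execute("""
    CREATE TABLE t_long_names (key UInt64, value String)
    ENGINE = MergeTree
    ORDER BY key
    SETTINGS replace_long_file_name_to_hash = 1,
             max_file_name_length = 64
""")
```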


@@ -668,6 +668,8 @@ class MergeTreeSettingsRandomizer:
"compress_primary_key": lambda: random.randint(0, 1),
"marks_compress_block_size": lambda: random.randint(8000, 100000),
"primary_key_compress_block_size": lambda: random.randint(8000, 100000),
"replace_long_file_name_to_hash": lambda: random.randint(0, 1),
"max_file_name_length": threshold_generator(0.3, 0.3, 0, 128),
}
@staticmethod
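
For context, a plausible sketch of the `threshold_generator` helper used above, assuming the signature `(always_min_prob, always_max_prob, min_val, max_val)`; the real implementation lives in `clickhouse-test` and may differ in detail:

```python
import random

def threshold_generator(always_min_prob, always_max_prob, min_val, max_val):
    # Returns a nullary generator that usually samples uniformly from
    # [min_val, max_val], but with the given probabilities pins the
    # value to either boundary, so edge cases are exercised often.
    def gen():
        r = random.random()
        if r < always_min_prob:
            return min_val
        if r < always_min_prob + always_max_prob:
            return max_val
        if isinstance(min_val, int) and isinstance(max_val, int):
            return random.randint(min_val, max_val)
        return random.uniform(min_val, max_val)
    return gen

# threshold_generator(0.3, 0.3, 0, 128) would then yield 0 or 128 about
# thirty percent of the time each, and a random length in between otherwise.
```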