diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1b0e29fcce7..fecd3c6bdc5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -409,7 +409,7 @@ class IColumn; M(UInt64, max_rows_to_sort, 0, "If more than the specified amount of records have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \ M(UInt64, max_bytes_to_sort, 0, "If more than the specified amount of (uncompressed) bytes have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \ M(OverflowMode, sort_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \ - M(UInt64, prefer_external_sort_block_bytes, 0, "Prefer maximum block bytes for external sort, reduce the memory usage during merging.", 0) \ + M(UInt64, prefer_external_sort_block_bytes, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging.", 0) \ M(UInt64, max_bytes_before_external_sort, 0, "If memory usage during ORDER BY operation is exceeding this threshold in bytes, activate the 'external sorting' mode (spill data to disk). Recommended value is half of available system memory.", 0) \ M(UInt64, max_bytes_before_remerge_sort, 1000000000, "In case of ORDER BY with LIMIT, when memory usage is higher than specified threshold, perform additional steps of merging blocks before final merge to keep just top LIMIT rows.", 0) \ M(Float, remerge_sort_lowered_memory_bytes_ratio, 2., "If memory usage after remerge does not reduced by this ratio, remerge will be disabled.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 737d620063f..5d63a07fe58 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,7 +85,6 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { - {"24.5", {{"prefer_external_sort_block_bytes", 0, 0, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, @@ -94,6 +93,7 @@ static std::map sett {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, {"first_day_of_week", "Monday", "Monday", "Added a setting for the first day of the week for date/time functions"}, {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, + {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."} }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index b3ac91dee93..304b79e6c59 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -175,10 +175,11 @@ void MergeSortingTransform::consume(Chunk chunk) if (max_merged_block_size > 0) { auto avg_row_bytes = sum_bytes_in_blocks / sum_rows_in_blocks; - /// The memory usage of the block does not exceed max block bytes, and the number of rows in the block is not less than 128. - /// If the max_merged_block_size is less than 128, then the new block size is not less than max_merged_block_size - auto min_block_size = std::min(128UL, max_merged_block_size); - max_merged_block_size = std::max(std::min(max_merged_block_size, max_block_bytes / avg_row_bytes), min_block_size); + /// max_merged_block_size >= 128 + max_merged_block_size = std::max(std::min(max_merged_block_size, max_block_bytes / avg_row_bytes), 128UL); + /// when max_block_size < 128, use max_block_size. + /// max_block_size still works. + max_merged_block_size = std::min(this->max_merged_block_size, max_merged_block_size); } merge_sorter = std::make_unique(header_without_constants, std::move(chunks), description, max_merged_block_size, limit); auto current_processor = std::make_shared(header_without_constants, tmp_stream, log);