From 597181c45e2395991cbb032c7eb2dc3542124e6c Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 10 Sep 2024 16:32:52 +0800 Subject: [PATCH] review --- src/Core/Settings.h | 6 +++--- src/Core/SettingsChangesHistory.cpp | 6 +++--- src/Interpreters/HashJoin/HashJoin.cpp | 2 +- src/Interpreters/TableJoin.cpp | 4 ++-- src/Interpreters/TableJoin.h | 8 ++++---- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 88555c67e24..28041089d9f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -922,9 +922,9 @@ class IColumn; M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ - M(Int32, join_to_sort_perkey_rows_threshold, 40, "Rerange the right table by key in left or inner hash join when the per-key average rows of it exceed this value (means the table keys is dense) and its number of rows is not too many(controlled by `join_to_sort_table_rows_threshold`), to make the join output by the data batch of key, which would improve performance.", 0) \ - M(Int32, join_to_sort_table_rows_threshold, 10000, "Rerange the right table by key in left or inner hash join when its number of rows not exceed this value and the table keys is dense (controlled by `join_to_sort_perkey_rows_threshold`), to make the join performance improve as output by the data batch of key, but not cost too much on the table reranging.", 0) \ - M(Bool, allow_experimental_join_right_table_sorting, false, "If it is set to true, and the conditions of `join_to_sort_perkey_rows_threshold` and `join_to_sort_perkey_rows_threshold` are met, then we will try to rerange the right table by key to improve the performance in left or inner hash join.", 0) \ + M(Int32, join_to_sort_minimum_perkey_rows, 40, "The lower limit of per-key average rows in the right table to determine whether to rerange the right table by key in left or inner join. This setting ensures that the optimization is not applied for sparse table keys", 0) \ + M(Int32, join_to_sort_maximum_table_rows, 10000, "The maximum number of rows in the right table to determine whether to rerange the right table by key in left or inner join.", 0) \ + M(Bool, allow_experimental_join_right_table_sorting, false, "If it is set to true, and the conditions of `join_to_sort_minimum_perkey_rows` and `join_to_sort_maximum_table_rows` are met, rerange the right table by key to improve the performance in left or inner hash join.", 0) \ M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Bool, use_hive_partitioning, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines.", 0)\ \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 4ac7a0f2d8d..da29b6b11cd 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -95,9 +95,9 @@ static std::initializer_listsorted || data->blocks.empty() || data->maps.size() > 1 || data->rows_to_join > table_join->sortRightTableRowsThreshold() || data->avgPerKeyRows() < table_join->sortRightPerkeyRowsThreshold()) + if (!data || data->sorted || data->blocks.empty() || data->maps.size() > 1 || data->rows_to_join > table_join->sortRightMaximumTableRows() || data->avgPerKeyRows() < table_join->sortRightMinimumPerkeyRows()) return; if (data->keys_to_join == 0) diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 519264dd0e4..59a0374051f 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -116,8 +116,8 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_, Temporary , max_files_to_merge(settings.join_on_disk_max_files_to_merge) , temporary_files_codec(settings.temporary_files_codec) , output_by_rowlist_perkey_rows_threshold(settings.join_output_by_rowlist_perkey_rows_threshold) - , sort_right_perkey_rows_threshold(settings.join_to_sort_perkey_rows_threshold) - , sort_right_table_rows_threshold(settings.join_to_sort_table_rows_threshold) + , sort_right_minimum_perkey_rows(settings.join_to_sort_minimum_perkey_rows) + , sort_right_maximum_table_rows(settings.join_to_sort_maximum_table_rows) , allow_join_sorting(settings.allow_experimental_join_right_table_sorting) , max_memory_usage(settings.max_memory_usage) , tmp_volume(tmp_volume_) diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index c7926271a67..e1bae55a4ed 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -149,8 +149,8 @@ private: const size_t max_files_to_merge = 0; const String temporary_files_codec = "LZ4"; const size_t output_by_rowlist_perkey_rows_threshold = 0; - const size_t sort_right_perkey_rows_threshold = 0; - const size_t sort_right_table_rows_threshold = 0; + const size_t sort_right_minimum_perkey_rows = 0; + const size_t sort_right_maximum_table_rows = 0; const bool allow_join_sorting = false; /// Value if setting max_memory_usage for query, can be used when max_bytes_in_join is not specified. @@ -300,8 +300,8 @@ public: } size_t outputByRowListPerkeyRowsThreshold() const { return output_by_rowlist_perkey_rows_threshold; } - size_t sortRightPerkeyRowsThreshold() const { return sort_right_perkey_rows_threshold; } - size_t sortRightTableRowsThreshold() const { return sort_right_table_rows_threshold; } + size_t sortRightMinimumPerkeyRows() const { return sort_right_minimum_perkey_rows; } + size_t sortRightMaximumTableRows() const { return sort_right_maximum_table_rows; } bool allowJoinSorting() const { return allow_join_sorting; } size_t defaultMaxBytes() const { return default_max_bytes; } size_t maxJoinedBlockRows() const { return max_joined_block_rows; }