feat: Add option to disable grace hash join in auto join_algorithm

This commit is contained in:
Sergey Skvortsov 2022-06-18 21:41:23 +03:00
parent 6696ccccb6
commit d835dc95e6
No known key found for this signature in database
GPG Key ID: 120217CE540C3670
3 changed files with 6 additions and 2 deletions

View File

@ -347,7 +347,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge', 'parallel_hash', 'grace_hash'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge', 'parallel_hash', 'grace_hash'. 'auto' tries to change HashJoin to GraceHashJoin or MergeJoin on the fly to avoid out of memory.", 0) \
M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \
M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \
M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \
@ -600,6 +600,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
M(Bool, throw_if_no_data_to_insert, true, "Enables or disables empty INSERTs, enabled by default", 0) \
M(Bool, compatibility_ignore_auto_increment_in_create_table, false, "Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL", 0) \
M(Bool, allow_grace_hash_join, true, "Allow selecting grace hash join in 'auto' join algorithm", 0) \
M(UInt64, grace_hash_join_initial_buckets, 32, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "Buffer size for grace hash join temporary files. It makes sense to use smaller values, especially for SSDs, to save memory", 0) \

View File

@ -107,6 +107,7 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_)
, partial_merge_join_left_table_buffer_bytes(settings.partial_merge_join_left_table_buffer_bytes)
, max_files_to_merge(settings.join_on_disk_max_files_to_merge)
, temporary_files_codec(settings.temporary_files_codec)
, grace_hash_join_allowed(settings.allow_grace_hash_join)
, tmp_volume(tmp_volume_)
{
}
@ -415,10 +416,11 @@ bool TableJoin::allowMergeJoin() const
/// Tells whether GraceHashJoin may be used for this join.
/// It is allowed only when all of the following hold:
///  - it is not switched off in the configuration (`grace_hash_join_allowed`,
///    which the constructor fills from the `allow_grace_hash_join` setting);
///  - the join kind is neither RIGHT nor FULL, nor CROSS / comma;
///  - the strictness is not ASOF;
///  - oneDisjunct() holds.
bool TableJoin::allowGraceHashJoin() const
{
    const bool enabled_in_config = grace_hash_join_allowed;
    const bool is_asof = (strictness() == ASTTableJoin::Strictness::Asof);
    const bool is_right_or_full = isRight(kind()) || isFull(kind());

    /// Single exit point: the configuration flag must take part in the decision,
    /// otherwise the setting would have no effect.
    return enabled_in_config && !is_right_or_full && !is_asof && !isCrossOrComma(kind()) && oneDisjunct();
}
bool TableJoin::needStreamWithNonJoinedRows() const

View File

@ -112,6 +112,7 @@ private:
const size_t partial_merge_join_left_table_buffer_bytes = 0;
const size_t max_files_to_merge = 0;
const String temporary_files_codec = "LZ4";
/// Set from `settings.allow_grace_hash_join` by the Settings-based constructor; read by allowGraceHashJoin().
const bool grace_hash_join_allowed = true;
/// the limit has no technical reason behind it; it is only supposed to improve safety
const size_t MAX_DISJUNCTS = 16; /// NOLINT