Merge pull request #65243 from nickitat/fix_cross_join_perf

Fix perf regression introduced in #60459
This commit is contained in:
Alexey Milovidov 2024-06-14 06:00:33 +00:00 committed by GitHub
commit d93ce45a13
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 12 additions and 3 deletions

View File

@ -869,6 +869,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
|| (min_rows_to_compress && getTotalRowCount() >= min_rows_to_compress)))
{
block_to_save = block_to_save.compress();
have_compressed = true;
}
data->blocks_allocated_size += block_to_save.allocatedBytes();
@ -2317,14 +2318,19 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed)
}
};
for (const Block & compressed_block_right : data->blocks)
for (const Block & block_right : data->blocks)
{
++block_number;
if (block_number < start_right_block)
continue;
auto block_right = compressed_block_right.decompress();
process_right_block(block_right);
/// The following statement cannot be replaced with `process_right_block(!have_compressed ? block_right : block_right.decompress())`,
/// because the conditional expression would yield a temporary `Block` (the common type of `block_right` and `block_right.decompress()`), so `block_right` would be copied even when the non-compressed branch is taken.
if (!have_compressed)
process_right_block(block_right);
else
process_right_block(block_right.decompress());
if (rows_added > max_joined_block_rows)
{
break;

View File

@ -434,7 +434,10 @@ private:
/// Changes in hash table broke correspondence,
/// so we must guarantee constantness of hash table during HashJoin lifetime (using method setLock)
mutable JoinStuff::JoinUsedFlags used_flags;
RightTableDataPtr data;
bool have_compressed = false;
std::vector<Sizes> key_sizes;
/// Needed to do external cross join