From 80259659ff2feb457500f71f20752e03dfe11c2e Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 17 Sep 2024 18:03:19 +0200 Subject: [PATCH 1/5] More asserts --- src/Interpreters/HashJoin/HashJoin.cpp | 42 +++++++++++++++++++++++--- src/Interpreters/HashJoin/HashJoin.h | 1 + 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index 1b8b45b94ea..23ce4fdf0f5 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -338,12 +338,8 @@ size_t HashJoin::getTotalRowCount() const return res; } -size_t HashJoin::getTotalByteCount() const +void HashJoin::doDebugAsserts() const { - if (!data) - return 0; - -#ifndef NDEBUG size_t debug_blocks_allocated_size = 0; for (const auto & block : data->blocks) debug_blocks_allocated_size += block.allocatedBytes(); @@ -359,6 +355,15 @@ size_t HashJoin::getTotalByteCount() const if (data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})", data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size); +} + +size_t HashJoin::getTotalByteCount() const +{ + if (!data) + return 0; + +#ifndef NDEBUG + doDebugAsserts(); #endif size_t res = 0; @@ -544,10 +549,17 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) have_compressed = true; } +#ifndef NDEBUG + doDebugAsserts(); +#endif data->blocks_allocated_size += block_to_save.allocatedBytes(); data->blocks.emplace_back(std::move(block_to_save)); Block * stored_block = &data->blocks.back(); +#ifndef NDEBUG + doDebugAsserts(); +#endif + if (rows) data->empty = false; @@ -634,9 +646,15 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) if (!flag_per_row && !is_inserted) { +#ifndef NDEBUG + doDebugAsserts(); +#endif LOG_TRACE(log, "Skipping inserting block with {} rows", rows); data->blocks_allocated_size -= stored_block->allocatedBytes(); data->blocks.pop_back(); +#ifndef NDEBUG + doDebugAsserts(); +#endif } if (!check_limits) @@ -687,6 +705,10 @@ void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join, bool force_ stored_block = stored_block.shrinkToFit(); size_t new_size = stored_block.allocatedBytes(); +#ifndef NDEBUG + doDebugAsserts(); +#endif + if (old_size >= new_size) { if (data->blocks_allocated_size < old_size - new_size) @@ -700,6 +722,10 @@ void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join, bool force_ else /// Sometimes after clone resized block can be bigger than original data->blocks_allocated_size += new_size - old_size; + +#ifndef NDEBUG + doDebugAsserts(); +#endif } auto new_total_bytes_in_join = getTotalByteCount(); @@ -1415,7 +1441,13 @@ void HashJoin::tryRerangeRightTableDataImpl(Map & map [[maybe_unused]]) }; BlocksList sorted_blocks; visit_rows_map(sorted_blocks, map); +#ifdef NDEBUG + doDebugAsserts(); +#endif data->blocks.swap(sorted_blocks); +#ifdef NDEBUG + doDebugAsserts(); +#endif } } diff --git a/src/Interpreters/HashJoin/HashJoin.h b/src/Interpreters/HashJoin/HashJoin.h index 230343691ea..4c1ebbcdc66 100644 --- a/src/Interpreters/HashJoin/HashJoin.h +++ b/src/Interpreters/HashJoin/HashJoin.h @@ -470,6 +470,7 @@ private: void tryRerangeRightTableData() override; template void tryRerangeRightTableDataImpl(Map & map); + void doDebugAsserts() const; }; } From 7c5d55c6b25b1138235010eccaeabe0b7e561c5d Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 17 Sep 2024 18:10:51 +0200 Subject: [PATCH 2/5] Lint --- src/Interpreters/HashJoin/HashJoin.cpp | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index 23ce4fdf0f5..4f78243b09e 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -340,6 +340,7 @@ size_t HashJoin::getTotalRowCount() const void HashJoin::doDebugAsserts() const { +#ifdef NDEBUG size_t debug_blocks_allocated_size = 0; for (const auto & block : data->blocks) debug_blocks_allocated_size += block.allocatedBytes(); @@ -355,6 +356,7 @@ void HashJoin::doDebugAsserts() const if (data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})", data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size); +#endif } size_t HashJoin::getTotalByteCount() const @@ -362,9 +364,7 @@ size_t HashJoin::getTotalByteCount() const if (!data) return 0; -#ifndef NDEBUG doDebugAsserts(); -#endif size_t res = 0; @@ -549,16 +549,11 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) have_compressed = true; } -#ifndef NDEBUG doDebugAsserts(); -#endif data->blocks_allocated_size += block_to_save.allocatedBytes(); data->blocks.emplace_back(std::move(block_to_save)); Block * stored_block = &data->blocks.back(); - -#ifndef NDEBUG doDebugAsserts(); -#endif if (rows) data->empty = false; @@ -646,15 +641,11 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) if (!flag_per_row && !is_inserted) { -#ifndef NDEBUG doDebugAsserts(); -#endif LOG_TRACE(log, "Skipping inserting block with {} rows", rows); data->blocks_allocated_size -= stored_block->allocatedBytes(); data->blocks.pop_back(); -#ifndef NDEBUG doDebugAsserts(); -#endif } if (!check_limits) @@ -705,9 +696,7 @@ void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join, bool force_ stored_block = stored_block.shrinkToFit(); size_t new_size = stored_block.allocatedBytes(); -#ifndef NDEBUG doDebugAsserts(); -#endif if (old_size >= new_size) { @@ -723,9 +712,7 @@ void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join, bool force_ /// Sometimes after clone resized block can be bigger than original data->blocks_allocated_size += new_size - old_size; -#ifndef NDEBUG doDebugAsserts(); -#endif } auto new_total_bytes_in_join = getTotalByteCount(); @@ -1441,13 +1428,9 @@ void HashJoin::tryRerangeRightTableDataImpl(Map & map [[maybe_unused]]) }; BlocksList sorted_blocks; visit_rows_map(sorted_blocks, map); -#ifdef NDEBUG doDebugAsserts(); -#endif data->blocks.swap(sorted_blocks); -#ifdef NDEBUG doDebugAsserts(); -#endif } } From a210f9881968377c155c390b05ec00cfa62a6ccd Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 17 Sep 2024 18:28:27 +0200 Subject: [PATCH 3/5] Lint --- src/Interpreters/HashJoin/HashJoin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index 4f78243b09e..9a1bfe2d408 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -340,7 +340,7 @@ size_t HashJoin::getTotalRowCount() const void HashJoin::doDebugAsserts() const { -#ifdef NDEBUG +#ifndef NDEBUG size_t debug_blocks_allocated_size = 0; for (const auto & block : data->blocks) debug_blocks_allocated_size += block.allocatedBytes(); From b08e727aef1f7509b5d6409c824f19660b14dab5 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 17 Sep 2024 19:02:10 +0200 Subject: [PATCH 4/5] Count allocated bytes from scratch after rerange --- src/Interpreters/HashJoin/HashJoin.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index 9a1bfe2d408..dfc41a93417 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -1430,6 +1430,10 @@ void HashJoin::tryRerangeRightTableDataImpl(Map & map [[maybe_unused]]) visit_rows_map(sorted_blocks, map); doDebugAsserts(); data->blocks.swap(sorted_blocks); + size_t new_blocks_allocated_size = 0; + for (const auto & block : data->blocks) + new_blocks_allocated_size += block.allocatedBytes(); + data->blocks_allocated_size = new_blocks_allocated_size; doDebugAsserts(); } } From cb2484939613238a450a3370cc29c75d0c295edf Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 18 Sep 2024 15:24:48 +0200 Subject: [PATCH 5/5] Move assert --- src/Interpreters/HashJoin/HashJoin.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index dfc41a93417..d1fa1280ed8 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -692,12 +692,12 @@ void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join, bool force_ for (auto & stored_block : data->blocks) { + doDebugAsserts(); + size_t old_size = stored_block.allocatedBytes(); stored_block = stored_block.shrinkToFit(); size_t new_size = stored_block.allocatedBytes(); - doDebugAsserts(); - if (old_size >= new_size) { if (data->blocks_allocated_size < old_size - new_size)