From 99786b1e71a077a34d52c10d1cd2df25abfe147d Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 11 Oct 2024 09:27:37 -0300 Subject: [PATCH] few comments addressed --- .../Parquet/ParquetBloomFilterCondition.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.cpp b/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.cpp index fa6d1748034..c38d91288ab 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.cpp @@ -173,6 +173,11 @@ std::vector hash(const IColumn * data_column, const parquet::ColumnDes } } + if (hashes.size() != data_column->size()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Where predicate value hashing failed for some rows, but succeeded for others. It should not happen"); + } + return hashes; } @@ -311,6 +316,10 @@ bool ParquetBloomFilterCondition::mayBeTrueOnRowGroup(const ColumnIndexToBF & co { rpn_stack.emplace_back(true, false); } + else if (element.function == Function::ALWAYS_FALSE) + { + rpn_stack.emplace_back(false, true); + } else { rpn_stack.emplace_back(true, true); @@ -420,6 +429,7 @@ std::vector keyConditionRPNToParq const auto & set_index = rpn_element.set_index; const auto & ordered_set = set_index->getOrderedSet(); const auto & indexes_mapping = set_index->getIndexesMapping(); + bool found_empty_column = false; std::vector key_columns; @@ -441,7 +451,8 @@ std::vector keyConditionRPNToParq if (column->empty()) { - continue; + found_empty_column = true; + break; } if (const auto & nullable_column = checkAndGetColumn(set_column.get())) @@ -461,6 +472,12 @@ std::vector keyConditionRPNToParq key_columns.push_back(indexes_mapping[i].key_index); } + if (found_empty_column) + { + condition_elements.emplace_back(Function::ALWAYS_FALSE); + continue; + } + if (hashes.empty()) { 
condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);