From db4735a10508e02b50565bba8a1e71161df90f82 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 17 Jul 2023 12:12:25 +0000 Subject: [PATCH] Implemented requested changes --- src/Functions/array/arrayIntersect.cpp | 47 +++++++++++--------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 7a8bde7ab5f..83b26b56071 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -510,7 +510,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable map.clear(); bool all_has_nullable = all_nullable; - bool current_has_nullable = false; + bool current_has_nullable; for (size_t arg_num = 0; arg_num < args; ++arg_num) { @@ -549,7 +549,8 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable ++(*value); } } - + // We update offsets for all the arrays except the first one. Offsets for the first array would be updated later. + // It is needed to iterate the first array again so that the elements in the result would have fixed order. 
if (arg_num) { prev_off[arg_num] = off; @@ -570,15 +571,21 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable else off = (*arg.offsets)[row]; + bool is_map_serialized = false; for (auto i : collections::range(prev_off[0], off)) { - all_has_nullable = all_nullable; - current_has_nullable = false; typename Map::LookupResult pair = nullptr; if (arg.null_map && (*arg.null_map)[i]) { current_has_nullable = true; + if (all_has_nullable && !null_added) + { + ++result_offset; + result_data.insertDefault(); + null_map.push_back(1); + null_added = true; + } if (null_added) continue; } @@ -591,51 +598,37 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable else { const char * data = nullptr; - pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); + if (!is_map_serialized) + { + pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); + is_map_serialized = true; + } } prev_off[0] = off; if (arg.is_const) prev_off[0] = 0; - if (!current_has_nullable) - all_has_nullable = false; - if (pair && pair->getMapped() == args) { + // Bump pair->getMapped() past `args` so that the `== args` check above fails for later duplicates of this value in the first array, i.e. each matching value is emitted only once. 
++pair->getMapped(); ++result_offset; if constexpr (is_numeric_column) { - if (pair->getKey() == columns[0]->getElement(i)) - { - result_data.insertValue(pair->getKey()); - } + result_data.insertValue(pair->getKey()); } else if constexpr (std::is_same_v || std::is_same_v) { - if (pair->getKey() == columns[0]->getDataAt(i)) - { - result_data.insertData(pair->getKey().data, pair->getKey().size); - } + result_data.insertData(pair->getKey().data, pair->getKey().size); } else { const char * data = nullptr; - if (pair->getKey() == columns[0]->serializeValueIntoArena(i, arena, data)) - { - result_data.deserializeAndInsertFromArena(pair->getKey().data); - } + result_data.deserializeAndInsertFromArena(pair->getKey().data); } if (all_nullable) null_map.push_back(0); } - if (all_has_nullable && !null_added) - { - ++result_offset; - result_data.insertDefault(); - null_map.push_back(1); - null_added = true; - } } result_offsets.getElement(row) = result_offset;