From 1dcfaa91c2e23019dc9159956aec0d1023f254d4 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 21 Aug 2024 17:54:38 -0600 Subject: [PATCH 01/14] Templatize FunctionArrayIntersect with new mode types --- src/Functions/array/arrayIntersect.cpp | 44 +++++++++++++++++++------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 209441eb301..8affe1ac11c 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -35,10 +35,21 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +struct ArrayModeIntersect +{ + static constexpr auto name = "arrayIntersect"; +}; + +struct ArrayModeUnion +{ + static constexpr auto name = "arrayUnion"; +}; + +template class FunctionArrayIntersect : public IFunction { public: - static constexpr auto name = "arrayIntersect"; + static constexpr auto name = Mode::name; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionArrayIntersect(ContextPtr context_) : context(context_) {} @@ -124,8 +135,8 @@ private: }; }; - -DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & arguments) const +template +DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & arguments) const { DataTypes nested_types; nested_types.reserve(arguments.size()); @@ -162,7 +173,8 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & argument return std::make_shared(result_type); } -ColumnPtr FunctionArrayIntersect::castRemoveNullable(const ColumnPtr & column, const DataTypePtr & data_type) const +template +ColumnPtr FunctionArrayIntersect::castRemoveNullable(const ColumnPtr & column, const DataTypePtr & data_type) const { if (const auto * column_nullable = checkAndGetColumn(column.get())) { @@ -208,7 +220,8 @@ ColumnPtr FunctionArrayIntersect::castRemoveNullable(const ColumnPtr & column, c return column; } -FunctionArrayIntersect::CastArgumentsResult FunctionArrayIntersect::castColumns( +template +FunctionArrayIntersect::CastArgumentsResult FunctionArrayIntersect::castColumns( const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, const DataTypePtr & return_type_with_nulls) { size_t num_args = arguments.size(); @@ -294,7 +307,8 @@ static ColumnPtr callFunctionNotEquals(ColumnWithTypeAndName first, ColumnWithTy return eq_func->execute(args, eq_func->getResultType(), args.front().column->size()); } -FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays( +template +FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays( const ColumnsWithTypeAndName & columns, ColumnsWithTypeAndName & initial_columns) const { UnpackedArrays arrays; @@ -384,7 +398,8 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays( return arrays; } -ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const +template +ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const { const auto * return_type_array = checkAndGetDataType(result_type.get()); @@ -450,8 +465,9 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName & arg return result_column; } +template template -void FunctionArrayIntersect::NumberExecutor::operator()(TypeList) +void FunctionArrayIntersect::NumberExecutor::operator()(TypeList) { using Container = ClearableHashMapWithStackMemory, INITIAL_SIZE_DEGREE>; @@ -460,8 +476,9 @@ void FunctionArrayIntersect::NumberExecutor::operator()(TypeList) result = execute, true>(arrays, ColumnVector::create()); } +template template -void FunctionArrayIntersect::DecimalExecutor::operator()(TypeList) +void FunctionArrayIntersect::DecimalExecutor::operator()(TypeList) { using Container = ClearableHashMapWithStackMemory, INITIAL_SIZE_DEGREE>; @@ -471,8 +488,9 @@ void FunctionArrayIntersect::DecimalExecutor::operator()(TypeList) result = execute, true>(arrays, ColumnDecimal::create(0, decimal->getScale())); } +template template -ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, MutableColumnPtr result_data_ptr) +ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, MutableColumnPtr result_data_ptr) { auto args = arrays.args.size(); auto rows = arrays.base_rows; @@ -641,9 +659,13 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable } +using ArrayIntersect = FunctionArrayIntersect; +using ArrayUnion = FunctionArrayIntersect; + REGISTER_FUNCTION(ArrayIntersect) { - factory.registerFunction(); + // factory.registerFunction(); + factory.registerFunction(); } } From d29f0d4c9668e134d1569f16089b77ea1a62e070 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 27 Aug 2024 21:29:34 -0600 Subject: [PATCH 02/14] Pull out code into insertElement() function and implement arrayUnion logic --- src/Functions/array/arrayIntersect.cpp | 169 ++++++++++++++++--------- 1 file changed, 111 insertions(+), 58 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 8affe1ac11c..53814e8b8da 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -108,6 +108,9 @@ private: template static ColumnPtr execute(const UnpackedArrays & arrays, MutableColumnPtr result_data); + template + static void insertElement(typename Map::LookupResult & pair, size_t & result_offset, ColumnType & result_data, NullMap & null_map, const bool & use_null_map); + struct NumberExecutor { const UnpackedArrays & arrays; @@ -158,6 +161,12 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & ar if (typeid_cast(nested_type.get())) has_nothing = true; + // { + // if (std::is_same_v) { + // has_nothing = true; + // break; + // } + // } else nested_types.push_back(nested_type); } @@ -169,6 +178,11 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & ar if (has_nothing) result_type = std::make_shared(); + // // If found any DataTypeNothing in IntersectMode or all DattaTypeNothing in UnionMode + // if (has_nothing || nested_types.empty()) + // result_type = std::make_shared(); + // else + // result_type = getMostSubtype(nested_types, true); return std::make_shared(result_type); } @@ -529,6 +543,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M map.clear(); bool all_has_nullable = all_nullable; + bool has_a_null = false; bool current_has_nullable = false; for (size_t arg_num = 0; arg_num < args; ++arg_num) @@ -564,7 +579,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M } /// Here we count the number of element appearances, but no more than once per array. - if (*value == arg_num) + if (*value <= arg_num) ++(*value); } } @@ -579,77 +594,93 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M } if (!current_has_nullable) all_has_nullable = false; + else + has_a_null = true; } // We have NULL in output only once if it should be there bool null_added = false; - const auto & arg = arrays.args[0]; - size_t off; - // const array has only one row - if (arg.is_const) - off = (*arg.offsets)[0]; - else - off = (*arg.offsets)[row]; + bool use_null_map; - for (auto i : collections::range(prev_off[0], off)) + if (std::is_same_v) { - all_has_nullable = all_nullable; - typename Map::LookupResult pair = nullptr; - - if (arg.null_map && (*arg.null_map)[i]) + use_null_map = has_a_null; + for (auto & p : map) { - current_has_nullable = true; - if (all_has_nullable && !null_added) + typename Map::LookupResult pair = map.find(p.getKey()); + if (pair && pair->getMapped() >= 1) { - ++result_offset; - result_data.insertDefault(); - null_map.push_back(1); - null_added = true; + insertElement(pair, result_offset, result_data, null_map, use_null_map); } - if (null_added) - continue; } - else if constexpr (is_numeric_column) + if (has_a_null && !null_added) { - pair = map.find(columns[0]->getElement(i)); - } - else if constexpr (std::is_same_v || std::is_same_v) - pair = map.find(columns[0]->getDataAt(i)); - else - { - const char * data = nullptr; - pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); - } - prev_off[0] = off; - if (arg.is_const) - prev_off[0] = 0; - - if (!current_has_nullable) - all_has_nullable = false; - - if (pair && pair->getMapped() == args) - { - // We increase pair->getMapped() here to not skip duplicate values from the first array. - ++pair->getMapped(); ++result_offset; - if constexpr (is_numeric_column) - { - result_data.insertValue(pair->getKey()); - } - else if constexpr (std::is_same_v || std::is_same_v) - { - result_data.insertData(pair->getKey().data, pair->getKey().size); - } - else - { - std::ignore = result_data.deserializeAndInsertFromArena(pair->getKey().data); - } - if (all_nullable) - null_map.push_back(0); + result_data.insertDefault(); + null_map.push_back(1); + null_added = true; } } - result_offsets.getElement(row) = result_offset; + else if (std::is_same_v) + { + use_null_map = all_nullable; + const auto & arg = arrays.args[0]; + size_t off; + // const array has only one row + if (arg.is_const) + off = (*arg.offsets)[0]; + else + off = (*arg.offsets)[row]; + for (auto i : collections::range(prev_off[0], off)) + { + all_has_nullable = all_nullable; + typename Map::LookupResult pair = nullptr; + + if (arg.null_map && (*arg.null_map)[i]) + { + current_has_nullable = true; + if (all_has_nullable && !null_added) + { + ++result_offset; + result_data.insertDefault(); + null_map.push_back(1); + null_added = true; + } + if (null_added) + continue; + } + else if constexpr (is_numeric_column) + { + pair = map.find(columns[0]->getElement(i)); + } + else if constexpr (std::is_same_v || std::is_same_v) + pair = map.find(columns[0]->getDataAt(i)); + else + { + const char * data = nullptr; + pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); + } + prev_off[0] = off; + if (arg.is_const) + prev_off[0] = 0; + + if (!current_has_nullable) + all_has_nullable = false; + + // Add the value if all arrays have the value for intersect + // or if there was at least one occurence in all of the arrays + if (pair && pair->getMapped() == args) + { + insertElement(pair, result_offset, result_data, null_map, use_null_map); + } + } + } + else + { + + } + result_offsets.getElement(row) = result_offset; } ColumnPtr result_column = std::move(result_data_ptr); if (all_nullable) @@ -658,14 +689,36 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M } +template +template +void FunctionArrayIntersect::insertElement(typename Map::LookupResult & pair, size_t & result_offset, ColumnType & result_data, NullMap & null_map, const bool & use_null_map) +{ + pair->getMapped() = -1; + ++result_offset; + if constexpr (is_numeric_column) + { + result_data.insertValue(pair->getKey()); + } + else if constexpr (std::is_same_v || std::is_same_v) + { + result_data.insertData(pair->getKey().data, pair->getKey().size); + } + else + { + std::ignore = result_data.deserializeAndInsertFromArena(pair->getKey().data); + } + if (use_null_map) + null_map.push_back(0); +} + using ArrayIntersect = FunctionArrayIntersect; using ArrayUnion = FunctionArrayIntersect; REGISTER_FUNCTION(ArrayIntersect) { - // factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); } } From 3153c0cf96ff71af8e8579dcb034026d6655718f Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 27 Aug 2024 21:37:08 -0600 Subject: [PATCH 03/14] Fix bug where empty array always returned if there's any empty array as input --- src/Functions/array/arrayIntersect.cpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 53814e8b8da..f0a0b3aee57 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -160,13 +160,13 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & ar const auto & nested_type = array_type->getNestedType(); if (typeid_cast(nested_type.get())) - has_nothing = true; - // { - // if (std::is_same_v) { - // has_nothing = true; - // break; - // } - // } + { + if (std::is_same_v) + { + has_nothing = true; + break; + } + } else nested_types.push_back(nested_type); } @@ -176,13 +176,11 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & ar if (!nested_types.empty()) result_type = getMostSubtype(nested_types, true); - if (has_nothing) + // If found any DataTypeNothing in IntersectMode or all DattaTypeNothing in UnionMode + if (has_nothing || nested_types.empty()) result_type = std::make_shared(); - // // If found any DataTypeNothing in IntersectMode or all DattaTypeNothing in UnionMode - // if (has_nothing || nested_types.empty()) - // result_type = std::make_shared(); - // else - // result_type = getMostSubtype(nested_types, true); + else + result_type = getMostSubtype(nested_types, true); return std::make_shared(result_type); } From 1fc6c091e6ef76e24d50ce306ea964b81fe7922f Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 27 Aug 2024 23:55:48 -0600 Subject: [PATCH 04/14] Fix bug with improperly casted array types (e.g negative casted to UInt) --- src/Functions/array/arrayIntersect.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index f0a0b3aee57..41706ed0039 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -173,14 +175,16 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & ar DataTypePtr result_type; - if (!nested_types.empty()) - result_type = getMostSubtype(nested_types, true); - - // If found any DataTypeNothing in IntersectMode or all DattaTypeNothing in UnionMode + // If any DataTypeNothing in ArrayModeIntersect or all arrays in ArrayModeUnion are DataTypeNothing if (has_nothing || nested_types.empty()) result_type = std::make_shared(); else - result_type = getMostSubtype(nested_types, true); + { + if (std::is_same_v) + result_type = getMostSubtype(nested_types, true); + else + result_type = getLeastSupertype(nested_types); + } return std::make_shared(result_type); } @@ -429,7 +433,7 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName for (size_t i = 0; i < num_args; ++i) data_types.push_back(arguments[i].type); - auto return_type_with_nulls = getMostSubtype(data_types, true, true); + auto return_type_with_nulls = getReturnTypeImpl(data_types); auto casted_columns = castColumns(arguments, result_type, return_type_with_nulls); UnpackedArrays arrays = prepareArrays(casted_columns.casted, casted_columns.initial); From 854186dd72ba4d3d770351361145b409e119d58f Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 28 Aug 2024 00:05:08 -0600 Subject: [PATCH 05/14] Add 03224_arrayUnion test --- .../0_stateless/03224_arrayUnion.reference | 73 +++++++++++++++++++ .../queries/0_stateless/03224_arrayUnion.sql | 53 ++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 tests/queries/0_stateless/03224_arrayUnion.reference create mode 100644 tests/queries/0_stateless/03224_arrayUnion.sql diff --git a/tests/queries/0_stateless/03224_arrayUnion.reference b/tests/queries/0_stateless/03224_arrayUnion.reference new file mode 100644 index 00000000000..f6ab78b71fd --- /dev/null +++ b/tests/queries/0_stateless/03224_arrayUnion.reference @@ -0,0 +1,73 @@ +[1,2] +[1,2] +[1,2] +[1,2,3] +------- +[] +[1] +[1,2] +[1,2,3] +------- +[] +[1] +[1,2] +[1,2,3] +------- +[1,2] +[1,2] +[1,2] +[1,2,3] +------- +[1,2,3,4] +[1,2,3,4] +[1,2,3,4] +[1,2,3,4] +------- +[] +[] +[] +[] +------- +[1,2] +[1,2] +[1,2] +[1,2,3] +------- +[] +[1] +[1,2] +[1,2,3] +------- +[] +[1] +[1,2] +[1,2,3] +------- +[1,2] +[1,2] +[1,2] +[1,2,3] +------- +[1,2,3,4] +[1,2,3,4] +[1,2,3,4] +[1,2,3,4] +------- +[] +[] +[] +[] +------- +[-100,156] +------- +[-257,-100,1] +------- +['hello','hi'] +------- +[1,2,3,NULL] +------- +[1,2,3,NULL] +------- +[1,2,3,4,5,10,20] +------- +[1,2,3] diff --git a/tests/queries/0_stateless/03224_arrayUnion.sql b/tests/queries/0_stateless/03224_arrayUnion.sql new file mode 100644 index 00000000000..da8032524ad --- /dev/null +++ b/tests/queries/0_stateless/03224_arrayUnion.sql @@ -0,0 +1,53 @@ +drop table if exists array_union; + +create table array_union (date Date, arr Array(UInt8)) engine=MergeTree partition by date order by date; + +insert into array_union values ('2019-01-01', [1,2,3]); +insert into array_union values ('2019-01-01', [1,2]); +insert into array_union values ('2019-01-01', [1]); +insert into array_union values ('2019-01-01', []); + + +select arraySort(arrayUnion(arr, [1,2])) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion(arr, [])) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion([], arr)) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion([1,2], arr)) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion([1,2], [1,2,3,4])) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion([], [])) from array_union order by arr; +select '-------'; + +optimize table array_union; + +select arraySort(arrayUnion(arr, [1,2])) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion(arr, [])) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion([], arr)) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion([1,2], arr)) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion([1,2], [1,2,3,4])) from array_union order by arr; +select '-------'; +select arraySort(arrayUnion([], [])) from array_union order by arr; + +drop table if exists array_union; + +select '-------'; +select arraySort(arrayUnion([-100], [156])); +select '-------'; +select arraySort(arrayUnion([1], [-257, -100])); +select '-------'; +select arraySort(arrayUnion(['hi'], ['hello', 'hi'], [])); +select '-------'; +SELECT arraySort(arrayUnion([1, 2, NULL], [1, 3, NULL], [2, 3, NULL])); +select '-------'; +SELECT arraySort(arrayUnion([NULL, NULL, NULL, 1], [1, NULL, NULL], [1, 2, 3, NULL])); +select '-------'; +SELECT arraySort(arrayUnion([1, 1, 1, 2, 3], [2, 2, 4], [5, 10, 20])); +select '-------'; +SELECT arraySort(arrayUnion([1, 2], [1, 3], [])), From e4ba3b47fc97c503ca3e5168f405ab8e34732baa Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 28 Aug 2024 00:16:02 -0600 Subject: [PATCH 06/14] Document arrayUnion() in array-functions.md --- .../sql-reference/functions/array-functions.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 1b52440903d..e1516ae19d9 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1717,6 +1717,24 @@ Result: [[1,1,2,3],[1,2,3,4]] ``` +## arrayUnion(arr) + +Takes multiple arrays, returns an array that contains all elements that are present in any of the source arrays. + +Example: +```sql +SELECT + arrayUnion([-2, 1], [10, 1], [-2], []) as num_example, + arrayUnion(['hi'], [], ['hello', 'hi']) as str_example, + arrayUnion([1, 3, NULL], [2, 3, NULL]) as null_example +``` + +```text +┌─num_example─┬─str_example────┬─null_example─┐ +│ [10,-2,1] │ ['hello','hi'] │ [3,2,1,NULL] │ +└─────────────┴────────────────┴──────────────┘ +``` + ## arrayIntersect(arr) Takes multiple arrays, returns an array with elements that are present in all source arrays. From 8ba7cd52e41316af9d00a3aec9b975f5f9620c65 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 28 Aug 2024 13:21:14 +0200 Subject: [PATCH 07/14] fix stylecheck --- src/Functions/array/arrayIntersect.cpp | 2 +- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 41706ed0039..7cde54fc479 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -671,7 +671,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M all_has_nullable = false; // Add the value if all arrays have the value for intersect - // or if there was at least one occurence in all of the arrays + // or if there was at least one occurrence in all of the arrays if (pair && pair->getMapped() == args) { insertElement(pair, result_offset, result_data, null_map, use_null_map); diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ce7c666912e..662a83c4653 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2942 +personal_ws-1.1 en 2943 AArch ACLs ALTERs @@ -1187,6 +1187,7 @@ arraySort arraySplit arrayStringConcat arraySum +arrayUnion arrayUniq arrayWithConstant arrayZip From f09951ac3dc43d310fe972b35d6b88e6cc2ab720 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 28 Aug 2024 08:02:57 -0600 Subject: [PATCH 08/14] Remove optimize query from 03224_arrayUnion.sql --- tests/queries/0_stateless/03224_arrayUnion.sql | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/queries/0_stateless/03224_arrayUnion.sql b/tests/queries/0_stateless/03224_arrayUnion.sql index da8032524ad..782da75bccc 100644 --- a/tests/queries/0_stateless/03224_arrayUnion.sql +++ b/tests/queries/0_stateless/03224_arrayUnion.sql @@ -20,9 +20,6 @@ select arraySort(arrayUnion([1,2], [1,2,3,4])) from array_union order by arr; select '-------'; select arraySort(arrayUnion([], [])) from array_union order by arr; select '-------'; - -optimize table array_union; - select arraySort(arrayUnion(arr, [1,2])) from array_union order by arr; select '-------'; select arraySort(arrayUnion(arr, [])) from array_union order by arr; From 9b51c4f3cf6d00d97f6315af49b57b68444de1b9 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 28 Aug 2024 08:36:50 -0600 Subject: [PATCH 09/14] Add arrayUnion to 02415_all_new_functions_must_be_documented.reference --- .../02415_all_new_functions_must_be_documented.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 0980e25b70f..fd0ab1c9045 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -141,6 +141,7 @@ arraySort arraySplit arrayStringConcat arraySum +arrayUnion arrayUniq arrayWithConstant arrayZip From 693abc941ee6326729d20730fafa89abcfef4c83 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Fri, 30 Aug 2024 00:09:28 -0600 Subject: [PATCH 10/14] Fix bugs with return type in arrayIntersect.cpp and add constexpr --- src/Functions/array/arrayIntersect.cpp | 27 +++++++++++++------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 7cde54fc479..d480e372c9e 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -163,7 +163,7 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & ar if (typeid_cast(nested_type.get())) { - if (std::is_same_v) + if constexpr (std::is_same_v) { has_nothing = true; break; @@ -178,13 +178,10 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & ar // If any DataTypeNothing in ArrayModeIntersect or all arrays in ArrayModeUnion are DataTypeNothing if (has_nothing || nested_types.empty()) result_type = std::make_shared(); + else if constexpr (std::is_same_v) + result_type = getMostSubtype(nested_types, true); else - { - if (std::is_same_v) - result_type = getMostSubtype(nested_types, true); - else - result_type = getLeastSupertype(nested_types); - } + result_type = getLeastSupertype(nested_types); return std::make_shared(result_type); } @@ -433,7 +430,12 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName for (size_t i = 0; i < num_args; ++i) data_types.push_back(arguments[i].type); - auto return_type_with_nulls = getReturnTypeImpl(data_types); + DataTypePtr return_type_with_nulls; + if constexpr (std::is_same_v) + return_type_with_nulls = getMostSubtype(data_types, true, true); + else + return_type_with_nulls = getReturnTypeImpl(data_types); + auto casted_columns = castColumns(arguments, result_type, return_type_with_nulls); UnpackedArrays arrays = prepareArrays(casted_columns.casted, casted_columns.initial); @@ -604,7 +606,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M bool null_added = false; bool use_null_map; - if (std::is_same_v) + if constexpr (std::is_same_v) { use_null_map = has_a_null; for (auto & p : map) @@ -623,7 +625,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M null_added = true; } } - else if (std::is_same_v) + else if constexpr (std::is_same_v) { use_null_map = all_nullable; const auto & arg = arrays.args[0]; @@ -671,17 +673,14 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M all_has_nullable = false; // Add the value if all arrays have the value for intersect - // or if there was at least one occurrence in all of the arrays + // or if there was at least one occurrence in all of the arrays for union if (pair && pair->getMapped() == args) { insertElement(pair, result_offset, result_data, null_map, use_null_map); } } } - else - { - } result_offsets.getElement(row) = result_offset; } ColumnPtr result_column = std::move(result_data_ptr); From 181335f88b7d74e0e084606a5018a69a52b302a8 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sat, 31 Aug 2024 13:41:21 -0600 Subject: [PATCH 11/14] Add has_nullable to fix bug with not using null map in arrayIntersect.cpp --- src/Functions/array/arrayIntersect.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index d480e372c9e..23785ea9791 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -514,6 +514,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M auto rows = arrays.base_rows; bool all_nullable = true; + bool has_nullable = false; std::vector columns; columns.reserve(args); @@ -529,6 +530,8 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M if (!arg.null_map) all_nullable = false; + else + has_nullable = true; } auto & result_data = static_cast(*result_data_ptr); @@ -608,7 +611,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, M if constexpr (std::is_same_v) { - use_null_map = has_a_null; + use_null_map = has_nullable; for (auto & p : map) { typename Map::LookupResult pair = map.find(p.getKey()); From fd6cb4eb8c71c7fb030675388b2b5df5f2eef0b6 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sat, 31 Aug 2024 23:59:26 -0600 Subject: [PATCH 12/14] Fix return type in executeImpl so it returns array types not nested array types --- src/Functions/array/arrayIntersect.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 23785ea9791..06811bc1b57 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -434,7 +434,7 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName if constexpr (std::is_same_v) return_type_with_nulls = getMostSubtype(data_types, true, true); else - return_type_with_nulls = getReturnTypeImpl(data_types); + return_type_with_nulls = getLeastSupertype(data_types); auto casted_columns = castColumns(arguments, result_type, return_type_with_nulls); From 3311214915477837386b15af9e40aaabffb19e40 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Fri, 6 Sep 2024 08:25:17 -0600 Subject: [PATCH 13/14] Remove duplicate test cases in 03224_arrayUnion.sql --- .../0_stateless/03224_arrayUnion.reference | 30 ------------------- .../queries/0_stateless/03224_arrayUnion.sql | 12 -------- 2 files changed, 42 deletions(-) diff --git a/tests/queries/0_stateless/03224_arrayUnion.reference b/tests/queries/0_stateless/03224_arrayUnion.reference index f6ab78b71fd..b900b6cdb0a 100644 --- a/tests/queries/0_stateless/03224_arrayUnion.reference +++ b/tests/queries/0_stateless/03224_arrayUnion.reference @@ -28,36 +28,6 @@ [] [] ------- -[1,2] -[1,2] -[1,2] -[1,2,3] -------- -[] -[1] -[1,2] -[1,2,3] -------- -[] -[1] -[1,2] -[1,2,3] -------- -[1,2] -[1,2] -[1,2] -[1,2,3] -------- -[1,2,3,4] -[1,2,3,4] -[1,2,3,4] -[1,2,3,4] -------- -[] -[] -[] -[] -------- [-100,156] ------- [-257,-100,1] diff --git a/tests/queries/0_stateless/03224_arrayUnion.sql b/tests/queries/0_stateless/03224_arrayUnion.sql index 782da75bccc..dedbacad906 100644 --- a/tests/queries/0_stateless/03224_arrayUnion.sql +++ b/tests/queries/0_stateless/03224_arrayUnion.sql @@ -8,18 +8,6 @@ insert into array_union values ('2019-01-01', [1]); insert into array_union values ('2019-01-01', []); -select arraySort(arrayUnion(arr, [1,2])) from array_union order by arr; -select '-------'; -select arraySort(arrayUnion(arr, [])) from array_union order by arr; -select '-------'; -select arraySort(arrayUnion([], arr)) from array_union order by arr; -select '-------'; -select arraySort(arrayUnion([1,2], arr)) from array_union order by arr; -select '-------'; -select arraySort(arrayUnion([1,2], [1,2,3,4])) from array_union order by arr; -select '-------'; -select arraySort(arrayUnion([], [])) from array_union order by arr; -select '-------'; select arraySort(arrayUnion(arr, [1,2])) from array_union order by arr; select '-------'; select arraySort(arrayUnion(arr, [])) from array_union order by arr; From f9455301ba211c38e1432b3475265c1119a31534 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 8 Sep 2024 19:33:34 -0700 Subject: [PATCH 14/14] Throw exception in decimal edge case in arrayUnion() --- src/Functions/array/arrayIntersect.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 06811bc1b57..316cc869ca1 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -147,6 +147,8 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & ar nested_types.reserve(arguments.size()); bool has_nothing = false; + DataTypePtr has_decimal_type = nullptr; + DataTypePtr has_non_decimal_type = nullptr; if (arguments.empty()) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); @@ -170,7 +172,24 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & ar } } else + { nested_types.push_back(nested_type); + + /// Throw exception if have a decimal and another type (e.g int/date type) + /// This is the same behavior as the arrayIntersect and notEquals functions + /// This case is not covered by getLeastSupertype() and results in crashing the program if left out + if constexpr (std::is_same_v) + { + if (WhichDataType(nested_type).isDecimal()) + has_decimal_type = nested_type; + else + has_non_decimal_type = nested_type; + + if (has_non_decimal_type && has_decimal_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types of arguments for function {}: {} and {}.", + getName(), has_non_decimal_type->getName(), has_decimal_type); + } + } } DataTypePtr result_type;