diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 1b52440903d..ad971ae7554 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2035,6 +2035,7 @@ Query: SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]); ``` + Result: ``` text @@ -2043,6 +2044,43 @@ Result: └──────────────────────────────────────┘ ``` +## arrayZipUnaligned + +Combines multiple arrays into a single array, allowing for unaligned arrays (arrays of different lengths). The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments. + +**Syntax** + +``` sql +arrayZipUnaligned(arr1, arr2, ..., arrN) +``` + +**Arguments** + +- `arrN` — [Array](../data-types/array.md). + +The function can take any number of arrays of different types. + +**Returned value** + +- Array with elements from the source arrays grouped into [tuples](../data-types/tuple.md). The data types in the tuple are the [Nullable](../data-types/nullable.md) versions of the element types of the input arrays, in the same order as the arrays are passed. [Array](../data-types/array.md). If the arrays have different sizes, the shorter arrays are padded with `NULL` values. + +**Example** + +Query: + +``` sql +SELECT arrayZipUnaligned(['a'], [1, 2, 3]); +``` + +Result: + +``` text +┌─arrayZipUnaligned(['a'], [1, 2, 3])─┐ +│ [('a',1),(NULL,2),(NULL,3)] │ +└─────────────────────────────────────┘ +``` + + ## arrayAUC Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: ). 
diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index 6c6fff5926b..6e1cc0f7788 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -1,7 +1,8 @@ -#include #include -#include +#include +#include #include +#include #include #include #include @@ -12,23 +13,22 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int SIZES_OF_ARRAYS_DONT_MATCH; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; - extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int SIZES_OF_ARRAYS_DONT_MATCH; +extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; +extern const int ILLEGAL_COLUMN; } /// arrayZip(['a', 'b', 'c'], ['d', 'e', 'f']) = [('a', 'd'), ('b', 'e'), ('c', 'f')] +/// arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e']) = [('a', 'd'), ('b', 'e'), ('c', NULL)] +template class FunctionArrayZip : public IFunction { public: - static constexpr auto name = "arrayZip"; + static constexpr auto name = allow_unaligned ? "arrayZipUnaligned" : "arrayZip"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -39,8 +39,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.empty()) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, - "Function {} needs at least one argument; passed {}." 
, getName(), arguments.size()); + throw Exception( + ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, + "Function {} needs at least one argument; passed {}.", + getName(), + arguments.size()); DataTypes arguments_types; for (size_t index = 0; index < arguments.size(); ++index) @@ -48,56 +51,142 @@ public: const DataTypeArray * array_type = checkAndGetDataType(arguments[index].type.get()); if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.", - toString(index + 1), getName(), arguments[0].type->getName()); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} of function {} must be array. Found {} instead.", + toString(index + 1), + getName(), + arguments[index].type->getName()); - arguments_types.emplace_back(array_type->getNestedType()); + auto nested_type = array_type->getNestedType(); + if constexpr (allow_unaligned) + nested_type = makeNullable(nested_type); + arguments_types.emplace_back(nested_type); } return std::make_shared(std::make_shared(arguments_types)); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { size_t num_arguments = arguments.size(); - - ColumnPtr first_array_column; + Columns holders(num_arguments); Columns tuple_columns(num_arguments); + bool has_unaligned = false; + size_t unaligned_index = 0; for (size_t i = 0; i < num_arguments; ++i) { /// Constant columns cannot be inside tuple. It's only possible to have constant tuple as a whole. ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst(); + holders[i] = holder; const ColumnArray * column_array = checkAndGetColumn(holder.get()); - if (!column_array) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument {} of function {} must be array. 
Found column {} instead.", - i + 1, getName(), holder->getName()); - - if (i == 0) - { - first_array_column = holder; - } - else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) - { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "The argument 1 and argument {} of function {} have different array sizes", - i + 1, getName()); - } - + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Argument {} of function {} must be array. Found column {} instead.", + i + 1, + getName(), + holder->getName()); tuple_columns[i] = column_array->getDataPtr(); + + if (i && !has_unaligned && !column_array->hasEqualOffsets(static_cast(*holders[0]))) /* remember only the first mismatching argument, as the pre-patch code reported */ + { + has_unaligned = true; + unaligned_index = i; + } } - return ColumnArray::create( - ColumnTuple::create(tuple_columns), static_cast(*first_array_column).getOffsetsPtr()); + if constexpr (!allow_unaligned) + { + if (has_unaligned) + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, + "The argument 1 and argument {} of function {} have different array sizes", + unaligned_index + 1, + getName()); + else + return ColumnArray::create( + ColumnTuple::create(std::move(tuple_columns)), static_cast(*holders[0]).getOffsetsPtr()); + } + else + return executeUnaligned(holders, tuple_columns, input_rows_count, has_unaligned); + } + +private: + ColumnPtr executeUnaligned(const Columns & holders, Columns & tuple_columns, size_t input_rows_count, bool has_unaligned) const + { + std::vector array_columns(holders.size()); + for (size_t i = 0; i < holders.size(); ++i) + array_columns[i] = checkAndGetColumn(holders[i].get()); + + for (auto & tuple_column : tuple_columns) + tuple_column = makeNullable(tuple_column); + + if (!has_unaligned) + return ColumnArray::create(ColumnTuple::create(std::move(tuple_columns)), array_columns[0]->getOffsetsPtr()); + + MutableColumns res_tuple_columns(tuple_columns.size()); + for (size_t i = 0; i < 
tuple_columns.size(); ++i) + { + res_tuple_columns[i] = tuple_columns[i]->cloneEmpty(); 
res_tuple_columns[i]->reserve(tuple_columns[i]->size()); + } + + auto res_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count); + auto & res_offsets = assert_cast(*res_offsets_column).getData(); + size_t curr_offset = 0; + for (size_t row_i = 0; row_i < input_rows_count; ++row_i) + { + size_t max_size = 0; + for (size_t arg_i = 0; arg_i < holders.size(); ++arg_i) + { + const auto * array_column = array_columns[arg_i]; + const auto & offsets = array_column->getOffsets(); + size_t array_offset = offsets[row_i - 1]; /* NOTE(review): reads offsets[-1] when row_i == 0; presumably the offsets container guarantees a zero there - confirm */ + size_t array_size = offsets[row_i] - array_offset; + + res_tuple_columns[arg_i]->insertRangeFrom(*tuple_columns[arg_i], array_offset, array_size); + max_size = std::max(max_size, array_size); + } + + for (size_t arg_i = 0; arg_i < holders.size(); ++arg_i) + { + const auto * array_column = array_columns[arg_i]; + const auto & offsets = array_column->getOffsets(); + size_t array_offset = offsets[row_i - 1]; + size_t array_size = offsets[row_i] - array_offset; + + res_tuple_columns[arg_i]->insertManyDefaults(max_size - array_size); + } + + curr_offset += max_size; + res_offsets[row_i] = curr_offset; + } + + return ColumnArray::create(ColumnTuple::create(std::move(res_tuple_columns)), std::move(res_offsets_column)); } }; REGISTER_FUNCTION(ArrayZip) { - factory.registerFunction(); + factory.registerFunction>( + {.description = R"( +Combines multiple arrays into a single array. The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments. +)", + .categories{"Array"}}); + + factory.registerFunction>( + {.description = R"( +Combines multiple arrays into a single array, allowing for unaligned arrays. The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments. 
+ +If the arrays have different sizes, the shorter arrays will be padded with `null` values. 
+)", + .categories{"Array"}} + + ); } } - diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 0980e25b70f..1368bf530c8 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -143,7 +143,6 @@ arrayStringConcat arraySum arrayUniq arrayWithConstant -arrayZip asinh assumeNotNull atan diff --git a/tests/queries/0_stateless/03230_array_zip_unaligned.reference b/tests/queries/0_stateless/03230_array_zip_unaligned.reference new file mode 100644 index 00000000000..7067f8788e5 --- /dev/null +++ b/tests/queries/0_stateless/03230_array_zip_unaligned.reference @@ -0,0 +1,8 @@ +[('a','d'),('b','e'),('c','f')] Array(Tuple(Nullable(String), Nullable(String))) +[('a','d','g'),('b','e','h'),('c','f','i')] +[('a','d'),('b','e'),('c','f'),(NULL,'g')] +[('a',1),(NULL,2),(NULL,3)] +[('a',1,1.1),('b',2,2.2),('c',NULL,3.3),(NULL,NULL,4.4)] +[('g'),('h'),('i')] +[('g'),('h'),('i')] +[('g'),('h'),('i')] diff --git a/tests/queries/0_stateless/03230_array_zip_unaligned.sql b/tests/queries/0_stateless/03230_array_zip_unaligned.sql new file mode 100644 index 00000000000..90b7aa47bfd --- /dev/null +++ b/tests/queries/0_stateless/03230_array_zip_unaligned.sql @@ -0,0 +1,15 @@ +SELECT arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e', 'f']) as x, toTypeName(x); + +SELECT arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']); + +SELECT arrayZipUnaligned(); -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION } + +SELECT arrayZipUnaligned('a', 'b', 'c'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e', 'f', 'g']); + +SELECT arrayZipUnaligned(['a'], [1, 2, 3]); + +SELECT arrayZipUnaligned(['a', 'b', 'c'], [1, 2], [1.1, 2.2, 3.3, 4.4]); + +SELECT arrayZipUnaligned(materialize(['g', 'h', 'i'])) from 
numbers(3); diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index f259f35c653..d10db5f0d3d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1209,6 +1209,7 @@ arraySum arrayUniq arrayWithConstant arrayZip +arrayZipUnaligned ascii asin asinh