allow unaligned arrays in arrayZip

This commit is contained in:
taiyang-li 2024-08-28 22:14:43 +08:00
parent c0b36c946d
commit 5b3ca6b2b9

View File

@ -1,7 +1,8 @@
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <IO/WriteHelpers.h>
@ -12,23 +13,22 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_COLUMN;
}
/// arrayZip(['a', 'b', 'c'], ['d', 'e', 'f']) = [('a', 'd'), ('b', 'e'), ('c', 'f')]
/// arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e']) = [('a', 'd'), ('b', 'e'), ('c', null)]
template <bool allow_unaligned>
class FunctionArrayZip : public IFunction
{
public:
static constexpr auto name = "arrayZip";
static constexpr auto name = allow_unaligned ? "arrayZipUnaligned" : "arrayZip";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayZip>(); }
String getName() const override
{
return name;
}
String getName() const override { return name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
@ -39,8 +39,11 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.empty())
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
"Function {} needs at least one argument; passed {}." , getName(), arguments.size());
throw Exception(
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
"Function {} needs at least one argument; passed {}.",
getName(),
arguments.size());
DataTypes arguments_types;
for (size_t index = 0; index < arguments.size(); ++index)
@ -48,16 +51,24 @@ public:
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[index].type.get());
if (!array_type)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.",
toString(index + 1), getName(), arguments[0].type->getName());
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Argument {} of function {} must be array. Found {} instead.",
toString(index + 1),
getName(),
arguments[0].type->getName());
arguments_types.emplace_back(array_type->getNestedType());
auto nested_type = array_type->getNestedType();
if constexpr (allow_unaligned)
nested_type = makeNullable(nested_type);
arguments_types.emplace_back(nested_type);
}
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(arguments_types));
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
{
size_t num_arguments = arguments.size();
@ -68,12 +79,19 @@ public:
{
/// Constant columns cannot be inside tuple. It's only possible to have constant tuple as a whole.
ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst();
const ColumnArray * column_array = checkAndGetColumn<ColumnArray>(holder.get());
if (!column_array)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument {} of function {} must be array. Found column {} instead.",
i + 1, getName(), holder->getName());
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Argument {} of function {} must be array. Found column {} instead.",
i + 1,
getName(),
holder->getName());
tuple_columns[i] = column_array->getDataPtr();
if constexpr (allow_unaligned)
tuple_columns[i] = makeNullable(tuple_columns[i]);
if (i == 0)
{
@ -81,23 +99,61 @@ public:
}
else if (!column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*first_array_column)))
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument {} of function {} have different array sizes",
i + 1, getName());
if constexpr (allow_unaligned)
return executeUnaligned(static_cast<const ColumnArray &>(*first_array_column), *column_array, input_rows_count);
else
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument {} of function {} have different array sizes",
i + 1,
getName());
}
tuple_columns[i] = column_array->getDataPtr();
}
return ColumnArray::create(
ColumnTuple::create(tuple_columns), static_cast<const ColumnArray &>(*first_array_column).getOffsetsPtr());
ColumnTuple::create(std::move(tuple_columns)), static_cast<const ColumnArray &>(*first_array_column).getOffsetsPtr());
}
private:
/// Zips two array columns whose per-row lengths may differ.
///
/// For every row, the elements of both arrays are copied into nullable result columns and the
/// shorter side is padded with default values of the nullable column (NULL, per the
/// arrayZipUnaligned example in the class comment) so that both tuple elements have the same
/// per-row length, i.e. max(first_size, second_size).
///
/// @param first_array_column   First array argument (already converted to a full column by the caller).
/// @param second_array_column  Second array argument.
/// @param input_rows_count     Number of rows to produce; both offset arrays are read for this many rows.
/// @return Array(Tuple(Nullable(T1), Nullable(T2))) column with freshly built offsets.
///
/// NOTE(review): only two arrays are handled. executeImpl calls this with argument 1 and the
/// first argument whose offsets differ, so any other arguments are not part of the result.
/// Confirm whether more than two arguments must be supported.
ColumnPtr
executeUnaligned(const ColumnArray & first_array_column, const ColumnArray & second_array_column, size_t input_rows_count) const
{
    /// Hold the nullable wrappers by value instead of binding temporaries to references.
    const ColumnPtr nullable_first_data = makeNullable(first_array_column.getDataPtr());
    const ColumnPtr nullable_second_data = makeNullable(second_array_column.getDataPtr());

    auto res_first_data = nullable_first_data->cloneEmpty();
    auto res_second_data = nullable_second_data->cloneEmpty();

    auto res_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count);
    auto & res_offsets = res_offsets_column->getData();

    const auto & first_offsets = first_array_column.getOffsets();
    const auto & second_offsets = second_array_column.getOffsets();

    /// Track the previous offsets explicitly so that row 0 does not depend on indexing offsets[-1].
    size_t prev_first_offset = 0;
    size_t prev_second_offset = 0;
    /// Array offsets are cumulative end positions, so the result offset accumulates across rows.
    size_t res_offset = 0;

    for (size_t i = 0; i < input_rows_count; ++i)
    {
        const size_t first_size = first_offsets[i] - prev_first_offset;
        const size_t second_size = second_offsets[i] - prev_second_offset;

        res_first_data->insertRangeFrom(*nullable_first_data, prev_first_offset, first_size);
        res_second_data->insertRangeFrom(*nullable_second_data, prev_second_offset, second_size);

        /// Pad the shorter side so both tuple element columns stay aligned for this row.
        if (first_size < second_size)
            res_first_data->insertManyDefaults(second_size - first_size);
        else if (first_size > second_size)
            res_second_data->insertManyDefaults(first_size - second_size);

        res_offset += std::max(first_size, second_size);
        res_offsets[i] = res_offset;

        prev_first_offset = first_offsets[i];
        prev_second_offset = second_offsets[i];
    }

    Columns tuple_columns{std::move(res_first_data), std::move(res_second_data)};
    return ColumnArray::create(ColumnTuple::create(std::move(tuple_columns)), std::move(res_offsets_column));
}
};
/// Registers the arrayZip function family with the function factory.
/// FunctionArrayZip<false> carries the name "arrayZip" and throws when array sizes differ;
/// FunctionArrayZip<true> carries the name "arrayZipUnaligned" and accepts arrays of different sizes.
REGISTER_FUNCTION(ArrayZip)
{
// NOTE(review): this non-templated registration appears to be the pre-change line that the
// two templated registrations below replace — confirm it is not meant to coexist with them.
factory.registerFunction<FunctionArrayZip>();
factory.registerFunction<FunctionArrayZip<false>>();
factory.registerFunction<FunctionArrayZip<true>>();
}
}