mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge pull request #67707 from bigo-sg/opt_array_map
Optimize function `array()` and `map()`
This commit is contained in:
commit
d749869f00
@ -1,11 +1,15 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -44,11 +48,13 @@ public:
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
size_t num_elements = arguments.size();
|
||||
const size_t num_elements = arguments.size();
|
||||
|
||||
if (num_elements == 0)
|
||||
{
|
||||
/// We should return constant empty array.
|
||||
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
||||
}
|
||||
|
||||
const DataTypePtr & elem_type = static_cast<const DataTypeArray &>(*result_type).getNestedType();
|
||||
|
||||
@ -60,7 +66,6 @@ public:
|
||||
|
||||
Columns columns_holder(num_elements);
|
||||
ColumnRawPtrs column_ptrs(num_elements);
|
||||
|
||||
for (size_t i = 0; i < num_elements; ++i)
|
||||
{
|
||||
const auto & arg = arguments[i];
|
||||
@ -77,35 +82,199 @@ public:
|
||||
}
|
||||
|
||||
/// Create and fill the result array.
|
||||
|
||||
auto out = ColumnArray::create(elem_type->createColumn());
|
||||
IColumn & out_data = out->getData();
|
||||
IColumn::Offsets & out_offsets = out->getOffsets();
|
||||
|
||||
out_data.reserve(input_rows_count * num_elements);
|
||||
out_offsets.resize(input_rows_count);
|
||||
|
||||
/// Fill out_offsets
|
||||
out_offsets.resize_exact(input_rows_count);
|
||||
IColumn::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < num_elements; ++j)
|
||||
out_data.insertFrom(*column_ptrs[j], i);
|
||||
|
||||
current_offset += num_elements;
|
||||
out_offsets[i] = current_offset;
|
||||
}
|
||||
|
||||
/// Fill out_data
|
||||
out_data.reserve(input_rows_count * num_elements);
|
||||
if (num_elements == 1)
|
||||
out_data.insertRangeFrom(*column_ptrs[0], 0, input_rows_count);
|
||||
else
|
||||
execute(column_ptrs, out_data, input_rows_count);
|
||||
return out;
|
||||
}
|
||||
|
||||
private:
|
||||
bool execute(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const
|
||||
{
|
||||
return executeNumber<UInt8>(columns, out_data, input_rows_count) || executeNumber<UInt16>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<UInt32>(columns, out_data, input_rows_count) || executeNumber<UInt64>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<UInt128>(columns, out_data, input_rows_count) || executeNumber<UInt256>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<Int8>(columns, out_data, input_rows_count) || executeNumber<Int16>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<Int32>(columns, out_data, input_rows_count) || executeNumber<Int64>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<Int128>(columns, out_data, input_rows_count) || executeNumber<Int256>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<Float32>(columns, out_data, input_rows_count) || executeNumber<Float64>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<Decimal32>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<Decimal64>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<Decimal128>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<Decimal256>(columns, out_data, input_rows_count)
|
||||
|| executeNumber<DateTime64>(columns, out_data, input_rows_count) || executeString(columns, out_data, input_rows_count)
|
||||
|| executeNullable(columns, out_data, input_rows_count) || executeTuple(columns, out_data, input_rows_count)
|
||||
|| executeFixedString(columns, out_data, input_rows_count) || executeGeneric(columns, out_data, input_rows_count);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool executeNumber(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const
|
||||
{
|
||||
using Container = ColumnVectorOrDecimal<T>::Container;
|
||||
std::vector<const Container *> containers(columns.size(), nullptr);
|
||||
for (size_t i = 0; i < columns.size(); ++i)
|
||||
{
|
||||
const ColumnVectorOrDecimal<T> * concrete_column = checkAndGetColumn<ColumnVectorOrDecimal<T>>(columns[i]);
|
||||
if (!concrete_column)
|
||||
return false;
|
||||
|
||||
containers[i] = &concrete_column->getData();
|
||||
}
|
||||
|
||||
ColumnVectorOrDecimal<T> & concrete_out_data = assert_cast<ColumnVectorOrDecimal<T> &>(out_data);
|
||||
Container & out_container = concrete_out_data.getData();
|
||||
out_container.resize_exact(columns.size() * input_rows_count);
|
||||
|
||||
for (size_t row_i = 0; row_i < input_rows_count; ++row_i)
|
||||
{
|
||||
const size_t base = row_i * columns.size();
|
||||
for (size_t col_i = 0; col_i < columns.size(); ++col_i)
|
||||
out_container[base + col_i] = (*containers[col_i])[row_i];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool executeString(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const
|
||||
{
|
||||
size_t total_bytes = 0;
|
||||
std::vector<const ColumnString *> concrete_columns(columns.size(), nullptr);
|
||||
for (size_t i = 0; i < columns.size(); ++i)
|
||||
{
|
||||
const ColumnString * concrete_column = checkAndGetColumn<ColumnString>(columns[i]);
|
||||
if (!concrete_column)
|
||||
return false;
|
||||
|
||||
total_bytes += concrete_column->getChars().size();
|
||||
concrete_columns[i] = concrete_column;
|
||||
}
|
||||
|
||||
ColumnString & concrete_out_data = assert_cast<ColumnString &>(out_data);
|
||||
auto & out_chars = concrete_out_data.getChars();
|
||||
auto & out_offsets = concrete_out_data.getOffsets();
|
||||
out_chars.resize_exact(total_bytes);
|
||||
out_offsets.resize_exact(input_rows_count * columns.size());
|
||||
|
||||
size_t cur_out_offset = 0;
|
||||
for (size_t row_i = 0; row_i < input_rows_count; ++row_i)
|
||||
{
|
||||
const size_t base = row_i * columns.size();
|
||||
for (size_t col_i = 0; col_i < columns.size(); ++col_i)
|
||||
{
|
||||
StringRef ref = concrete_columns[col_i]->getDataAt(row_i);
|
||||
memcpySmallAllowReadWriteOverflow15(&out_chars[cur_out_offset], ref.data, ref.size);
|
||||
out_chars[cur_out_offset + ref.size] = 0;
|
||||
|
||||
cur_out_offset += ref.size + 1;
|
||||
out_offsets[base + col_i] = cur_out_offset;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool executeFixedString(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const
|
||||
{
|
||||
std::vector<const ColumnFixedString *> concrete_columns(columns.size(), nullptr);
|
||||
for (size_t i = 0; i < columns.size(); ++i)
|
||||
{
|
||||
const ColumnFixedString * concrete_column = checkAndGetColumn<ColumnFixedString>(columns[i]);
|
||||
if (!concrete_column)
|
||||
return false;
|
||||
|
||||
concrete_columns[i] = concrete_column;
|
||||
}
|
||||
|
||||
ColumnFixedString & concrete_out_data = assert_cast<ColumnFixedString &>(out_data);
|
||||
auto & out_chars = concrete_out_data.getChars();
|
||||
|
||||
const size_t n = concrete_out_data.getN();
|
||||
size_t total_bytes = n * columns.size() * input_rows_count;
|
||||
out_chars.resize_exact(total_bytes);
|
||||
|
||||
size_t curr_out_offset = 0;
|
||||
for (size_t row_i = 0; row_i < input_rows_count; ++row_i)
|
||||
{
|
||||
for (size_t col_i = 0; col_i < columns.size(); ++col_i)
|
||||
{
|
||||
StringRef ref = concrete_columns[col_i]->getDataAt(row_i);
|
||||
memcpySmallAllowReadWriteOverflow15(&out_chars[curr_out_offset], ref.data, n);
|
||||
curr_out_offset += n;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool executeNullable(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const
|
||||
{
|
||||
ColumnRawPtrs null_maps(columns.size(), nullptr);
|
||||
ColumnRawPtrs nested_columns(columns.size(), nullptr);
|
||||
for (size_t i = 0; i < columns.size(); ++i)
|
||||
{
|
||||
const ColumnNullable * concrete_column = checkAndGetColumn<ColumnNullable>(columns[i]);
|
||||
if (!concrete_column)
|
||||
return false;
|
||||
|
||||
null_maps[i] = &concrete_column->getNullMapColumn();
|
||||
nested_columns[i] = &concrete_column->getNestedColumn();
|
||||
}
|
||||
|
||||
ColumnNullable & concrete_out_data = assert_cast<ColumnNullable &>(out_data);
|
||||
auto & out_null_map = concrete_out_data.getNullMapColumn();
|
||||
auto & out_nested_column = concrete_out_data.getNestedColumn();
|
||||
execute(null_maps, out_null_map, input_rows_count);
|
||||
execute(nested_columns, out_nested_column, input_rows_count);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool executeTuple(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const
|
||||
{
|
||||
ColumnTuple * concrete_out_data = typeid_cast<ColumnTuple *>(&out_data);
|
||||
if (!concrete_out_data)
|
||||
return false;
|
||||
|
||||
const size_t tuple_size = concrete_out_data->tupleSize();
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
ColumnRawPtrs elem_columns(columns.size(), nullptr);
|
||||
for (size_t j = 0; j < columns.size(); ++j)
|
||||
{
|
||||
const ColumnTuple * concrete_column = assert_cast<const ColumnTuple *>(columns[j]);
|
||||
elem_columns[j] = &concrete_column->getColumn(i);
|
||||
}
|
||||
execute(elem_columns, concrete_out_data->getColumn(i), input_rows_count);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool executeGeneric(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const
|
||||
{
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
for (const auto * column : columns)
|
||||
out_data.insertFrom(*column, i);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// Returns the value of `name` as a String.
String getName() const override { return name; }
|
||||
|
||||
bool addField(DataTypePtr type_res, const Field & f, Array & arr) const;
|
||||
|
||||
bool use_variant_as_common_type = false;
|
||||
};
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
@ -13,7 +15,6 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -36,11 +37,18 @@ class FunctionMap : public IFunction
|
||||
public:
|
||||
static constexpr auto name = "map";
|
||||
|
||||
explicit FunctionMap(bool use_variant_as_common_type_) : use_variant_as_common_type(use_variant_as_common_type_) {}
|
||||
/// Builds the function from a query context.
/// Keeps the context, derives `use_variant_as_common_type` from the two settings
/// `allow_experimental_variant_type` and `use_variant_as_common_type` (both must be set),
/// and looks up the `array` and `mapFromArrays` functions in the FunctionFactory once,
/// so that executeImpl can delegate to them.
explicit FunctionMap(ContextPtr context_)
    : context(context_)
    , use_variant_as_common_type(
          context->getSettingsRef().allow_experimental_variant_type
          && context->getSettingsRef().use_variant_as_common_type)
    , function_array(FunctionFactory::instance().get("array", context))
    , function_map_from_arrays(FunctionFactory::instance().get("mapFromArrays", context))
{}
|
||||
|
||||
/// Factory entry point.
/// Constructs the function through the context-taking constructor: that constructor also
/// resolves the `array` and `mapFromArrays` helpers which executeImpl dereferences, whereas
/// the bool-only constructor leaves them unset.
static FunctionPtr create(ContextPtr context)
{
    return std::make_shared<FunctionMap>(context);
}
|
||||
|
||||
String getName() const override
|
||||
@ -101,62 +109,38 @@ public:
|
||||
/// Evaluates map(k1, v1, k2, v2, ...) by delegation:
///   key_array   = array(k1, k2, ...)
///   value_array = array(v1, v2, ...)
///   result      = mapFromArrays(key_array, value_array)
/// With no arguments, returns a constant column holding the default value of `result_type`.
///
/// `result_type` is treated as a DataTypeMap; its key and value types become the element
/// types of the two intermediate arrays.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
    const size_t num_elements = arguments.size();
    if (num_elements == 0)
        return result_type->createColumnConstWithDefaultValue(input_rows_count);

    /// Split the arguments into keys (even positions) and values (odd positions).
    /// Assumes an even argument count - presumably enforced when the return type is
    /// computed (not visible here); TODO confirm.
    ColumnsWithTypeAndName key_args;
    ColumnsWithTypeAndName value_args;
    key_args.reserve(num_elements / 2);
    value_args.reserve(num_elements / 2);
    for (size_t i = 0; i < num_elements; i += 2)
    {
        key_args.emplace_back(arguments[i]);
        value_args.emplace_back(arguments[i + 1]);
    }

    const auto & result_type_map = static_cast<const DataTypeMap &>(*result_type);
    const DataTypePtr & key_type = result_type_map.getKeyType();
    const DataTypePtr & value_type = result_type_map.getValueType();

    /// Held by value: these are freshly created objects, not references into result_type.
    const DataTypePtr key_array_type = std::make_shared<DataTypeArray>(key_type);
    const DataTypePtr value_array_type = std::make_shared<DataTypeArray>(value_type);

    /// key_array = array(args[0], args[2]...)
    ColumnPtr key_array = function_array->build(key_args)->execute(key_args, key_array_type, input_rows_count);
    /// value_array = array(args[1], args[3]...)
    ColumnPtr value_array = function_array->build(value_args)->execute(value_args, value_array_type, input_rows_count);

    /// result = mapFromArrays(key_array, value_array)
    ColumnsWithTypeAndName map_args{{key_array, key_array_type, ""}, {value_array, value_array_type, ""}};
    return function_map_from_arrays->build(map_args)->execute(map_args, result_type, input_rows_count);
}
|
||||
|
||||
private:
|
||||
ContextPtr context;
|
||||
bool use_variant_as_common_type = false;
|
||||
FunctionOverloadResolverPtr function_array;
|
||||
FunctionOverloadResolverPtr function_map_from_arrays;
|
||||
};
|
||||
|
||||
/// mapFromArrays(keys, values) is a function that allows you to make key-value pair from a pair of arrays or maps
|
||||
@ -173,6 +157,7 @@ public:
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user