Optimize array() with a single argument and map() with two arguments

This commit is contained in:
taiyang-li 2024-08-03 17:03:27 +08:00
parent aaf5412c71
commit 0f2024306d
2 changed files with 51 additions and 24 deletions

View File

@ -46,8 +46,10 @@ public:
size_t num_elements = arguments.size();
if (num_elements == 0)
{
/// We should return constant empty array.
return result_type->createColumnConstWithDefaultValue(input_rows_count);
}
const DataTypePtr & elem_type = static_cast<const DataTypeArray &>(*result_type).getNestedType();
@ -59,7 +61,6 @@ public:
Columns columns_holder(num_elements);
ColumnRawPtrs column_ptrs(num_elements);
for (size_t i = 0; i < num_elements; ++i)
{
const auto & arg = arguments[i];
@ -76,15 +77,25 @@ public:
}
/// Create and fill the result array.
auto out = ColumnArray::create(elem_type->createColumn());
IColumn & out_data = out->getData();
IColumn::Offsets & out_offsets = out->getOffsets();
out_data.reserve(input_rows_count * num_elements);
out_offsets.resize(input_rows_count);
out_offsets.resize_exact(input_rows_count);
IColumn::Offset current_offset = 0;
if (num_elements == 1)
{
for (size_t i = 0; i < input_rows_count; ++i)
{
++current_offset;
out_offsets[i] = current_offset;
}
out_data.insertManyFrom(*column_ptrs[0], 0, input_rows_count);
}
else
{
for (size_t i = 0; i < input_rows_count; ++i)
{
for (size_t j = 0; j < num_elements; ++j)
@ -93,6 +104,7 @@ public:
current_offset += num_elements;
out_offsets[i] = current_offset;
}
}
return out;
}

View File

@ -1,14 +1,15 @@
#include <Functions/IFunction.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnMap.h>
#include <Interpreters/castColumn.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <Interpreters/castColumn.h>
#include <Common/HashTable/HashSet.h>
@ -123,14 +124,27 @@ public:
MutableColumnPtr keys_data = key_type->createColumn();
MutableColumnPtr values_data = value_type->createColumn();
MutableColumnPtr offsets = DataTypeNumber<IColumn::Offset>().createColumn();
MutableColumnPtr offsets = DataTypeUInt64().createColumn();
size_t total_elements = input_rows_count * num_elements / 2;
keys_data->reserve(total_elements);
values_data->reserve(total_elements);
offsets->reserve(input_rows_count);
auto & offsets_data = assert_cast<ColumnUInt64 &>(*offsets).getData();
offsets_data.resize_exact(input_rows_count);
IColumn::Offset current_offset = 0;
if (num_elements == 2)
{
for (size_t i = 0; i < input_rows_count; ++i)
{
++current_offset;
offsets_data[i] = current_offset;
}
keys_data->insertManyFrom(*column_ptrs[0], 0, input_rows_count);
values_data->insertManyFrom(*column_ptrs[1], 0, input_rows_count);
}
else
{
for (size_t i = 0; i < input_rows_count; ++i)
{
for (size_t j = 0; j < num_elements; j += 2)
@ -140,7 +154,8 @@ public:
}
current_offset += num_elements / 2;
offsets->insert(current_offset);
offsets_data[i] = current_offset;
}
}
auto nested_column = ColumnArray::create(