Optimize array() for a single argument and map() for two arguments (a single key-value pair)

This commit is contained in:
taiyang-li 2024-08-03 17:03:27 +08:00
parent aaf5412c71
commit 0f2024306d
2 changed files with 51 additions and 24 deletions

View File

@ -46,8 +46,10 @@ public:
size_t num_elements = arguments.size(); size_t num_elements = arguments.size();
if (num_elements == 0) if (num_elements == 0)
{
/// We should return constant empty array. /// We should return constant empty array.
return result_type->createColumnConstWithDefaultValue(input_rows_count); return result_type->createColumnConstWithDefaultValue(input_rows_count);
}
const DataTypePtr & elem_type = static_cast<const DataTypeArray &>(*result_type).getNestedType(); const DataTypePtr & elem_type = static_cast<const DataTypeArray &>(*result_type).getNestedType();
@ -59,7 +61,6 @@ public:
Columns columns_holder(num_elements); Columns columns_holder(num_elements);
ColumnRawPtrs column_ptrs(num_elements); ColumnRawPtrs column_ptrs(num_elements);
for (size_t i = 0; i < num_elements; ++i) for (size_t i = 0; i < num_elements; ++i)
{ {
const auto & arg = arguments[i]; const auto & arg = arguments[i];
@ -76,22 +77,33 @@ public:
} }
/// Create and fill the result array. /// Create and fill the result array.
auto out = ColumnArray::create(elem_type->createColumn()); auto out = ColumnArray::create(elem_type->createColumn());
IColumn & out_data = out->getData(); IColumn & out_data = out->getData();
IColumn::Offsets & out_offsets = out->getOffsets(); IColumn::Offsets & out_offsets = out->getOffsets();
out_data.reserve(input_rows_count * num_elements); out_data.reserve(input_rows_count * num_elements);
out_offsets.resize(input_rows_count); out_offsets.resize_exact(input_rows_count);
IColumn::Offset current_offset = 0; IColumn::Offset current_offset = 0;
for (size_t i = 0; i < input_rows_count; ++i) if (num_elements == 1)
{ {
for (size_t j = 0; j < num_elements; ++j) for (size_t i = 0; i < input_rows_count; ++i)
out_data.insertFrom(*column_ptrs[j], i); {
++current_offset;
out_offsets[i] = current_offset;
}
out_data.insertManyFrom(*column_ptrs[0], 0, input_rows_count);
}
else
{
for (size_t i = 0; i < input_rows_count; ++i)
{
for (size_t j = 0; j < num_elements; ++j)
out_data.insertFrom(*column_ptrs[j], i);
current_offset += num_elements; current_offset += num_elements;
out_offsets[i] = current_offset; out_offsets[i] = current_offset;
}
} }
return out; return out;

View File

@ -1,14 +1,15 @@
#include <Functions/IFunction.h> #include <Columns/ColumnMap.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeMap.h> #include <Functions/IFunction.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnMap.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Interpreters/castColumn.h>
#include <Common/HashTable/HashSet.h> #include <Common/HashTable/HashSet.h>
@ -123,24 +124,38 @@ public:
MutableColumnPtr keys_data = key_type->createColumn(); MutableColumnPtr keys_data = key_type->createColumn();
MutableColumnPtr values_data = value_type->createColumn(); MutableColumnPtr values_data = value_type->createColumn();
MutableColumnPtr offsets = DataTypeNumber<IColumn::Offset>().createColumn(); MutableColumnPtr offsets = DataTypeUInt64().createColumn();
size_t total_elements = input_rows_count * num_elements / 2; size_t total_elements = input_rows_count * num_elements / 2;
keys_data->reserve(total_elements); keys_data->reserve(total_elements);
values_data->reserve(total_elements); values_data->reserve(total_elements);
offsets->reserve(input_rows_count); auto & offsets_data = assert_cast<ColumnUInt64 &>(*offsets).getData();
offsets_data.resize_exact(input_rows_count);
IColumn::Offset current_offset = 0; IColumn::Offset current_offset = 0;
for (size_t i = 0; i < input_rows_count; ++i) if (num_elements == 2)
{ {
for (size_t j = 0; j < num_elements; j += 2) for (size_t i = 0; i < input_rows_count; ++i)
{ {
keys_data->insertFrom(*column_ptrs[j], i); ++current_offset;
values_data->insertFrom(*column_ptrs[j + 1], i); offsets_data[i] = current_offset;
} }
keys_data->insertManyFrom(*column_ptrs[0], 0, input_rows_count);
values_data->insertManyFrom(*column_ptrs[1], 0, input_rows_count);
}
else
{
for (size_t i = 0; i < input_rows_count; ++i)
{
for (size_t j = 0; j < num_elements; j += 2)
{
keys_data->insertFrom(*column_ptrs[j], i);
values_data->insertFrom(*column_ptrs[j + 1], i);
}
current_offset += num_elements / 2; current_offset += num_elements / 2;
offsets->insert(current_offset); offsets_data[i] = current_offset;
}
} }
auto nested_column = ColumnArray::create( auto nested_column = ColumnArray::create(