Aggregate function sumMap: improve insertResultInto method and change return type to Tuple(Array(K), Array(V)).

This commit is contained in:
Alex Bocharov 2017-09-19 13:35:25 +01:00
parent 07cd6829d0
commit e99c969a1c
5 changed files with 35 additions and 29 deletions

View File

@ -4,10 +4,10 @@
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnArray.h> #include <Columns/ColumnArray.h>
#include <Columns/ColumnVector.h> #include <Columns/ColumnTuple.h>
#include <Core/FieldVisitors.h> #include <Core/FieldVisitors.h>
#include <AggregateFunctions/IBinaryAggregateFunction.h> #include <AggregateFunctions/IBinaryAggregateFunction.h>
@ -43,12 +43,11 @@ struct AggregateFunctionSumMapData
* [7,5,3] [5,15,25] * [7,5,3] [5,15,25]
* [8,9,10] [20,20,20] * [8,9,10] [20,20,20]
* will return: * will return:
* [[1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20]] * ([1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20])
*/ */
class AggregateFunctionSumMap final : public IBinaryAggregateFunction<struct AggregateFunctionSumMapData, AggregateFunctionSumMap> class AggregateFunctionSumMap final : public IBinaryAggregateFunction<struct AggregateFunctionSumMapData, AggregateFunctionSumMap>
{ {
private: private:
DataTypePtr type;
DataTypePtr keys_type; DataTypePtr keys_type;
DataTypePtr values_type; DataTypePtr values_type;
@ -57,7 +56,11 @@ public:
DataTypePtr getReturnType() const override DataTypePtr getReturnType() const override
{ {
return std::make_shared<DataTypeArray>(type); DataTypes types;
types.emplace_back(std::make_shared<DataTypeArray>(keys_type));
types.emplace_back(std::make_shared<DataTypeArray>(values_type));
return std::make_shared<DataTypeTuple>(types);
} }
void setArgumentsImpl(const DataTypes & arguments) void setArgumentsImpl(const DataTypes & arguments)
@ -77,8 +80,6 @@ public:
throw Exception("Second argument for function " + getName() + " must be an array.", throw Exception("Second argument for function " + getName() + " must be an array.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
values_type = array_type->getNestedType(); values_type = array_type->getNestedType();
type = arguments.front();
} }
void setParameters(const Array & params) override void setParameters(const Array & params) override
@ -159,25 +160,30 @@ public:
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
{ {
auto & to_array = static_cast<ColumnArray &>(to); auto & to_cols = static_cast<ColumnTuple &>(to).getColumns();
auto & to_data = to_array.getData();
auto & to_offsets = to_array.getOffsets(); auto & to_keys_arr = static_cast<ColumnArray &>(*to_cols[0]);
auto & to_values_arr = static_cast<ColumnArray &>(*to_cols[1]);
auto & to_keys_col = to_keys_arr.getData();
auto & to_keys_offsets = to_keys_arr.getOffsets();
auto & to_values_col = to_values_arr.getData();
auto & to_values_offsets = to_values_arr.getOffsets();
const auto & merged_maps = this->data(place).merged_maps; const auto & merged_maps = this->data(place).merged_maps;
size_t size = merged_maps.size(); size_t size = merged_maps.size();
Array keys, values; to_keys_col.reserve(size);
keys.reserve(size); to_values_col.reserve(size);
values.reserve(size);
for (const auto &v : merged_maps) for (const auto &v : merged_maps)
{ {
keys.push_back(v.first); to_keys_col.insert(v.first);
values.push_back(v.second); to_values_col.insert(v.second);
} }
to_data.insert(keys); to_keys_offsets.push_back((to_keys_offsets.empty() ? 0 : to_keys_offsets.back()) + size);
to_data.insert(values); to_values_offsets.push_back((to_values_offsets.empty() ? 0 : to_values_offsets.back()) + size);
to_offsets.push_back((to_offsets.empty() ? 0 : to_offsets.back()) + 2);
} }
const char * getHeaderFilePath() const override { return __FILE__; } const char * getHeaderFilePath() const override { return __FILE__; }

View File

@ -2,9 +2,9 @@
2000-01-01 2000-01-01 00:00:00 [3,4,5] [10,10,10] 2000-01-01 2000-01-01 00:00:00 [3,4,5] [10,10,10]
2000-01-01 2000-01-01 00:01:00 [4,5,6] [10,10,10] 2000-01-01 2000-01-01 00:01:00 [4,5,6] [10,10,10]
2000-01-01 2000-01-01 00:01:00 [6,7,8] [10,10,10] 2000-01-01 2000-01-01 00:01:00 [6,7,8] [10,10,10]
[[1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]] ([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10])
[[1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]] ([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10])
2000-01-01 00:00:00 [[1,2,3,4,5],[10,10,20,10,10]] 2000-01-01 00:00:00 ([1,2,3,4,5],[10,10,20,10,10])
2000-01-01 00:01:00 [[4,5,6,7,8],[10,10,20,10,10]] 2000-01-01 00:01:00 ([4,5,6,7,8],[10,10,20,10,10])
2000-01-01 00:00:00 [1,2,3,4,5] [10,10,20,10,10] 2000-01-01 00:00:00 [1,2,3,4,5] [10,10,20,10,10]
2000-01-01 00:01:00 [4,5,6,7,8] [10,10,20,10,10] 2000-01-01 00:01:00 [4,5,6,7,8] [10,10,20,10,10]

View File

@ -7,6 +7,6 @@ SELECT * FROM test.sum_map;
SELECT sumMap(statusMap.status, statusMap.requests) FROM test.sum_map; SELECT sumMap(statusMap.status, statusMap.requests) FROM test.sum_map;
SELECT sumMapMerge(s) FROM (SELECT sumMapState(statusMap.status, statusMap.requests) AS s FROM test.sum_map); SELECT sumMapMerge(s) FROM (SELECT sumMapState(statusMap.status, statusMap.requests) AS s FROM test.sum_map);
SELECT timeslot, sumMap(statusMap.status, statusMap.requests) FROM test.sum_map GROUP BY timeslot; SELECT timeslot, sumMap(statusMap.status, statusMap.requests) FROM test.sum_map GROUP BY timeslot;
SELECT timeslot, sumMap(statusMap.status, statusMap.requests)[1], sumMap(statusMap.status, statusMap.requests)[2] FROM test.sum_map GROUP BY timeslot; SELECT timeslot, sumMap(statusMap.status, statusMap.requests).1, sumMap(statusMap.status, statusMap.requests).2 FROM test.sum_map GROUP BY timeslot;
DROP TABLE test.sum_map; DROP TABLE test.sum_map;

View File

@ -48,7 +48,7 @@ sumMap(key, value)
------ ------
Performs summation of array 'value' by corresponding keys of array 'key'. Performs summation of array 'value' by corresponding keys of array 'key'.
The number of elements in the 'key' and 'value' arrays must be the same for each row on which summation is performed. The number of elements in the 'key' and 'value' arrays must be the same for each row on which summation is performed.
Returns array of two arrays - sorted keys and values, summed up by corresponding keys. Returns a tuple of two arrays - sorted keys and values, summed up by corresponding keys.
Example: Example:
@ -76,8 +76,8 @@ GROUP BY timeslot
.. code-block:: text .. code-block:: text
┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐ ┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐
│ 2000-01-01 00:00:00 │ [[1,2,3,4,5],[10,10,20,10,10]] │ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10])
│ 2000-01-01 00:01:00 │ [[4,5,6,7,8],[10,10,20,10,10]] │ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10])
└─────────────────────┴──────────────────────────────────────────────┘ └─────────────────────┴──────────────────────────────────────────────┘
avg(x) avg(x)

View File

@ -48,7 +48,7 @@ sumMap(key, value)
------ ------
Производит суммирование массива 'value' по соответствующим ключам, заданным в массиве 'key'. Производит суммирование массива 'value' по соответствующим ключам, заданным в массиве 'key'.
Количество элементов в 'key' и 'value' должно быть одинаковым для каждой строки, для которой происходит суммирование. Количество элементов в 'key' и 'value' должно быть одинаковым для каждой строки, для которой происходит суммирование.
Возвращает массив из двух массивов - ключи в отсортированном порядке и значения, просуммированные по соответствующим ключам. Возвращает кортеж из двух массивов - ключи в отсортированном порядке и значения, просуммированные по соответствующим ключам.
Пример: Пример:
@ -76,8 +76,8 @@ GROUP BY timeslot
.. code-block:: text .. code-block:: text
┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐ ┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐
│ 2000-01-01 00:00:00 │ [[1,2,3,4,5],[10,10,20,10,10]] │ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10])
│ 2000-01-01 00:01:00 │ [[4,5,6,7,8],[10,10,20,10,10]] │ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10])
└─────────────────────┴──────────────────────────────────────────────┘ └─────────────────────┴──────────────────────────────────────────────┘
avg(x) avg(x)