mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-11 17:02:25 +00:00
Add SimpleAggregateFunction support for sumMap
This commit is contained in:
parent
fa8e4e4735
commit
6244270809
@ -330,9 +330,10 @@ Computes the sum of the numbers, using the same data type for the result as for
|
||||
|
||||
Only works for numbers.
|
||||
|
||||
## sumMap(key, value) {#agg_functions-summap}
|
||||
## sumMap(key, value), sumMap(Tuple(key, value)) {#agg_functions-summap}
|
||||
|
||||
Totals the ‘value’ array according to the keys specified in the ‘key’ array.
|
||||
Passing tuple of keys and values arrays is synonymical to passing two arrays of keys and values.
|
||||
The number of elements in ‘key’ and ‘value’ must be the same for each row that is totaled.
|
||||
Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys.
|
||||
|
||||
@ -345,25 +346,28 @@ CREATE TABLE sum_map(
|
||||
statusMap Nested(
|
||||
status UInt16,
|
||||
requests UInt64
|
||||
)
|
||||
),
|
||||
statusMapTuple Tuple(Array(Int32), Array(Int32))
|
||||
) ENGINE = Log;
|
||||
INSERT INTO sum_map VALUES
|
||||
('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]),
|
||||
('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]),
|
||||
('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]),
|
||||
('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]);
|
||||
('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10], ([1, 2, 3], [10, 10, 10])),
|
||||
('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10], ([3, 4, 5], [10, 10, 10])),
|
||||
('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10], ([4, 5, 6], [10, 10, 10])),
|
||||
('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10], ([6, 7, 8], [10, 10, 10]));
|
||||
|
||||
SELECT
|
||||
timeslot,
|
||||
sumMap(statusMap.status, statusMap.requests)
|
||||
sumMap(statusMap.status, statusMap.requests),
|
||||
sumMap(statusMapTuple)
|
||||
FROM sum_map
|
||||
GROUP BY timeslot
|
||||
```
|
||||
|
||||
``` text
|
||||
┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐
|
||||
│ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10]) │
|
||||
│ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10]) │
|
||||
└─────────────────────┴──────────────────────────────────────────────┘
|
||||
┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┬─sumMap(statusMapTuple)─────────┐
|
||||
│ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10]) │ ([1,2,3,4,5],[10,10,20,10,10]) │
|
||||
│ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10]) │ ([4,5,6,7,8],[10,10,20,10,10]) │
|
||||
└─────────────────────┴──────────────────────────────────────────────┴────────────────────────────────┘
|
||||
```
|
||||
|
||||
## skewPop {#skewpop}
|
||||
|
@ -52,23 +52,37 @@ using SumMapArgs = std::pair<DataTypePtr, DataTypes>;
|
||||
|
||||
SumMapArgs parseArguments(const std::string & name, const DataTypes & arguments)
|
||||
{
|
||||
if (arguments.size() < 2)
|
||||
throw Exception("Aggregate function " + name + " requires at least two arguments of Array type.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
DataTypes args;
|
||||
|
||||
const auto * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
|
||||
if (!array_type)
|
||||
throw Exception("First argument for function " + name + " must be an array.",
|
||||
if (arguments.size() == 1)
|
||||
{
|
||||
const auto * tuple_type = checkAndGetDataType<DataTypeTuple>(arguments[0].get());
|
||||
if (!tuple_type)
|
||||
throw Exception("When function " + name + " gets one argument it must be a tuple",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
const auto elems = tuple_type->getElements();
|
||||
args.insert(args.end(), elems.begin(), elems.end());
|
||||
}
|
||||
else
|
||||
args.insert(args.end(), arguments.begin(), arguments.end());
|
||||
|
||||
if (args.size() < 2)
|
||||
throw Exception("Aggregate function " + name + " requires at least two arguments of Array type or one argument of tuple of two arrays",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
const auto * array_type = checkAndGetDataType<DataTypeArray>(args[0].get());
|
||||
if (!array_type)
|
||||
throw Exception("First argument for function " + name + " must be an array, not " + args[0]->getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
DataTypePtr keys_type = array_type->getNestedType();
|
||||
|
||||
DataTypes values_types;
|
||||
values_types.reserve(arguments.size() - 1);
|
||||
for (size_t i = 1; i < arguments.size(); ++i)
|
||||
values_types.reserve(args.size() - 1);
|
||||
for (size_t i = 1; i < args.size(); ++i)
|
||||
{
|
||||
array_type = checkAndGetDataType<DataTypeArray>(arguments[i].get());
|
||||
array_type = checkAndGetDataType<DataTypeArray>(args[i].get());
|
||||
if (!array_type)
|
||||
throw Exception("Argument #" + toString(i) + " for function " + name + " must be an array.",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
@ -80,6 +80,18 @@ public:
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
|
||||
{
|
||||
// Check if tuple
|
||||
std::unique_ptr<const IColumn *[]> tuple_columns;
|
||||
auto tuple_col = checkAndGetColumn<ColumnTuple>(columns[0]);
|
||||
if (tuple_col)
|
||||
{
|
||||
tuple_columns.reset(new const IColumn*[tuple_col->tupleSize()]);
|
||||
for (size_t i = 0; i < tuple_col->tupleSize(); i++)
|
||||
tuple_columns.get()[i] = &const_cast<IColumn&>(tuple_col->getColumn(i));
|
||||
|
||||
columns = tuple_columns.get();
|
||||
}
|
||||
|
||||
// Column 0 contains array of keys of known type
|
||||
Field key_field;
|
||||
const ColumnArray & array_column0 = assert_cast<const ColumnArray &>(*columns[0]);
|
||||
|
@ -30,7 +30,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
static const std::vector<String> supported_functions{"any", "anyLast", "min", "max", "sum", "groupBitAnd", "groupBitOr", "groupBitXor"};
|
||||
static const std::vector<String> supported_functions{"any", "anyLast", "min", "max", "sum", "groupBitAnd", "groupBitOr", "groupBitXor", "sumMap"};
|
||||
|
||||
|
||||
String DataTypeCustomSimpleAggregateFunction::getName() const
|
||||
|
@ -4,6 +4,7 @@
|
||||
2000-01-01 2000-01-01 00:01:00 [6,7,8] [10,10,10]
|
||||
([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10])
|
||||
([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10])
|
||||
([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10])
|
||||
2000-01-01 00:00:00 ([1,2,3,4,5],[10,10,20,10,10])
|
||||
2000-01-01 00:01:00 ([4,5,6,7,8],[10,10,20,10,10])
|
||||
2000-01-01 00:00:00 [1,2,3,4,5] [10,10,20,10,10]
|
||||
|
@ -7,6 +7,7 @@ INSERT INTO sum_map VALUES ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10,
|
||||
|
||||
SELECT * FROM sum_map ORDER BY timeslot;
|
||||
SELECT sumMap(statusMap.status, statusMap.requests) FROM sum_map;
|
||||
SELECT sumMap((statusMap.status, statusMap.requests)) FROM sum_map;
|
||||
SELECT sumMapMerge(s) FROM (SELECT sumMapState(statusMap.status, statusMap.requests) AS s FROM sum_map);
|
||||
SELECT timeslot, sumMap(statusMap.status, statusMap.requests) FROM sum_map GROUP BY timeslot ORDER BY timeslot;
|
||||
SELECT timeslot, sumMap(statusMap.status, statusMap.requests).1, sumMap(statusMap.status, statusMap.requests).2 FROM sum_map GROUP BY timeslot ORDER BY timeslot;
|
||||
|
@ -39,6 +39,6 @@ SimpleAggregateFunction(sum, Float64)
|
||||
7 14
|
||||
8 16
|
||||
9 18
|
||||
1 1 2 2.2.2.2 3
|
||||
10 2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 20 20.20.20.20 5
|
||||
SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4) SimpleAggregateFunction(groupBitOr, UInt32)
|
||||
1 1 2 2.2.2.2 3 ([1,2,3],[2,1,1])
|
||||
10 2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 20 20.20.20.20 5 ([2,3,4],[2,1,1])
|
||||
SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4) SimpleAggregateFunction(groupBitOr, UInt32) SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64)))
|
||||
|
@ -24,16 +24,17 @@ create table simple (
|
||||
nullable_str SimpleAggregateFunction(anyLast,Nullable(String)),
|
||||
low_str SimpleAggregateFunction(anyLast,LowCardinality(Nullable(String))),
|
||||
ip SimpleAggregateFunction(anyLast,IPv4),
|
||||
status SimpleAggregateFunction(groupBitOr, UInt32)
|
||||
status SimpleAggregateFunction(groupBitOr, UInt32),
|
||||
tup SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64)))
|
||||
) engine=AggregatingMergeTree order by id;
|
||||
insert into simple values(1,'1','1','1.1.1.1', 1);
|
||||
insert into simple values(1,null,'2','2.2.2.2', 2);
|
||||
insert into simple values(1,'1','1','1.1.1.1', 1, ([1,2], [1,1]));
|
||||
insert into simple values(1,null,'2','2.2.2.2', 2, ([1,3], [1,1]));
|
||||
-- String longer then MAX_SMALL_STRING_SIZE (actual string length is 100)
|
||||
insert into simple values(10,'10','10','10.10.10.10', 4);
|
||||
insert into simple values(10,'2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222','20','20.20.20.20', 1);
|
||||
insert into simple values(10,'10','10','10.10.10.10', 4, ([2,3], [1,1]));
|
||||
insert into simple values(10,'2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222','20','20.20.20.20', 1, ([2, 4], [1,1]));
|
||||
|
||||
select * from simple final;
|
||||
select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip),toTypeName(status) from simple limit 1;
|
||||
select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip),toTypeName(status), toTypeName(tup) from simple limit 1;
|
||||
|
||||
optimize table simple final;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user