add minMap and maxMap functions

This commit is contained in:
Ildus Kurbangaliev 2020-06-11 10:31:37 +00:00
parent 09a37db2a3
commit 01d903c60d
6 changed files with 310 additions and 17 deletions

View File

@ -370,6 +370,46 @@ GROUP BY timeslot
└─────────────────────┴──────────────────────────────────────────────┴────────────────────────────────┘
```
## minMap(key, value), minMap(Tuple(key, value)) {#agg_functions-minmap}
Calculates the minimum from value array according to the keys specified in the key array.
Passing tuple of keys and values arrays is synonymical to passing two arrays of keys and values.
The number of elements in key and value must be the same for each row that is totaled.
Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys.
Example:
```sql
SELECT minMap(a, b)
FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1]))
```
```text
┌─minMap(a, b)──────┐
│ ([1,2,3],[2,1,1]) │
└───────────────────┘
```
## maxMap(key, value), maxMap(Tuple(key, value)) {#agg_functions-maxmap}
Calculates the maximum from value array according to the keys specified in the key array.
Passing tuple of keys and values arrays is synonymical to passing two arrays of keys and values.
The number of elements in key and value must be the same for each row that is totaled.
Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys.
Example:
```sql
SELECT maxMap(a, b)
FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1]))
```
```text
┌─maxMap(a, b)──────┐
│ ([1,2,3],[2,2,1]) │
└───────────────────┘
```
## skewPop {#skewpop}
Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence.

View File

@ -32,6 +32,20 @@ struct SumMapFiltered
using F = AggregateFunctionSumMapFiltered<T, overflow, tuple_argument>;
};
template <bool tuple_argument>
struct MinMap
{
template <typename T>
using F = AggregateFunctionMinMap<T, tuple_argument>;
};
template <bool tuple_argument>
struct MaxMap
{
template <typename T>
using F = AggregateFunctionMaxMap<T, tuple_argument>;
};
auto parseArguments(const std::string & name, const DataTypes & arguments)
{
@ -154,6 +168,64 @@ AggregateFunctionPtr createAggregateFunctionSumMapFiltered(const std::string & n
return res;
}
AggregateFunctionPtr createAggregateFunctionMinMap(const std::string & name, const DataTypes & arguments, const Array & params)
{
assertNoParameters(name, params);
auto [keys_type, values_types, tuple_argument] = parseArguments(name, arguments);
AggregateFunctionPtr res;
if (tuple_argument)
{
res.reset(createWithNumericBasedType<MinMap<true>::template F>(*keys_type, keys_type, values_types, arguments));
if (!res)
res.reset(createWithDecimalType<MinMap<true>::template F>(*keys_type, keys_type, values_types, arguments));
if (!res)
res.reset(createWithStringType<MinMap<true>::template F>(*keys_type, keys_type, values_types, arguments));
}
else
{
res.reset(createWithNumericBasedType<MinMap<false>::template F>(*keys_type, keys_type, values_types, arguments));
if (!res)
res.reset(createWithDecimalType<MinMap<false>::template F>(*keys_type, keys_type, values_types, arguments));
if (!res)
res.reset(createWithStringType<MinMap<false>::template F>(*keys_type, keys_type, values_types, arguments));
}
if (!res)
throw Exception("Illegal type of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
AggregateFunctionPtr createAggregateFunctionMaxMap(const std::string & name, const DataTypes & arguments, const Array & params)
{
assertNoParameters(name, params);
auto [keys_type, values_types, tuple_argument] = parseArguments(name, arguments);
AggregateFunctionPtr res;
if (tuple_argument)
{
res.reset(createWithNumericBasedType<MaxMap<true>::template F>(*keys_type, keys_type, values_types, arguments));
if (!res)
res.reset(createWithDecimalType<MaxMap<true>::template F>(*keys_type, keys_type, values_types, arguments));
if (!res)
res.reset(createWithStringType<MaxMap<true>::template F>(*keys_type, keys_type, values_types, arguments));
}
else
{
res.reset(createWithNumericBasedType<MaxMap<false>::template F>(*keys_type, keys_type, values_types, arguments));
if (!res)
res.reset(createWithDecimalType<MaxMap<false>::template F>(*keys_type, keys_type, values_types, arguments));
if (!res)
res.reset(createWithStringType<MaxMap<false>::template F>(*keys_type, keys_type, values_types, arguments));
}
if (!res)
throw Exception("Illegal type of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
}
void registerAggregateFunctionSumMap(AggregateFunctionFactory & factory)
@ -162,6 +234,8 @@ void registerAggregateFunctionSumMap(AggregateFunctionFactory & factory)
factory.registerFunction("sumMapWithOverflow", createAggregateFunctionSumMap<true /*overflow*/>);
factory.registerFunction("sumMapFiltered", createAggregateFunctionSumMapFiltered<false /*overflow*/>);
factory.registerFunction("sumMapFilteredWithOverflow", createAggregateFunctionSumMapFiltered<true /*overflow*/>);
factory.registerFunction("minMap", createAggregateFunctionMinMap);
factory.registerFunction("maxMap", createAggregateFunctionMaxMap);
}
}

View File

@ -28,16 +28,16 @@ namespace ErrorCodes
}
template <typename T>
struct AggregateFunctionSumMapData
struct AggregateFunctionXxxMapData
{
// Map needs to be ordered to maintain function properties
std::map<T, Array> merged_maps;
};
/** Aggregate function, that takes at least two arguments: keys and values, and as a result, builds a tuple of of at least 2 arrays -
* ordered keys and variable number of argument values summed up by corresponding keys.
* ordered keys and variable number of argument values aggregated by corresponding keys.
*
* This function is the most useful when using SummingMergeTree to sum Nested columns, which name ends in "Map".
* sumMap function is the most useful when using SummingMergeTree to sum Nested columns, which name ends in "Map".
*
* Example: sumMap(k, v...) of:
* k v
@ -49,25 +49,25 @@ struct AggregateFunctionSumMapData
* [8,9,10] [20,20,20]
* will return:
* ([1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20])
*
* minMap and maxMap share the same idea, but calculate min and max correspondingly.
*/
template <typename T, typename Derived, bool overflow, bool tuple_argument>
class AggregateFunctionSumMapBase : public IAggregateFunctionDataHelper<
AggregateFunctionSumMapData<NearestFieldType<T>>, Derived>
template <typename T, typename Derived, typename Visitor, bool overflow, bool tuple_argument>
class AggregateFunctionMapOpBase : public IAggregateFunctionDataHelper<
AggregateFunctionXxxMapData<NearestFieldType<T>>, Derived>
{
private:
DataTypePtr keys_type;
DataTypes values_types;
public:
AggregateFunctionSumMapBase(
AggregateFunctionMapOpBase(
const DataTypePtr & keys_type_, const DataTypes & values_types_,
const DataTypes & argument_types_, const Array & params_)
: IAggregateFunctionDataHelper<AggregateFunctionSumMapData<NearestFieldType<T>>, Derived>(argument_types_, params_)
: IAggregateFunctionDataHelper<AggregateFunctionXxxMapData<NearestFieldType<T>>, Derived>(argument_types_, params_)
, keys_type(keys_type_), values_types(values_types_) {}
String getName() const override { return "sumMap"; }
DataTypePtr getReturnType() const override
{
DataTypes types;
@ -88,7 +88,7 @@ public:
// No overflow, meaning we promote the types if necessary.
if (!value_type->canBePromoted())
{
throw Exception{"Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
throw Exception{"Values for " + getName() + " are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
result_type = value_type->promoteNumericType();
@ -161,7 +161,7 @@ public:
if (it != merged_maps.end())
{
applyVisitor(FieldVisitorSum(value), it->second[col]);
applyVisitor(Visitor(value), it->second[col]);
}
else
{
@ -198,7 +198,7 @@ public:
if (it != merged_maps.end())
{
for (size_t col = 0; col < values_types.size(); ++col)
applyVisitor(FieldVisitorSum(elem.second[col]), it->second[col]);
applyVisitor(Visitor(elem.second[col]), it->second[col]);
}
else
merged_maps[elem.first] = elem.second;
@ -300,15 +300,16 @@ public:
}
bool keepKey(const T & key) const { return static_cast<const Derived &>(*this).keepKey(key); }
String getName() const override { return static_cast<const Derived &>(*this).getName(); }
};
template <typename T, bool overflow, bool tuple_argument>
class AggregateFunctionSumMap final :
public AggregateFunctionSumMapBase<T, AggregateFunctionSumMap<T, overflow, tuple_argument>, overflow, tuple_argument>
public AggregateFunctionMapOpBase<T, AggregateFunctionSumMap<T, overflow, tuple_argument>, FieldVisitorSum, overflow, tuple_argument>
{
private:
using Self = AggregateFunctionSumMap<T, overflow, tuple_argument>;
using Base = AggregateFunctionSumMapBase<T, Self, overflow, tuple_argument>;
using Base = AggregateFunctionMapOpBase<T, Self, FieldVisitorSum, overflow, tuple_argument>;
public:
AggregateFunctionSumMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_)
@ -322,14 +323,15 @@ public:
template <typename T, bool overflow, bool tuple_argument>
class AggregateFunctionSumMapFiltered final :
public AggregateFunctionSumMapBase<T,
public AggregateFunctionMapOpBase<T,
AggregateFunctionSumMapFiltered<T, overflow, tuple_argument>,
FieldVisitorSum,
overflow,
tuple_argument>
{
private:
using Self = AggregateFunctionSumMapFiltered<T, overflow, tuple_argument>;
using Base = AggregateFunctionSumMapBase<T, Self, overflow, tuple_argument>;
using Base = AggregateFunctionMapOpBase<T, Self, FieldVisitorSum, overflow, tuple_argument>;
std::unordered_set<T> keys_to_keep;
@ -351,4 +353,40 @@ public:
bool keepKey(const T & key) const { return keys_to_keep.count(key); }
};
template <typename T, bool tuple_argument>
class AggregateFunctionMinMap final :
public AggregateFunctionMapOpBase<T, AggregateFunctionMinMap<T, tuple_argument>, FieldVisitorMin, true, tuple_argument>
{
private:
using Self = AggregateFunctionMinMap<T, tuple_argument>;
using Base = AggregateFunctionMapOpBase<T, Self, FieldVisitorMin, true, tuple_argument>;
public:
AggregateFunctionMinMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_)
: Base{keys_type_, values_types_, argument_types_, {}}
{}
String getName() const override { return "minMap"; }
bool keepKey(const T &) const { return true; }
};
template <typename T, bool tuple_argument>
class AggregateFunctionMaxMap final :
public AggregateFunctionMapOpBase<T, AggregateFunctionMaxMap<T, tuple_argument>, FieldVisitorMax, true, tuple_argument>
{
private:
using Self = AggregateFunctionMaxMap<T, tuple_argument>;
using Base = AggregateFunctionMapOpBase<T, Self, FieldVisitorMax, true, tuple_argument>;
public:
AggregateFunctionMaxMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_)
: Base{keys_type_, values_types_, argument_types_, {}}
{}
String getName() const override { return "maxMap"; }
bool keepKey(const T &) const { return true; }
};
}

View File

@ -449,4 +449,88 @@ public:
}
};
/** Implements `Max` operation.
* Returns true if changed
*/
class FieldVisitorMax : public StaticVisitor<bool>
{
private:
const Field & rhs;
public:
explicit FieldVisitorMax(const Field & rhs_) : rhs(rhs_) {}
bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); }
bool operator() (Array &) const { throw Exception("Cannot compare Arrays", ErrorCodes::LOGICAL_ERROR); }
bool operator() (Tuple &) const { throw Exception("Cannot compare Tuples", ErrorCodes::LOGICAL_ERROR); }
bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot compare AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); }
template <typename T>
bool operator() (DecimalField<T> & x) const
{
auto val = get<DecimalField<T>>(rhs);
if (val > x)
{
x = val;
return true;
}
return false;
}
template <typename T>
bool operator() (T & x) const
{
auto val = get<T>(rhs);
if (val > x)
{
x = val;
return true;
}
return false;
}
};
/** Implements `Min` operation.
* Returns true if changed
*/
class FieldVisitorMin : public StaticVisitor<bool>
{
private:
const Field & rhs;
public:
explicit FieldVisitorMin(const Field & rhs_) : rhs(rhs_) {}
bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); }
bool operator() (Array &) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); }
bool operator() (Tuple &) const { throw Exception("Cannot sum Tuples", ErrorCodes::LOGICAL_ERROR); }
bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot sum AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); }
template <typename T>
bool operator() (DecimalField<T> & x) const
{
auto val = get<DecimalField<T>>(rhs);
if (val < x)
{
x = val;
return true;
}
return false;
}
template <typename T>
bool operator() (T & x) const
{
auto val = get<T>(rhs);
if (val < x)
{
x = val;
return true;
}
return false;
}
};
}

View File

@ -0,0 +1,24 @@
([0,1,2,3,4,5,6,7,8,9,10],[10,1,1,1,1,1,1,1,1,1,1]) Tuple(Array(Int32), Array(UInt64))
([1],[-49])
([1.00],[-49.00])
([0,1,2,3,4,5,6,7,8,9,10],[100,91,92,93,94,95,96,97,98,99,1]) Tuple(Array(Int32), Array(UInt64))
([1],[50])
([1.00],[50.00])
(['01234567-89ab-cdef-0123-456789abcdef'],['01111111-89ab-cdef-0123-456789abcdef'])
(['1'],['1'])
(['1'],['1'])
([1],[1])
([1],[1])
(['1970-01-02'],[1])
(['1970-01-01 03:00:01'],[1])
([1.01],[1])
(['a'],[1])
(['01234567-89ab-cdef-0123-456789abcdef'],['02222222-89ab-cdef-0123-456789abcdef'])
(['1'],['2'])
(['1'],['2'])
([1],[2])
([1],[2])
(['1970-01-02'],[2])
(['1970-01-01 03:00:01'],[2])
([1.01],[2])
(['a'],[2])

View File

@ -0,0 +1,33 @@
select minMap([toInt32(number % 10), number % 10 + 1], [number, 1]) as m, toTypeName(m) from numbers(1, 100);
select minMap([1], [toInt32(number) - 50]) from numbers(1, 100);
select minMap([cast(1, 'Decimal(10, 2)')], [cast(toInt32(number) - 50, 'Decimal(10, 2)')]) from numbers(1, 100);
select maxMap([toInt32(number % 10), number % 10 + 1], [number, 1]) as m, toTypeName(m) from numbers(1, 100);
select maxMap([1], [toInt32(number) - 50]) from numbers(1, 100);
select maxMap([cast(1, 'Decimal(10, 2)')], [cast(toInt32(number) - 50, 'Decimal(10, 2)')]) from numbers(1, 100);
-- check different types for minMap
select minMap(val, cnt) from values ('val Array(UUID), cnt Array(UUID)',
(['01234567-89ab-cdef-0123-456789abcdef'], ['01111111-89ab-cdef-0123-456789abcdef']),
(['01234567-89ab-cdef-0123-456789abcdef'], ['02222222-89ab-cdef-0123-456789abcdef']));
select minMap(val, cnt) from values ('val Array(String), cnt Array(String)', (['1'], ['1']), (['1'], ['2']));
select minMap(val, cnt) from values ('val Array(FixedString(1)), cnt Array(FixedString(1))', (['1'], ['1']), (['1'], ['2']));
select minMap(val, cnt) from values ('val Array(UInt64), cnt Array(UInt64)', ([1], [1]), ([1], [2]));
select minMap(val, cnt) from values ('val Array(Float64), cnt Array(Int8)', ([1], [1]), ([1], [2]));
select minMap(val, cnt) from values ('val Array(Date), cnt Array(Int16)', ([1], [1]), ([1], [2]));
select minMap(val, cnt) from values ('val Array(DateTime(\'Europe/Moscow\')), cnt Array(Int32)', ([1], [1]), ([1], [2]));
select minMap(val, cnt) from values ('val Array(Decimal(10, 2)), cnt Array(Int16)', (['1.01'], [1]), (['1.01'], [2]));
select minMap(val, cnt) from values ('val Array(Enum16(\'a\'=1)), cnt Array(Int16)', (['a'], [1]), (['a'], [2]));
-- check different types for maxMap
select maxMap(val, cnt) from values ('val Array(UUID), cnt Array(UUID)',
(['01234567-89ab-cdef-0123-456789abcdef'], ['01111111-89ab-cdef-0123-456789abcdef']),
(['01234567-89ab-cdef-0123-456789abcdef'], ['02222222-89ab-cdef-0123-456789abcdef']));
select maxMap(val, cnt) from values ('val Array(String), cnt Array(String)', (['1'], ['1']), (['1'], ['2']));
select maxMap(val, cnt) from values ('val Array(FixedString(1)), cnt Array(FixedString(1))', (['1'], ['1']), (['1'], ['2']));
select maxMap(val, cnt) from values ('val Array(UInt64), cnt Array(UInt64)', ([1], [1]), ([1], [2]));
select maxMap(val, cnt) from values ('val Array(Float64), cnt Array(Int8)', ([1], [1]), ([1], [2]));
select maxMap(val, cnt) from values ('val Array(Date), cnt Array(Int16)', ([1], [1]), ([1], [2]));
select maxMap(val, cnt) from values ('val Array(DateTime(\'Europe/Moscow\')), cnt Array(Int32)', ([1], [1]), ([1], [2]));
select maxMap(val, cnt) from values ('val Array(Decimal(10, 2)), cnt Array(Int16)', (['1.01'], [1]), (['1.01'], [2]));
select maxMap(val, cnt) from values ('val Array(Enum16(\'a\'=1)), cnt Array(Int16)', (['a'], [1]), (['a'], [2]));