mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
Added 'anyHeavy' aggregate function [#METR-21691].
This commit is contained in:
parent
6ba6d01491
commit
b408afa74f
@ -133,6 +133,16 @@ struct SingleValueDataFixed
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isEqualTo(const Self & to) const
|
||||
{
|
||||
return has() && to.value == value;
|
||||
}
|
||||
|
||||
bool isEqualTo(const IColumn & column, size_t row_num) const
|
||||
{
|
||||
return has() && static_cast<const ColumnVector<T> &>(column).getData()[row_num] == value;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -334,6 +344,16 @@ struct __attribute__((__packed__, __aligned__(1))) SingleValueDataString
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isEqualTo(const Self & to) const
|
||||
{
|
||||
return has() && to.getStringRef() == getStringRef();
|
||||
}
|
||||
|
||||
bool isEqualTo(const IColumn & column, size_t row_num) const
|
||||
{
|
||||
return has() && static_cast<const ColumnString &>(column).getDataAtWithTerminatingZero(row_num) == getStringRef();
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(
|
||||
@ -476,6 +496,16 @@ struct SingleValueDataGeneric
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isEqualTo(const IColumn & column, size_t row_num) const
|
||||
{
|
||||
return has() && value == column[row_num];
|
||||
}
|
||||
|
||||
bool isEqualTo(const Self & to) const
|
||||
{
|
||||
return has() && to.value == value;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -529,6 +559,60 @@ struct AggregateFunctionAnyLastData : Data
|
||||
};
|
||||
|
||||
|
||||
/** Implement 'heavy hitters' algorithm.
|
||||
* Selects most frequent value if its frequency is more than 50%.
|
||||
* Otherwise, selects some arbitary value.
|
||||
* http://www.cs.umd.edu/~samir/498/karp.pdf
|
||||
*/
|
||||
template <typename Data>
|
||||
struct AggregateFunctionAnyHeavyData : Data
|
||||
{
|
||||
size_t counter = 0;
|
||||
|
||||
using Self = AggregateFunctionAnyHeavyData<Data>;
|
||||
|
||||
bool changeIfBetter(const IColumn & column, size_t row_num)
|
||||
{
|
||||
if (this->isEqualTo(column, row_num))
|
||||
{
|
||||
++counter;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (counter == 0)
|
||||
{
|
||||
this->change(column, row_num);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
--counter;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool changeIfBetter(const Self & to)
|
||||
{
|
||||
if (this->isEqualTo(to))
|
||||
{
|
||||
counter += to.counter;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (counter < to.counter)
|
||||
{
|
||||
this->change(to);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
counter -= to.counter;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static const char * name() { return "anyHeavy"; }
|
||||
};
|
||||
|
||||
|
||||
template <typename Data>
|
||||
class AggregateFunctionsSingleValue final : public IUnaryAggregateFunction<Data, AggregateFunctionsSingleValue<Data> >
|
||||
{
|
||||
|
@ -17,6 +17,11 @@ AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, co
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types));
|
||||
}
|
||||
|
||||
/// Creator for the 'anyHeavy' aggregate function: instantiates AggregateFunctionsSingleValue
/// over AggregateFunctionAnyHeavyData for the given argument types and wraps the result in AggregateFunctionPtr.
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types)
{
    auto function = createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyHeavyData>(name, argument_types);
    return AggregateFunctionPtr(function);
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionMin(const std::string & name, const DataTypes & argument_types)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionMinData>(name, argument_types));
|
||||
@ -43,6 +48,7 @@ void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction({"any"}, createAggregateFunctionAny);
|
||||
factory.registerFunction({"anyLast"}, createAggregateFunctionAnyLast);
|
||||
factory.registerFunction({"anyHeavy"}, createAggregateFunctionAnyHeavy);
|
||||
factory.registerFunction({"min"}, createAggregateFunctionMin);
|
||||
factory.registerFunction({"max"}, createAggregateFunctionMax);
|
||||
factory.registerFunction({"argMin"}, createAggregateFunctionArgMin);
|
||||
|
Loading…
Reference in New Issue
Block a user