added groupBitmapAnd, groupBitmapOr, groupBitmapXor

This commit is contained in:
Zhichang Yu 2019-09-19 17:20:49 +08:00
parent 8810f9cefa
commit 8945714c27
7 changed files with 334 additions and 2 deletions

View File

@ -2,7 +2,7 @@
#include <AggregateFunctions/AggregateFunctionGroupBitmap.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeAggregateFunction.h>
namespace DB
{
@ -29,12 +29,36 @@ AggregateFunctionPtr createAggregateFunctionBitmap(const std::string & name, con
return res;
}
template <template <typename, typename> class AggregateFunctionTemplate>
AggregateFunctionPtr createAggregateFunctionBitmapL2(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
DataTypePtr argument_type_ptr = argument_types[0];
WhichDataType which(*argument_type_ptr);
if (which.idx == TypeIndex::AggregateFunction)
{
const DataTypeAggregateFunction& datatype_aggfunc = dynamic_cast<const DataTypeAggregateFunction&>(*argument_type_ptr);
AggregateFunctionPtr aggfunc = datatype_aggfunc.getFunction();
argument_type_ptr = aggfunc->getArgumentTypes()[0];
}
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(*argument_type_ptr, argument_type_ptr));
if (!res)
throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
}
void registerAggregateFunctionsBitmap(AggregateFunctionFactory & factory)
{
factory.registerFunction("groupBitmap", createAggregateFunctionBitmap<AggregateFunctionGroupBitmapData>);
factory.registerFunction("groupBitmapAnd", createAggregateFunctionBitmapL2<AggregateFunctionBitmapL2And>);
factory.registerFunction("groupBitmapOr", createAggregateFunctionBitmapL2<AggregateFunctionBitmapL2Or>);
factory.registerFunction("groupBitmapXor", createAggregateFunctionBitmapL2<AggregateFunctionBitmapL2Xor>);
}
}

View File

@ -5,6 +5,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionGroupBitmapData.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnAggregateFunction.h>
namespace DB
{
@ -53,4 +54,105 @@ public:
};
template <typename T, typename Data, typename Policy>
class AggregateFunctionBitmapL2 final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitmapL2<T, Data, Policy>>
{
public:
AggregateFunctionBitmapL2(const DataTypePtr & type)
: IAggregateFunctionDataHelper<Data, AggregateFunctionBitmapL2<T, Data, Policy>>({type}, {}){}
String getName() const override { return Data::name(); }
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeNumber<T>>();
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
Data & data_lhs = this->data(place);
const Data & data_rhs = this->data(static_cast<const ColumnAggregateFunction &>(*columns[0]).getData()[row_num]);
if (!data_lhs.doneFirst)
{
data_lhs.doneFirst = true;
data_lhs.rbs.rb_or(data_rhs.rbs);
}
else
{
Policy::apply(data_lhs, data_rhs);
}
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
Data & data_lhs = this->data(place);
const Data & data_rhs = this->data(rhs);
if (!data_lhs.doneFirst)
{
data_lhs.doneFirst = true;
data_lhs.rbs.rb_or(data_rhs.rbs);
}
else
{
Policy::apply(data_lhs, data_rhs);
}
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).rbs.write(buf);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
{
this->data(place).rbs.read(buf);
}
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
{
static_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).rbs.size());
}
const char * getHeaderFilePath() const override { return __FILE__; }
};
template <typename Data>
class BitmapAndPolicy
{
public:
static void apply(Data& lhs, const Data& rhs)
{
lhs.rbs.rb_and(rhs.rbs);
}
};
template <typename Data>
class BitmapOrPolicy
{
public:
static void apply(Data& lhs, const Data& rhs)
{
lhs.rbs.rb_or(rhs.rbs);
}
};
template <typename Data>
class BitmapXorPolicy
{
public:
static void apply(Data& lhs, const Data& rhs)
{
lhs.rbs.rb_xor(rhs.rbs);
}
};
template <typename T, typename Data>
using AggregateFunctionBitmapL2And = AggregateFunctionBitmapL2<T, Data, BitmapAndPolicy<Data> >;
template <typename T, typename Data>
using AggregateFunctionBitmapL2Or = AggregateFunctionBitmapL2<T, Data, BitmapOrPolicy<Data> >;
template <typename T, typename Data>
using AggregateFunctionBitmapL2Xor = AggregateFunctionBitmapL2<T, Data, BitmapXorPolicy<Data> >;
}

View File

@ -704,6 +704,7 @@ private:
template <typename T>
struct AggregateFunctionGroupBitmapData
{
bool doneFirst = false;
RoaringBitmapWithSmallSet<T, 32> rbs;
static const char * name() { return "groupBitmap"; }
};

View File

@ -52,6 +52,17 @@ DataTypePtr AggregateFunctionState::getReturnType() const
return arguments[0];
}
if (arguments.size() > 0)
{
DataTypePtr argument_type_ptr = arguments[0];
WhichDataType which(*argument_type_ptr);
if (which.idx == TypeIndex::AggregateFunction)
{
if (arguments.size() != 1)
throw Exception("Nested aggregation expects only one argument", ErrorCodes::BAD_ARGUMENTS);
return arguments[0];
}
}
return ptr;
}

View File

@ -26,6 +26,20 @@
1
1
1
15
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
15
15
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
3
3
[6,8,10]
[6,8,10]
10
10
[1,3,5,6,8,10,11,13,14,15]
[1,3,5,6,8,10,11,13,14,15]
0
0
0

View File

@ -103,11 +103,41 @@ SELECT bitmapAndCardinality( z, bitmapBuild(cast([19,7] AS Array(UInt32))) ) FRO
SELECT bitmapCardinality(bitmapAnd(bitmapBuild(cast([19,7] AS Array(UInt32))), z )) FROM bitmap_column_expr_test;
SELECT bitmapCardinality(bitmapAnd(z, bitmapBuild(cast([19,7] AS Array(UInt32))))) FROM bitmap_column_expr_test;
DROP TABLE IF EXISTS bitmap_column_expr_test2;
CREATE TABLE bitmap_column_expr_test2
(
tag_id String,
z AggregateFunction(groupBitmap, UInt32)
)
ENGINE = MergeTree
ORDER BY tag_id;
INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
SELECT groupBitmapMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT arraySort(bitmapToArray(groupBitmapMergeState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT groupBitmapOr(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT groupBitmapOrMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT arraySort(bitmapToArray(groupBitmapOrState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT arraySort(bitmapToArray(groupBitmapOrMergeState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT groupBitmapAnd(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT groupBitmapAndMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT arraySort(bitmapToArray(groupBitmapAndState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT arraySort(bitmapToArray(groupBitmapAndMergeState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT groupBitmapXor(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT groupBitmapXorMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT arraySort(bitmapToArray(groupBitmapXorState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
SELECT arraySort(bitmapToArray(groupBitmapXorMergeState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
DROP TABLE IF EXISTS bitmap_test;
DROP TABLE IF EXISTS bitmap_state_test;
DROP TABLE IF EXISTS bitmap_column_expr_test;
DROP TABLE IF EXISTS bitmap_column_expr_test2;
-- bitmapHasAny:
---- Empty

View File

@ -1165,4 +1165,154 @@ stochasticLogisticRegression(1.0, 1.0, 10, 'SGD')
- [Difference between linear and logistic regressions.](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression)
## groupBitmapAnd
Calculations the AND of a bitmap column, return cardinality of type UInt64, if add suffix -State, then return [bitmap object](../functions/bitmap_functions.md).
```sql
groupBitmapAnd(expr)
```
**Parameters**
`expr` An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
**Return value**
Value of the `UInt64` type.
**Example**
```sql
DROP TABLE IF EXISTS bitmap_column_expr_test2;
CREATE TABLE bitmap_column_expr_test2
(
tag_id String,
z AggregateFunction(groupBitmap, UInt32)
)
ENGINE = MergeTree
ORDER BY tag_id;
INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
SELECT groupBitmapAnd(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
┌─groupBitmapAnd(z)─┐
│ 3 │
└───────────────────┘
SELECT groupBitmapAndMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
┌─groupBitmapAnd(z)─┐
│ 3 │
└───────────────────┘
SELECT arraySort(bitmapToArray(groupBitmapAndState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
┌─arraySort(bitmapToArray(groupBitmapAndState(z)))─┐
│ [6,8,10] │
└──────────────────────────────────────────────────┘
```
## groupBitmapOr
Calculations the OR of a bitmap column, return cardinality of type UInt64, if add suffix -State, then return [bitmap object](../functions/bitmap_functions.md). This is equivalent to `groupBitmapMerge`.
```sql
groupBitmapOr(expr)
```
**Parameters**
`expr` An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
**Return value**
Value of the `UInt64` type.
**Example**
```sql
DROP TABLE IF EXISTS bitmap_column_expr_test2;
CREATE TABLE bitmap_column_expr_test2
(
tag_id String,
z AggregateFunction(groupBitmap, UInt32)
)
ENGINE = MergeTree
ORDER BY tag_id;
INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
SELECT groupBitmapOr(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
┌─groupBitmapOr(z)─┐
│ 15 │
└──────────────────┘
SELECT groupBitmapOrMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
┌─groupBitmapOrMerge(z)─┐
│ 15 │
└───────────────────────┘
SELECT arraySort(bitmapToArray(groupBitmapOrState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
┌─arraySort(bitmapToArray(groupBitmapOrState(z)))─┐
│ [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] │
└─────────────────────────────────────────────────┘
```
## groupBitmapXor
Calculations the XOR of a bitmap column, return cardinality of type UInt64, if add suffix -State, then return [bitmap object](../functions/bitmap_functions.md).
```sql
groupBitmapOr(expr)
```
**Parameters**
`expr` An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
**Return value**
Value of the `UInt64` type.
**Example**
```sql
DROP TABLE IF EXISTS bitmap_column_expr_test2;
CREATE TABLE bitmap_column_expr_test2
(
tag_id String,
z AggregateFunction(groupBitmap, UInt32)
)
ENGINE = MergeTree
ORDER BY tag_id;
INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
SELECT groupBitmapXor(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
┌─groupBitmapXor(z)─┐
│ 10 │
└───────────────────┘
SELECT groupBitmapXorMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
┌─groupBitmapXorMerge(z)─┐
│ 10 │
└────────────────────────┘
SELECT arraySort(bitmapToArray(groupBitmapXorState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
┌─arraySort(bitmapToArray(groupBitmapXorState(z)))─┐
│ [1,3,5,6,8,10,11,13,14,15] │
└──────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.yandex/docs/en/query_language/agg_functions/reference/) <!--hide-->