Support for bitmapHasAny and bitmapHasAll functions.

This commit is contained in:
svladykin 2019-05-12 17:47:31 +03:00
parent 8ef7f3589a
commit 22389d4eca
7 changed files with 536 additions and 292 deletions

View File

@ -1,2 +1,5 @@
*.bin
*.mrk
*.txt
*.dat
*.idx

View File

@ -308,16 +308,86 @@ public:
/**
* Check whether two bitmaps intersect.
* Intersection with an empty set is always 0 (consistent with hasAny).
*/
UInt8 rb_intersect(const RoaringBitmapWithSmallSet & r1)
UInt8 rb_intersect(const RoaringBitmapWithSmallSet & r1) const
{
if (isSmall())
toLarge();
roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
UInt8 is_true = roaring_bitmap_intersect(rb, rb1);
if (r1.isSmall())
roaring_bitmap_free(rb1);
return is_true;
{
if (r1.isSmall()) {
for (const auto & x : r1.small)
if (small.find(x.getValue()) != small.end())
return 1;
}
else
{
for (const auto & x : small)
if (roaring_bitmap_contains(r1.rb, x.getValue()))
return 1;
}
}
else if (r1.isSmall())
{
for (const auto & x : r1.small)
if (roaring_bitmap_contains(rb, x.getValue()))
return 1;
}
else if (roaring_bitmap_intersect(rb, r1.rb))
return 1;
return 0;
}
/**
* Check whether the argument is the subset of this set.
* Empty set is a subset of any other set (consistent with hasAll).
*/
UInt8 rb_is_subset(const RoaringBitmapWithSmallSet & r1) const
{
if (isSmall())
{
if (r1.isSmall())
{
for (const auto & x : r1.small)
if (small.find(x.getValue()) == small.end())
return 0;
}
else {
UInt64 r1_size = r1.size();
if (r1_size > small.size())
return 0; // A bigger set can not be a subset of ours.
// This is a rare case with a small number of elements on
// both sides: r1 was promoted to large for some reason and
// it is still not larger than our small set.
// If r1 is our subset then our size must be equal to
// r1_size + number of not found elements, if this sum becomes
// greater then r1 is not a subset.
for (const auto & x : small)
if (!roaring_bitmap_contains(r1.rb, x.getValue()) && ++r1_size > small.size())
return 0;
}
}
else if (r1.isSmall())
{
for (const auto & x : r1.small)
if (!roaring_bitmap_contains(rb, x.getValue()))
return 0;
}
else if (!roaring_bitmap_is_subset(r1.rb, rb))
return 0;
return 1;
}
/**
* Check whether this bitmap contains the argument.
*/
UInt8 rb_contains(const UInt32 x) const
{
return isSmall() ? small.find(x) != small.end() :
roaring_bitmap_contains(rb, x);
}
/**

View File

@ -21,5 +21,7 @@ void registerFunctionsBitmap(FunctionFactory & factory)
factory.registerFunction<FunctionBitmapXor>();
factory.registerFunction<FunctionBitmapAndnot>();
factory.registerFunction<FunctionBitmapHasAll>();
factory.registerFunction<FunctionBitmapHasAny>();
}
}

View File

@ -342,7 +342,27 @@ struct BitmapAndnotCardinalityImpl
}
};
template <template <typename> class Impl, typename Name>
template <typename T>
struct BitmapHasAllImpl
{
using ReturnType = UInt8;
static UInt8 apply(const AggregateFunctionGroupBitmapData<T> & bd1, const AggregateFunctionGroupBitmapData<T> & bd2)
{
return bd1.rbs.rb_is_subset(bd2.rbs);
}
};
template <typename T>
struct BitmapHasAnyImpl
{
using ReturnType = UInt8;
static UInt8 apply(const AggregateFunctionGroupBitmapData<T> & bd1, const AggregateFunctionGroupBitmapData<T> & bd2)
{
return bd1.rbs.rb_intersect(bd2.rbs);
}
};
template <template <typename> class Impl, typename Name, typename ToType>
class FunctionBitmapCardinality : public IFunction
{
public:
@ -398,8 +418,6 @@ public:
}
private:
using ToType = UInt64;
template <typename T>
void executeIntType(
Block & block, const ColumnNumbers & arguments, size_t input_rows_count, typename ColumnVector<ToType>::Container & vec_to)
@ -571,12 +589,22 @@ struct NameBitmapAndnotCardinality
{
static constexpr auto name = "bitmapAndnotCardinality";
};
struct NameBitmapHasAll
{
static constexpr auto name = "bitmapHasAll";
};
struct NameBitmapHasAny
{
static constexpr auto name = "bitmapHasAny";
};
using FunctionBitmapSelfCardinality = FunctionBitmapSelfCardinalityImpl<NameBitmapCardinality>;
using FunctionBitmapAndCardinality = FunctionBitmapCardinality<BitmapAndCardinalityImpl, NameBitmapAndCardinality>;
using FunctionBitmapOrCardinality = FunctionBitmapCardinality<BitmapOrCardinalityImpl, NameBitmapOrCardinality>;
using FunctionBitmapXorCardinality = FunctionBitmapCardinality<BitmapXorCardinalityImpl, NameBitmapXorCardinality>;
using FunctionBitmapAndnotCardinality = FunctionBitmapCardinality<BitmapAndnotCardinalityImpl, NameBitmapAndnotCardinality>;
using FunctionBitmapAndCardinality = FunctionBitmapCardinality<BitmapAndCardinalityImpl, NameBitmapAndCardinality, UInt64>;
using FunctionBitmapOrCardinality = FunctionBitmapCardinality<BitmapOrCardinalityImpl, NameBitmapOrCardinality, UInt64>;
using FunctionBitmapXorCardinality = FunctionBitmapCardinality<BitmapXorCardinalityImpl, NameBitmapXorCardinality, UInt64>;
using FunctionBitmapAndnotCardinality = FunctionBitmapCardinality<BitmapAndnotCardinalityImpl, NameBitmapAndnotCardinality, UInt64>;
using FunctionBitmapHasAll = FunctionBitmapCardinality<BitmapHasAllImpl, NameBitmapHasAll, UInt8>;
using FunctionBitmapHasAny = FunctionBitmapCardinality<BitmapHasAnyImpl, NameBitmapHasAny, UInt8>;
struct NameBitmapAnd
{

View File

@ -19,3 +19,29 @@
1
1
1
0
0
0
1
0
1
0
1
0
1
0
1
1
0
1
0
0
0
1
0
1
0
1
0
1
0

View File

@ -1,4 +1,4 @@
USE test;
-- USE test;
SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5]));
SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])));
SELECT bitmapToArray(bitmapOr(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])));
@ -96,3 +96,68 @@ DROP TABLE IF EXISTS bitmap_test;
DROP TABLE IF EXISTS bitmap_state_test;
DROP TABLE IF EXISTS bitmap_column_expr_test;
-- bitmapHasAny:
---- Empty
SELECT bitmapHasAny(bitmapBuild([1, 2, 3, 5]), bitmapBuild(emptyArrayUInt32()));
SELECT bitmapHasAny(bitmapBuild(emptyArrayUInt32()), bitmapBuild(emptyArrayUInt32()));
SELECT bitmapHasAny(bitmapBuild(emptyArrayUInt32()), bitmapBuild([1, 2, 3, 5]));
---- Small x Small
SELECT bitmapHasAny(bitmapBuild([1, 2, 3, 5]),bitmapBuild([0, 3, 7]));
SELECT bitmapHasAny(bitmapBuild([1, 2, 3, 5]),bitmapBuild([0, 4, 7]));
---- Small x Large
select bitmapHasAny(bitmapBuild([100,110,120]),bitmapBuild([ 99, 100, 101,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]));
select bitmapHasAny(bitmapBuild([100,200,500]),bitmapBuild([ 99, 101, 600,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]));
---- Large x Small
select bitmapHasAny(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]),bitmapBuild([ 99, 100, 101]));
select bitmapHasAny(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]),bitmapBuild([ 99, 101, 600]));
---- Large x Large
select bitmapHasAny(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
40,50,60]),bitmapBuild([ 41, 50, 61,
99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,66,65]));
select bitmapHasAny(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
40,50,60]),bitmapBuild([ 41, 49, 51, 61,
99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,66,65]));
-- bitmapHasAll:
---- Empty
SELECT bitmapHasAll(bitmapBuild([1, 2, 3, 5]), bitmapBuild(emptyArrayUInt32()));
SELECT bitmapHasAll(bitmapBuild(emptyArrayUInt32()), bitmapBuild(emptyArrayUInt32()));
SELECT bitmapHasAll(bitmapBuild(emptyArrayUInt32()), bitmapBuild([1, 2, 3, 5]));
---- Small x Small
select bitmapHasAll(bitmapBuild([1,5,7,9]),bitmapBuild([5,7]));
select bitmapHasAll(bitmapBuild([1,5,7,9]),bitmapBuild([5,7,2]));
---- Small x Large
select bitmapHasAll(bitmapBuild([100,110,120]),bitmapBuild([ 99, 100, 101,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]));
select bitmapHasAll(bitmapBuild([100,200,500]),bitmapBuild([ 99, 101, 600,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]));
---- Small x LargeSmall
select bitmapHasAll(bitmapBuild([1,5,7,9]),bitmapXor(bitmapBuild([1,5,7]), bitmapBuild([5,7,9])));
select bitmapHasAll(bitmapBuild([1,5,7,9]),bitmapXor(bitmapBuild([1,5,7]), bitmapBuild([2,5,7])));
---- Large x Small
select bitmapHasAll(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]),bitmapBuild([100, 500]));
select bitmapHasAll(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]),bitmapBuild([ 99, 100, 500]));
---- LargeSmall x Small
select bitmapHasAll(bitmapXor(bitmapBuild([1,7]), bitmapBuild([5,7,9])), bitmapBuild([1,5]));
select bitmapHasAll(bitmapXor(bitmapBuild([1,7]), bitmapBuild([5,7,9])), bitmapBuild([1,5,7]));
---- Large x Large
select bitmapHasAll(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]),bitmapBuild([ 100, 200, 500,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]));
select bitmapHasAll(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]),bitmapBuild([ 100, 200, 501,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]));

View File

@ -1,277 +1,327 @@
# Bitmap functions
Bitmap functions work for two bitmaps Object value calculation, it is to return new bitmap or cardinality while using formula calculation, such as and, or, xor, and not, etc.
There are 2 kinds of construction methods for Bitmap Object. One is to be constructed by aggregation function groupBitmap with -State, the other is to be constructed by Array Object. It is also to convert Bitmap Object to Array Object.
RoaringBitmap is wrapped into a data structure while actual storage of Bitmap objects. When the cardinality is less than or equal to 32, it uses Set objet. When the cardinality is greater than 32, it uses RoaringBitmap object. That is why storage of low cardinality set is faster.
For more information on RoaringBitmap, see: [CRoaring](https://github.com/RoaringBitmap/CRoaring).
## bitmapBuild
Build a bitmap from unsigned integer array.
```
bitmapBuild(array)
```
**Parameters**
- `array` unsigned integer array.
**Example**
``` sql
SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res
```
## bitmapToArray
Convert bitmap to integer array.
```
bitmapToArray(bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res
```
```
┌─res─────────┐
│ [1,2,3,4,5] │
└─────────────┘
```
## bitmapAnd
Two bitmap and calculation, the result is a new bitmap.
```
bitmapAnd(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res
```
```
┌─res─┐
│ [3] │
└─────┘
```
## bitmapOr
Two bitmap or calculation, the result is a new bitmap.
```
bitmapOr(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapOr(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res
```
```
┌─res─────────┐
│ [1,2,3,4,5] │
└─────────────┘
```
## bitmapXor
Two bitmap xor calculation, the result is a new bitmap.
```
bitmapXor(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapXor(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res
```
```
┌─res───────┐
│ [1,2,4,5] │
└───────────┘
```
## bitmapAndnot
Two bitmap andnot calculation, the result is a new bitmap.
```
bitmapAndnot(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapAndnot(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res
```
```
┌─res───┐
│ [1,2] │
└───────┘
```
## bitmapCardinality
Retrun bitmap cardinality of type UInt64.
```
bitmapCardinality(bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapCardinality(bitmapBuild([1, 2, 3, 4, 5])) AS res
```
```
┌─res─┐
│ 5 │
└─────┘
```
## bitmapAndCardinality
Two bitmap and calculation, return cardinality of type UInt64.
```
bitmapAndCardinality(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapAndCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```
```
┌─res─┐
│ 1 │
└─────┘
```
## bitmapOrCardinality
Two bitmap or calculation, return cardinality of type UInt64.
```
bitmapOrCardinality(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapOrCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```
```
┌─res─┐
│ 5 │
└─────┘
```
## bitmapXorCardinality
Two bitmap xor calculation, return cardinality of type UInt64.
```
bitmapXorCardinality(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapXorCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```
```
┌─res─┐
│ 4 │
└─────┘
```
## bitmapAndnotCardinality
Two bitmap andnot calculation, return cardinality of type UInt64.
```
bitmapAndnotCardinality(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapAndnotCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```
```
┌─res─┐
│ 2 │
└─────┘
```
[Original article](https://clickhouse.yandex/docs/en/query_language/functions/bitmap_functions/) <!--hide-->
# Bitmap functions
Bitmap functions work for two bitmaps Object value calculation, it is to return new bitmap or cardinality while using formula calculation, such as and, or, xor, and not, etc.
There are 2 kinds of construction methods for Bitmap Object. One is to be constructed by aggregation function groupBitmap with -State, the other is to be constructed by Array Object. It is also to convert Bitmap Object to Array Object.
RoaringBitmap is wrapped into a data structure while actual storage of Bitmap objects. When the cardinality is less than or equal to 32, it uses Set objet. When the cardinality is greater than 32, it uses RoaringBitmap object. That is why storage of low cardinality set is faster.
For more information on RoaringBitmap, see: [CRoaring](https://github.com/RoaringBitmap/CRoaring).
## bitmapBuild
Build a bitmap from unsigned integer array.
```
bitmapBuild(array)
```
**Parameters**
- `array` unsigned integer array.
**Example**
``` sql
SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res
```
## bitmapToArray
Convert bitmap to integer array.
```
bitmapToArray(bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res
```
```
┌─res─────────┐
│ [1,2,3,4,5] │
└─────────────┘
```
## bitmapHasAny
Analogous to `hasAny(array, array)` returns 1 if bitmaps have any common elements, 0 otherwise.
For empty bitmaps returns 0.
```
bitmapHasAny(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapHasAny(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res
```
```
┌─res─┐
│ 1 │
└─────┘
```
## bitmapHasAll
Analogous to `hasAll(array, array)` returns 1 if the first bitmap contains all the elements of the second one, 0 otherwise.
If the second argument is an empty bitmap then returns 1.
```
bitmapHasAll(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapHasAll(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res
```
```
┌─res─┐
│ 0 │
└─────┘
```
## bitmapAnd
Two bitmap and calculation, the result is a new bitmap.
```
bitmapAnd(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res
```
```
┌─res─┐
│ [3] │
└─────┘
```
## bitmapOr
Two bitmap or calculation, the result is a new bitmap.
```
bitmapOr(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapOr(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res
```
```
┌─res─────────┐
│ [1,2,3,4,5] │
└─────────────┘
```
## bitmapXor
Two bitmap xor calculation, the result is a new bitmap.
```
bitmapXor(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapXor(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res
```
```
┌─res───────┐
│ [1,2,4,5] │
└───────────┘
```
## bitmapAndnot
Two bitmap andnot calculation, the result is a new bitmap.
```
bitmapAndnot(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapToArray(bitmapAndnot(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res
```
```
┌─res───┐
│ [1,2] │
└───────┘
```
## bitmapCardinality
Retrun bitmap cardinality of type UInt64.
```
bitmapCardinality(bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapCardinality(bitmapBuild([1, 2, 3, 4, 5])) AS res
```
```
┌─res─┐
│ 5 │
└─────┘
```
## bitmapAndCardinality
Two bitmap and calculation, return cardinality of type UInt64.
```
bitmapAndCardinality(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapAndCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```
```
┌─res─┐
│ 1 │
└─────┘
```
## bitmapOrCardinality
Two bitmap or calculation, return cardinality of type UInt64.
```
bitmapOrCardinality(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapOrCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```
```
┌─res─┐
│ 5 │
└─────┘
```
## bitmapXorCardinality
Two bitmap xor calculation, return cardinality of type UInt64.
```
bitmapXorCardinality(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapXorCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```
```
┌─res─┐
│ 4 │
└─────┘
```
## bitmapAndnotCardinality
Two bitmap andnot calculation, return cardinality of type UInt64.
```
bitmapAndnotCardinality(bitmap,bitmap)
```
**Parameters**
- `bitmap` bitmap object.
**Example**
``` sql
SELECT bitmapAndnotCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```
```
┌─res─┐
│ 2 │
└─────┘
```
[Original article](https://clickhouse.yandex/docs/en/query_language/functions/bitmap_functions/) <!--hide-->