added bitmapRange function

This commit is contained in:
Zhichang Yu 2019-07-30 18:54:50 +08:00
parent da22e5f9a6
commit 4f146eaa7e
8 changed files with 263 additions and 7 deletions

View File

@ -1,5 +1,6 @@
#pragma once
#include <algorithm>
#include <roaring/roaring.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -454,6 +455,43 @@ public:
return count;
}
/**
 * Add to r1 every element of this set that lies in the half-open range
 * [range_start, range_end) and return how many elements were added.
 *
 * r1 is only ever added to here, it is never cleared, so any elements it
 * already holds are kept. An empty or inverted range
 * (range_start >= range_end) adds nothing and returns 0.
 */
UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet& r1) const
{
    UInt64 count = 0;
    if (range_start >= range_end)
        return count;

    if (isSmall())
    {
        /// The small set is scanned linearly and every element is tested.
        for (const auto & x : small)
        {
            const T val = x.getValue();
            /// The bounds are UInt32, so the element is compared after conversion to UInt32.
            const UInt32 val32 = static_cast<UInt32>(val);
            if (val32 >= range_start && val32 < range_end)
            {
                r1.add(val);
                ++count;
            }
        }
    }
    else
    {
        /// Position the iterator on the first value >= range_start,
        /// then walk forward until range_end is reached or the bitmap is exhausted.
        roaring_uint32_iterator_t iterator;
        roaring_init_iterator(rb, &iterator);
        roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
        while (iterator.has_value)
        {
            if (iterator.current_value >= range_end)
                break;
            r1.add(iterator.current_value);
            roaring_advance_uint32_iterator(&iterator);
            ++count;
        }
    }
    return count;
}
private:
/// To read and write the DB Buffer directly, migrate code from CRoaring
void db_roaring_bitmap_add_many(DB::ReadBuffer & dbBuf, roaring_bitmap_t * r, size_t n_args)

View File

@ -9,6 +9,7 @@ void registerFunctionsBitmap(FunctionFactory & factory)
{
factory.registerFunction<FunctionBitmapBuild>();
factory.registerFunction<FunctionBitmapToArray>();
factory.registerFunction<FunctionBitmapRange>();
factory.registerFunction<FunctionBitmapSelfCardinality>();
factory.registerFunction<FunctionBitmapAndCardinality>();

View File

@ -30,6 +30,9 @@ namespace ErrorCodes
* Convert bitmap to integer array:
* bitmapToArray: bitmap -> integer[]
*
* Return a new bitmap containing only the values in [range_start, range_end) (range_end is excluded):
* bitmapRange: bitmap,integer,integer -> bitmap
*
* Two bitmap and calculation:
* bitmapAnd: bitmap,bitmap -> bitmap
*
@ -240,6 +243,116 @@ private:
}
};
/// SQL function bitmapRange(bitmap, range_start, range_end).
/// For every row, builds a new bitmap holding the elements of `bitmap` that lie in
/// [range_start, range_end) (range_end excluded). The result has the same type as the first argument.
class FunctionBitmapRange : public IFunction
{
public:
    static constexpr auto name = "bitmapRange";

    static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapRange>(); }

    String getName() const override { return name; }

    bool isVariadic() const override { return false; }

    size_t getNumberOfArguments() const override { return 3; }

    /// Validates the argument types: (bitmap aggregate state, UInt32, UInt32).
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT when any argument has another type.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
        if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
            throw Exception(
                "First argument for function " + getName() + " must be an bitmap but it has type " + arguments[0]->getName() + ".",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        auto arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
        if (!(arg_type1))
            throw Exception(
                "Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        /// Check the THIRD argument here (index 2). executeIntType() casts its column
        /// to ColumnUInt32 without a null check, so a wrong type must be rejected up front.
        auto arg_type2 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[2].get());
        if (!(arg_type2))
            throw Exception(
                "Third argument for function " + getName() + " must be UInt32 but it has type " + arguments[2]->getName() + ".",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        return arguments[0];
    }

    bool useDefaultImplementationForConstants() const override { return true; }

    /// Dispatches on the element type of the bitmap (first argument of the aggregate function).
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
    {
        const IDataType * from_type = block.getByPosition(arguments[0]).type.get();
        const DataTypeAggregateFunction * aggr_type = typeid_cast<const DataTypeAggregateFunction *>(from_type);
        WhichDataType which(aggr_type->getArgumentsDataTypes()[0]);
        if (which.isUInt8())
            executeIntType<UInt8>(block, arguments, result, input_rows_count);
        else if (which.isUInt16())
            executeIntType<UInt16>(block, arguments, result, input_rows_count);
        else if (which.isUInt32())
            executeIntType<UInt32>(block, arguments, result, input_rows_count);
        else if (which.isUInt64())
            executeIntType<UInt64>(block, arguments, result, input_rows_count);
        else
            throw Exception(
                "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

private:
    /// Computes the result column for bitmaps whose element type is T.
    /// Each of the three arguments may independently be a constant column;
    /// a constant column is read at index 0 for every row.
    template <typename T>
    void executeIntType(
        Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
        const
    {
        const IColumn * columns[3];
        bool is_column_const[3];
        const ColumnAggregateFunction * colAggFunc;
        const PaddedPODArray<AggregateDataPtr> * container0;
        const PaddedPODArray<UInt32> * container1, * container2;

        for (size_t i = 0; i < 3; ++i)
        {
            columns[i] = block.getByPosition(arguments[i]).column.get();
            is_column_const[i] = isColumnConst(*columns[i]);
        }

        /// Unwrap ColumnConst to reach the underlying data column.
        if (is_column_const[0])
            colAggFunc = typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(columns[0])->getDataColumnPtr().get());
        else
            colAggFunc = typeid_cast<const ColumnAggregateFunction*>(columns[0]);
        container0 = &colAggFunc->getData();

        if (is_column_const[1])
            container1 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(columns[1])->getDataColumnPtr().get())->getData();
        else
            container1 = &typeid_cast<const ColumnUInt32*>(columns[1])->getData();

        if (is_column_const[2])
            container2 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(columns[2])->getDataColumnPtr().get())->getData();
        else
            container2 = &typeid_cast<const ColumnUInt32*>(columns[2])->getData();

        auto col_to = ColumnAggregateFunction::create(colAggFunc->getAggregateFunction());
        col_to->reserve(input_rows_count);

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            const AggregateDataPtr dataPtr0 = is_column_const[0] ? (*container0)[0] : (*container0)[i];
            const AggregateFunctionGroupBitmapData<T>& bd0
                = *reinterpret_cast<const AggregateFunctionGroupBitmapData<T>*>(dataPtr0);
            const UInt32 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i];
            const UInt32 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i];

            /// Scoped temporary instead of a heap allocation that was never freed.
            /// NOTE(review): relies on insertFrom() copying/merging the state into the
            /// column rather than retaining the pointer - confirm against ColumnAggregateFunction.
            AggregateFunctionGroupBitmapData<T> bd2;
            bd0.rbs.rb_range(range_start, range_end, bd2.rbs);
            col_to->insertFrom(reinterpret_cast<ConstAggregateDataPtr>(&bd2));
        }
        block.getByPosition(result).column = std::move(col_to);
    }
};
template <typename Name>
class FunctionBitmapSelfCardinalityImpl : public IFunction
{

View File

@ -1,6 +1,7 @@
#include <IO/WriteBufferAIO.h>
#include <Core/Defines.h>
#include <functional>
#include <filesystem>
#include <iostream>
#include <fstream>

View File

@ -59,3 +59,11 @@
1
0
1
[]
[]
[1]
[]
[5]
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]
[30,31,32,33,100]
[100]

View File

@ -177,18 +177,37 @@ select bitmapHasAll(bitmapBuild([
-- bitmapContains:
---- Empty
SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), CAST(0, 'UInt32'));
SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), CAST(5, 'UInt32'));
SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), toUInt32(0));
SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), toUInt32(5));
---- Small
select bitmapContains(bitmapBuild([1,5,7,9]),CAST(0, 'UInt32'));
select bitmapContains(bitmapBuild([1,5,7,9]),CAST(9, 'UInt32'));
select bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(0));
select bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(9));
---- Large
select bitmapContains(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]),CAST(100, 'UInt32'));
100,200,500]),toUInt32(100));
select bitmapContains(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]),CAST(101, 'UInt32'));
100,200,500]),toUInt32(101));
select bitmapContains(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]),CAST(500, 'UInt32'));
100,200,500]),toUInt32(500));
-- bitmapRange:
-- Each query extracts the half-open range [range_start, range_end) from a bitmap
-- and prints it as an array; the expected values below come from the test's reference output.
---- Empty
-- An empty bitmap yields an empty result whatever the range. Expected: [] and [].
SELECT bitmapToArray(bitmapRange(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10)));
SELECT bitmapToArray(bitmapRange(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10)));
---- Small
-- Range [0, 4): expected [1].
select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4)));
-- Empty range (range_start = range_end): expected [].
select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10)));
-- Range [3, 7): range_end itself is excluded, expected [5].
select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7)));
---- Large
-- Range [0, 100): expected 0..33 (100 itself is excluded).
select bitmapToArray(bitmapRange(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(0), toUInt32(100)));
-- Range [30, 200): expected [30,31,32,33,100].
select bitmapToArray(bitmapRange(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(30), toUInt32(200)));
-- Range [100, 200): range_start is included, expected [100].
select bitmapToArray(bitmapRange(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(100), toUInt32(200)));

View File

@ -56,6 +56,32 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res
└─────────────┘
```
## bitmapRange {#bitmap_functions-bitmaprange}
Returns a new bitmap containing the elements of `bitmap` that fall within the range `[range_start, range_end)`; `range_end` itself is not included.
```
bitmapRange(bitmap, range_start, range_end)
```
**Parameters**
- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
- `range_start` – range start point (included). Type: [UInt32](../../data_types/int_uint.md).
- `range_end` – range end point (excluded). Type: [UInt32](../../data_types/int_uint.md).
**Example**
``` sql
SELECT bitmapToArray(bitmapRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
```
```
┌─res───────────────┐
│ [30,31,32,33,100] │
└───────────────────┘
```
## bitmapContains {#bitmap_functions-bitmapcontains}
Checks whether the bitmap contains an element.

View File

@ -51,6 +51,56 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res
└─────────────┘
```
## bitmapRange
将位图中指定范围（包含range_start，不包含range_end）内的元素转换为另一个位图。
```
bitmapRange(bitmap, range_start, range_end)
```
**参数**
- `bitmap` 位图对象.
- `range_start` 范围起始点(含).
- `range_end` 范围结束点(不含).
**示例**
``` sql
SELECT bitmapToArray(bitmapRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
```
```
┌─res───────────────┐
│ [30,31,32,33,100] │
└───────────────────┘
```
## bitmapContains
检查位图是否包含指定元素。
```
bitmapContains(haystack, needle)
```
**参数**
- `haystack` 位图对象.
- `needle` 元素，类型UInt32.
**示例**
``` sql
SELECT bitmapContains(bitmapBuild([1,5,7,9]), toUInt32(9)) AS res
```
```text
┌─res─┐
│ 1 │
└─────┘
```
## bitmapHasAny
与`hasAny(array, array)`类似，如果位图有任何公共元素则返回1，否则返回0。