mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
added bitmapRange function
This commit is contained in:
parent
da22e5f9a6
commit
4f146eaa7e
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <roaring/roaring.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -454,6 +455,43 @@ public:
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return new set with specified range (not include the range_end)
|
||||
*/
|
||||
UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet& r1) const
|
||||
{
|
||||
UInt64 count = 0;
|
||||
if(range_start >= range_end)
|
||||
return count;
|
||||
if (isSmall())
|
||||
{
|
||||
std::vector<T> ans;
|
||||
for (const auto & x : small)
|
||||
{
|
||||
T val = x.getValue();
|
||||
if((UInt32)val >= range_start && (UInt32)val < range_end) {
|
||||
r1.add(val);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
roaring_uint32_iterator_t iterator;
|
||||
roaring_init_iterator(rb, &iterator);
|
||||
roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
|
||||
while (iterator.has_value)
|
||||
{
|
||||
if((UInt32)iterator.current_value >= range_end)
|
||||
break;
|
||||
r1.add(iterator.current_value);
|
||||
roaring_advance_uint32_iterator(&iterator);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
private:
|
||||
/// To read and write the DB Buffer directly, migrate code from CRoaring
|
||||
void db_roaring_bitmap_add_many(DB::ReadBuffer & dbBuf, roaring_bitmap_t * r, size_t n_args)
|
||||
|
@ -9,6 +9,7 @@ void registerFunctionsBitmap(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionBitmapBuild>();
|
||||
factory.registerFunction<FunctionBitmapToArray>();
|
||||
factory.registerFunction<FunctionBitmapRange>();
|
||||
|
||||
factory.registerFunction<FunctionBitmapSelfCardinality>();
|
||||
factory.registerFunction<FunctionBitmapAndCardinality>();
|
||||
|
@ -30,6 +30,9 @@ namespace ErrorCodes
|
||||
* Convert bitmap to integer array:
|
||||
* bitmapToArray: bitmap -> integer[]
|
||||
*
|
||||
* Return a new bitmap containing the elements in the specified range (range_end excluded):
|
||||
* bitmapRange: bitmap,integer,integer -> bitmap
|
||||
*
|
||||
* Two bitmap and calculation:
|
||||
* bitmapAnd: bitmap,bitmap -> bitmap
|
||||
*
|
||||
@ -240,6 +243,116 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionBitmapRange : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "bitmapRange";
|
||||
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapRange>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 3; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
|
||||
if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
throw Exception(
|
||||
"First argument for function " + getName() + " must be an bitmap but it has type " + arguments[0]->getName() + ".",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
auto arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
|
||||
if (!(arg_type1))
|
||||
throw Exception(
|
||||
"Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
auto arg_type2 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
|
||||
if (!(arg_type2))
|
||||
throw Exception(
|
||||
"Third argument for function " + getName() + " must be UInt32 but it has type " + arguments[2]->getName() + ".",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return arguments[0];
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
|
||||
{
|
||||
const IDataType * from_type = block.getByPosition(arguments[0]).type.get();
|
||||
const DataTypeAggregateFunction * aggr_type = typeid_cast<const DataTypeAggregateFunction *>(from_type);
|
||||
WhichDataType which(aggr_type->getArgumentsDataTypes()[0]);
|
||||
if (which.isUInt8())
|
||||
executeIntType<UInt8>(block, arguments, result, input_rows_count);
|
||||
else if (which.isUInt16())
|
||||
executeIntType<UInt16>(block, arguments, result, input_rows_count);
|
||||
else if (which.isUInt32())
|
||||
executeIntType<UInt32>(block, arguments, result, input_rows_count);
|
||||
else if (which.isUInt64())
|
||||
executeIntType<UInt64>(block, arguments, result, input_rows_count);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
private:
|
||||
using ToType = UInt64;
|
||||
|
||||
template <typename T>
|
||||
void executeIntType(
|
||||
Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
|
||||
const
|
||||
{
|
||||
const IColumn * columns[3];
|
||||
bool is_column_const[3];
|
||||
const ColumnAggregateFunction * colAggFunc;
|
||||
const PaddedPODArray<AggregateDataPtr> * container0;
|
||||
const PaddedPODArray<UInt32> * container1, * container2;
|
||||
|
||||
for (size_t i = 0; i < 3; ++i)
|
||||
{
|
||||
columns[i] = block.getByPosition(arguments[i]).column.get();
|
||||
is_column_const[i] = isColumnConst(*columns[i]);
|
||||
}
|
||||
if (is_column_const[0]) {
|
||||
colAggFunc = typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(columns[0])->getDataColumnPtr().get());
|
||||
} else{
|
||||
colAggFunc = typeid_cast<const ColumnAggregateFunction*>(columns[0]);
|
||||
}
|
||||
container0 = &colAggFunc->getData();
|
||||
if (is_column_const[1])
|
||||
container1 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(columns[1])->getDataColumnPtr().get())->getData();
|
||||
else
|
||||
container1 = &typeid_cast<const ColumnUInt32*>(columns[1])->getData();
|
||||
if (is_column_const[2])
|
||||
container2 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(columns[2])->getDataColumnPtr().get())->getData();
|
||||
else
|
||||
container2 = &typeid_cast<const ColumnUInt32*>(columns[2])->getData();
|
||||
|
||||
auto col_to = ColumnAggregateFunction::create(colAggFunc->getAggregateFunction());
|
||||
col_to->reserve(input_rows_count);
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
const AggregateDataPtr dataPtr0 = is_column_const[0] ? (*container0)[0] : (*container0)[i];
|
||||
const AggregateFunctionGroupBitmapData<T>& bd0
|
||||
= *reinterpret_cast<const AggregateFunctionGroupBitmapData<T>*>(dataPtr0);
|
||||
const UInt32 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i];
|
||||
const UInt32 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i];
|
||||
|
||||
auto bd2 = new AggregateFunctionGroupBitmapData<T>();
|
||||
bd0.rbs.rb_range(range_start, range_end, bd2->rbs);
|
||||
|
||||
col_to->insertFrom(reinterpret_cast<ConstAggregateDataPtr>(bd2));
|
||||
}
|
||||
block.getByPosition(result).column = std::move(col_to);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Name>
|
||||
class FunctionBitmapSelfCardinalityImpl : public IFunction
|
||||
{
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <IO/WriteBufferAIO.h>
|
||||
#include <Core/Defines.h>
|
||||
|
||||
#include <functional>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
@ -59,3 +59,11 @@
|
||||
1
|
||||
0
|
||||
1
|
||||
[]
|
||||
[]
|
||||
[1]
|
||||
[]
|
||||
[5]
|
||||
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]
|
||||
[30,31,32,33,100]
|
||||
[100]
|
||||
|
@ -177,18 +177,37 @@ select bitmapHasAll(bitmapBuild([
|
||||
|
||||
-- bitmapContains:
|
||||
---- Empty
|
||||
SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), CAST(0, 'UInt32'));
|
||||
SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), CAST(5, 'UInt32'));
|
||||
SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), toUInt32(0));
|
||||
SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), toUInt32(5));
|
||||
---- Small
|
||||
select bitmapContains(bitmapBuild([1,5,7,9]),CAST(0, 'UInt32'));
|
||||
select bitmapContains(bitmapBuild([1,5,7,9]),CAST(9, 'UInt32'));
|
||||
select bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(0));
|
||||
select bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(9));
|
||||
---- Large
|
||||
select bitmapContains(bitmapBuild([
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
|
||||
100,200,500]),CAST(100, 'UInt32'));
|
||||
100,200,500]),toUInt32(100));
|
||||
select bitmapContains(bitmapBuild([
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
|
||||
100,200,500]),CAST(101, 'UInt32'));
|
||||
100,200,500]),toUInt32(101));
|
||||
select bitmapContains(bitmapBuild([
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
|
||||
100,200,500]),CAST(500, 'UInt32'));
|
||||
100,200,500]),toUInt32(500));
|
||||
|
||||
-- bitmapRange:
|
||||
---- Empty
|
||||
SELECT bitmapToArray(bitmapRange(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10)));
|
||||
SELECT bitmapToArray(bitmapRange(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10)));
|
||||
---- Small
|
||||
select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4)));
|
||||
select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10)));
|
||||
select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7)));
|
||||
---- Large
|
||||
select bitmapToArray(bitmapRange(bitmapBuild([
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
|
||||
100,200,500]), toUInt32(0), toUInt32(100)));
|
||||
select bitmapToArray(bitmapRange(bitmapBuild([
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
|
||||
100,200,500]), toUInt32(30), toUInt32(200)));
|
||||
select bitmapToArray(bitmapRange(bitmapBuild([
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
|
||||
100,200,500]), toUInt32(100), toUInt32(200)));
|
||||
|
@ -56,6 +56,32 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
## bitmapRange {#bitmap_functions-bitmaprange}
|
||||
|
||||
Returns a new bitmap containing the elements of the source bitmap that fall within the specified range (`range_end` is not included).
|
||||
|
||||
```
|
||||
bitmapRange(bitmap, range_start, range_end)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `range_start` – range start point (included). Type: [UInt32](../../data_types/int_uint.md).
|
||||
- `range_end` – range end point (excluded). Type: [UInt32](../../data_types/int_uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT bitmapToArray(bitmapRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
|
||||
```
|
||||
|
||||
```
|
||||
┌─res───────────────┐
|
||||
│ [30,31,32,33,100] │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
## bitmapContains {#bitmap_functions-bitmapcontains}
|
||||
|
||||
Checks whether the bitmap contains an element.
|
||||
|
@ -51,6 +51,56 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
## bitmapRange
|
||||
|
||||
将位图指定范围(不包含range_end)转换为另一个位图。
|
||||
|
||||
```
|
||||
bitmapRange(bitmap, range_start, range_end)
|
||||
```
|
||||
|
||||
**参数**
|
||||
|
||||
- `bitmap` – 位图对象.
|
||||
- `range_start` – 范围起始点(含).
|
||||
- `range_end` – 范围结束点(不含).
|
||||
|
||||
**示例**
|
||||
|
||||
``` sql
|
||||
SELECT bitmapToArray(bitmapRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
|
||||
```
|
||||
|
||||
```
|
||||
┌─res───────────────┐
|
||||
│ [30,31,32,33,100] │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
## bitmapContains
|
||||
|
||||
检查位图是否包含指定元素。
|
||||
|
||||
```
|
||||
bitmapContains(haystack, needle)
|
||||
```
|
||||
|
||||
**参数**
|
||||
|
||||
- `haystack` – 位图对象.
|
||||
- `needle` – 元素,类型UInt32.
|
||||
|
||||
**示例**
|
||||
|
||||
``` sql
|
||||
SELECT bitmapContains(bitmapBuild([1,5,7,9]), toUInt32(9)) AS res
|
||||
```
|
||||
```text
|
||||
┌─res─┐
|
||||
│ 1 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## bitmapHasAny
|
||||
|
||||
与`hasAny(array,array)`类似,如果位图有任何公共元素则返回1,否则返回0。
|
||||
|
Loading…
Reference in New Issue
Block a user