added bitmapSubsetLimit

This commit is contained in:
Zhichang Yu 2019-09-17 14:34:08 +08:00
parent 1fe79ad43c
commit 0dc3866c36
7 changed files with 154 additions and 9 deletions

View File

@ -467,11 +467,10 @@ public:
return count;
if (isSmall())
{
std::vector<T> ans;
for (const auto & x : small)
{
T val = x.getValue();
if ((UInt32)val >= range_start && (UInt32)val < range_end)
if (UInt32(val) >= range_start && UInt32(val) < range_end)
{
r1.add(val);
count++;
@ -483,10 +482,47 @@ public:
roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator);
roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
while (iterator.has_value)
while (iterator.has_value && UInt32(iterator.current_value) < range_end)
{
r1.add(iterator.current_value);
roaring_advance_uint32_iterator(&iterator);
count++;
}
}
return count;
}
/**
* Return new set of the smallest `limit` values in set which is no less than `range_start`.
*/
UInt64 rb_limit(UInt32 range_start, UInt32 limit, RoaringBitmapWithSmallSet& r1) const
{
UInt64 count = 0;
if (isSmall())
{
std::vector<T> ans;
for (const auto & x : small)
{
T val = x.getValue();
if (UInt32(val) >= range_start)
{
ans.push_back(val);
}
}
sort(ans.begin(), ans.end());
if (limit > ans.size())
limit = ans.size();
for (size_t i=0; i<limit; i++)
r1.add(ans[i]);
count = UInt64(limit);
}
else
{
roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator);
roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
while (UInt32(count) < limit && iterator.has_value)
{
if ((UInt32)iterator.current_value >= range_end)
break;
r1.add(iterator.current_value);
roaring_advance_uint32_iterator(&iterator);
count++;

View File

@ -10,6 +10,7 @@ void registerFunctionsBitmap(FunctionFactory & factory)
factory.registerFunction<FunctionBitmapBuild>();
factory.registerFunction<FunctionBitmapToArray>();
factory.registerFunction<FunctionBitmapSubsetInRange>();
factory.registerFunction<FunctionBitmapSubsetLimit>();
factory.registerFunction<FunctionBitmapSelfCardinality>();
factory.registerFunction<FunctionBitmapMin>();

View File

@ -34,6 +34,9 @@ namespace ErrorCodes
* Return subset in specified range (not include the range_end):
* bitmapSubsetInRange: bitmap,integer,integer -> bitmap
*
* Return subset of the smallest `limit` values in set which is no smaller than `range_start`.
* bitmapSubsetInRange: bitmap,integer,integer -> bitmap
*
* Two bitmap and calculation:
* bitmapAnd: bitmap,bitmap -> bitmap
*
@ -250,12 +253,13 @@ private:
}
};
class FunctionBitmapSubsetInRange : public IFunction
template <typename Impl>
class FunctionBitmapSubset : public IFunction
{
public:
static constexpr auto name = "bitmapSubsetInRange";
static constexpr auto name = Impl::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapSubsetInRange>(); }
static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapSubset<Impl>>(); }
String getName() const override { return name; }
@ -357,12 +361,37 @@ private:
col_to->insertDefault();
AggregateFunctionGroupBitmapData<T> & bd2
= *reinterpret_cast<AggregateFunctionGroupBitmapData<T> *>(col_to->getData()[i]);
bd0.rbs.rb_range(range_start, range_end, bd2.rbs);
Impl::apply(bd0, range_start, range_end, bd2);
}
block.getByPosition(result).column = std::move(col_to);
}
};
struct BitmapSubsetInRangeImpl
{
public:
static constexpr auto name = "bitmapSubsetInRange";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bd0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bd2)
{
bd0.rbs.rb_range(range_start, range_end, bd2.rbs);
}
};
struct BitmapSubsetLimitImpl
{
public:
static constexpr auto name = "bitmapSubsetLimit";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bd0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bd2)
{
bd0.rbs.rb_limit(range_start, range_end, bd2.rbs);
}
};
using FunctionBitmapSubsetInRange = FunctionBitmapSubset<BitmapSubsetInRangeImpl>;
using FunctionBitmapSubsetLimit = FunctionBitmapSubset<BitmapSubsetLimitImpl>;
template <typename Impl>
class FunctionBitmapSelfCardinalityImpl : public IFunction
{

View File

@ -67,6 +67,14 @@
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]
[30,31,32,33,100]
[100]
[]
[]
[1,5,7,9]
[]
[5,7,9]
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]
[30,31,32,33,100,200,500]
[100,200,500]
4294967295
4294967295
4294967295

View File

@ -212,6 +212,25 @@ select bitmapToArray(bitmapSubsetInRange(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(100), toUInt32(200)));
-- bitmapSubsetLimit:
---- Empty
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10)));
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10)));
---- Small
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7)));
---- Large
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(0), toUInt32(100)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(30), toUInt32(200)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(100), toUInt32(200)));
-- bitmapMin:
---- Empty
SELECT bitmapMin(bitmapBuild(emptyArrayUInt8()));

View File

@ -82,6 +82,32 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,
└───────────────────┘
```
## bitmapSubsetLimit {#bitmap_functions-bitmapsubsetlimit}
Return subset of the smallest `limit` values in set which is no less than `range_start`.
```
bitmapSubsetLimit(bitmap, range_start, limit)
```
**Parameters**
- `bitmap` [Bitmap object](#bitmap_functions-bitmapbuild).
- `range_start` range start point. Type: [UInt32](../../data_types/int_uint.md).
- `limit` subset cardinality upper limit. Type: [UInt32](../../data_types/int_uint.md).
**Example**
``` sql
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
```
```
┌─res───────────────────────┐
│ [30,31,32,33,100,200,500] │
└───────────────────────────┘
```
## bitmapContains {#bitmap_functions-bitmapcontains}
Checks whether the bitmap contains an element.

View File

@ -77,6 +77,32 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,
└───────────────────┘
```
## bitmapSubsetLimit
将位图指定范围(起始点和数目上限)转换为另一个位图。
```
bitmapSubsetLimit(bitmap, range_start, limit)
```
**参数**
- `bitmap` 位图对象.
- `range_start` 范围起始点(含).
- `limit` 子位图基数上限.
**示例**
``` sql
SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
```
```
┌─res───────────────────────┐
│ [30,31,32,33,100,200,500] │
└───────────────────────────┘
```
## bitmapContains
检查位图是否包含指定元素。