added bitmapSubsetLimit

This commit is contained in:
Zhichang Yu 2019-09-17 14:34:08 +08:00
parent 1fe79ad43c
commit 0dc3866c36
7 changed files with 154 additions and 9 deletions

View File

@ -467,11 +467,10 @@ public:
return count; return count;
if (isSmall()) if (isSmall())
{ {
std::vector<T> ans;
for (const auto & x : small) for (const auto & x : small)
{ {
T val = x.getValue(); T val = x.getValue();
if ((UInt32)val >= range_start && (UInt32)val < range_end) if (UInt32(val) >= range_start && UInt32(val) < range_end)
{ {
r1.add(val); r1.add(val);
count++; count++;
@ -483,10 +482,47 @@ public:
roaring_uint32_iterator_t iterator; roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator); roaring_init_iterator(rb, &iterator);
roaring_move_uint32_iterator_equalorlarger(&iterator, range_start); roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
while (iterator.has_value) while (iterator.has_value && UInt32(iterator.current_value) < range_end)
{
r1.add(iterator.current_value);
roaring_advance_uint32_iterator(&iterator);
count++;
}
}
return count;
}
/**
* Return a new set containing the smallest `limit` values of this set that are no less than `range_start`.
*/
UInt64 rb_limit(UInt32 range_start, UInt32 limit, RoaringBitmapWithSmallSet& r1) const
{
UInt64 count = 0;
if (isSmall())
{
std::vector<T> ans;
for (const auto & x : small)
{
T val = x.getValue();
if (UInt32(val) >= range_start)
{
ans.push_back(val);
}
}
sort(ans.begin(), ans.end());
if (limit > ans.size())
limit = ans.size();
for (size_t i=0; i<limit; i++)
r1.add(ans[i]);
count = UInt64(limit);
}
else
{
roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator);
roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
while (UInt32(count) < limit && iterator.has_value)
{ {
if ((UInt32)iterator.current_value >= range_end)
break;
r1.add(iterator.current_value); r1.add(iterator.current_value);
roaring_advance_uint32_iterator(&iterator); roaring_advance_uint32_iterator(&iterator);
count++; count++;

View File

@ -10,6 +10,7 @@ void registerFunctionsBitmap(FunctionFactory & factory)
factory.registerFunction<FunctionBitmapBuild>(); factory.registerFunction<FunctionBitmapBuild>();
factory.registerFunction<FunctionBitmapToArray>(); factory.registerFunction<FunctionBitmapToArray>();
factory.registerFunction<FunctionBitmapSubsetInRange>(); factory.registerFunction<FunctionBitmapSubsetInRange>();
factory.registerFunction<FunctionBitmapSubsetLimit>();
factory.registerFunction<FunctionBitmapSelfCardinality>(); factory.registerFunction<FunctionBitmapSelfCardinality>();
factory.registerFunction<FunctionBitmapMin>(); factory.registerFunction<FunctionBitmapMin>();

View File

@ -34,6 +34,9 @@ namespace ErrorCodes
* Return subset in specified range (not include the range_end): * Return subset in specified range (not include the range_end):
* bitmapSubsetInRange: bitmap,integer,integer -> bitmap * bitmapSubsetInRange: bitmap,integer,integer -> bitmap
* *
* Return subset of the smallest `limit` values in set which are no smaller than `range_start`:
* bitmapSubsetLimit: bitmap,integer,integer -> bitmap
*
* Two bitmap and calculation: * Two bitmap and calculation:
* bitmapAnd: bitmap,bitmap -> bitmap * bitmapAnd: bitmap,bitmap -> bitmap
* *
@ -250,12 +253,13 @@ private:
} }
}; };
class FunctionBitmapSubsetInRange : public IFunction template <typename Impl>
class FunctionBitmapSubset : public IFunction
{ {
public: public:
static constexpr auto name = "bitmapSubsetInRange"; static constexpr auto name = Impl::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapSubsetInRange>(); } static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapSubset<Impl>>(); }
String getName() const override { return name; } String getName() const override { return name; }
@ -357,12 +361,37 @@ private:
col_to->insertDefault(); col_to->insertDefault();
AggregateFunctionGroupBitmapData<T> & bd2 AggregateFunctionGroupBitmapData<T> & bd2
= *reinterpret_cast<AggregateFunctionGroupBitmapData<T> *>(col_to->getData()[i]); = *reinterpret_cast<AggregateFunctionGroupBitmapData<T> *>(col_to->getData()[i]);
bd0.rbs.rb_range(range_start, range_end, bd2.rbs); Impl::apply(bd0, range_start, range_end, bd2);
} }
block.getByPosition(result).column = std::move(col_to); block.getByPosition(result).column = std::move(col_to);
} }
}; };
/// Policy type for FunctionBitmapSubset that implements `bitmapSubsetInRange`.
/// FunctionBitmapSubset reads `Impl::name` and forwards each row to `Impl::apply`.
struct BitmapSubsetInRangeImpl
{
    /// Function name exposed through FunctionBitmapSubset<Impl>::name.
    static constexpr auto name = "bitmapSubsetInRange";

    /// Copies into `bd2` the values of `bd0` that lie in [range_start, range_end)
    /// by delegating to rb_range. The element count returned by rb_range is not used here.
    template <typename T>
    static void apply(
        const AggregateFunctionGroupBitmapData<T> & bd0,
        UInt32 range_start,
        UInt32 range_end,
        AggregateFunctionGroupBitmapData<T> & bd2)
    {
        const auto & source = bd0.rbs;
        source.rb_range(range_start, range_end, bd2.rbs);
    }
};
struct BitmapSubsetLimitImpl
{
public:
static constexpr auto name = "bitmapSubsetLimit";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bd0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bd2)
{
bd0.rbs.rb_limit(range_start, range_end, bd2.rbs);
}
};
using FunctionBitmapSubsetInRange = FunctionBitmapSubset<BitmapSubsetInRangeImpl>;
using FunctionBitmapSubsetLimit = FunctionBitmapSubset<BitmapSubsetLimitImpl>;
template <typename Impl> template <typename Impl>
class FunctionBitmapSelfCardinalityImpl : public IFunction class FunctionBitmapSelfCardinalityImpl : public IFunction
{ {

View File

@ -67,6 +67,14 @@
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]
[30,31,32,33,100] [30,31,32,33,100]
[100] [100]
[]
[]
[1,5,7,9]
[]
[5,7,9]
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]
[30,31,32,33,100,200,500]
[100,200,500]
4294967295 4294967295
4294967295 4294967295
4294967295 4294967295

View File

@ -212,6 +212,25 @@ select bitmapToArray(bitmapSubsetInRange(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(100), toUInt32(200))); 100,200,500]), toUInt32(100), toUInt32(200)));
-- bitmapSubsetLimit:
---- Empty
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10)));
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10)));
---- Small
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7)));
---- Large
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(0), toUInt32(100)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(30), toUInt32(200)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(100), toUInt32(200)));
-- bitmapMin: -- bitmapMin:
---- Empty ---- Empty
SELECT bitmapMin(bitmapBuild(emptyArrayUInt8())); SELECT bitmapMin(bitmapBuild(emptyArrayUInt8()));

View File

@ -82,6 +82,32 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,
└───────────────────┘ └───────────────────┘
``` ```
## bitmapSubsetLimit {#bitmap_functions-bitmapsubsetlimit}
Returns a subset of the bitmap containing at most `limit` of its smallest values that are no less than `range_start`.
```
bitmapSubsetLimit(bitmap, range_start, limit)
```
**Parameters**
- `bitmap` [Bitmap object](#bitmap_functions-bitmapbuild).
- `range_start` range start point. Type: [UInt32](../../data_types/int_uint.md).
- `limit` subset cardinality upper limit. Type: [UInt32](../../data_types/int_uint.md).
**Example**
``` sql
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
```
```
┌─res───────────────────────┐
│ [30,31,32,33,100,200,500] │
└───────────────────────────┘
```
## bitmapContains {#bitmap_functions-bitmapcontains} ## bitmapContains {#bitmap_functions-bitmapcontains}
Checks whether the bitmap contains an element. Checks whether the bitmap contains an element.

View File

@ -77,6 +77,32 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,
└───────────────────┘ └───────────────────┘
``` ```
## bitmapSubsetLimit
将位图指定范围(起始点和数目上限)转换为另一个位图。
```
bitmapSubsetLimit(bitmap, range_start, limit)
```
**参数**
- `bitmap` 位图对象.
- `range_start` 范围起始点(含).
- `limit` 子位图基数上限.
**示例**
``` sql
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
```
```
┌─res───────────────────────┐
│ [30,31,32,33,100,200,500] │
└───────────────────────────┘
```
## bitmapContains ## bitmapContains
检查位图是否包含指定元素。 检查位图是否包含指定元素。