Support bitmap64 feature

This commit is contained in:
Andy Yang 2020-12-07 17:35:15 +08:00 committed by Nikita Mikhailov
parent 608b9a28ba
commit ccb2fcc161
4 changed files with 592 additions and 96 deletions

View File

@ -23,3 +23,4 @@ add_library(roaring ${SRCS})
target_include_directories(roaring PRIVATE ${LIBRARY_DIR}/include/roaring) target_include_directories(roaring PRIVATE ${LIBRARY_DIR}/include/roaring)
target_include_directories(roaring SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include) target_include_directories(roaring SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include)
target_include_directories(roaring SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/cpp)

View File

@ -12,6 +12,7 @@
// TODO: find out what it is. On github, they have proper interface headers like // TODO: find out what it is. On github, they have proper interface headers like
// this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h // this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h
#include <roaring/roaring.h> #include <roaring/roaring.h>
#include <roaring64map.hh>
namespace DB namespace DB
{ {
@ -26,25 +27,37 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable
private: private:
using Small = SmallSet<T, small_set_size>; using Small = SmallSet<T, small_set_size>;
using ValueBuffer = std::vector<T>; using ValueBuffer = std::vector<T>;
bool bUInt64 = sizeof(T) >= 8;
Small small; Small small;
roaring_bitmap_t * rb = nullptr; roaring_bitmap_t * rb = nullptr;
std::shared_ptr<Roaring64Map> rb64 = nullptr;
void toLarge() void toLarge()
{ {
if ( isUInt64() )
rb64 = std::make_shared<Roaring64Map>();
else
rb = roaring_bitmap_create(); rb = roaring_bitmap_create();
for (const auto & x : small) for (const auto & x : small){
if ( isUInt64() )
rb64->add( static_cast<UInt64>(x.getValue()) );
else
roaring_bitmap_add(rb, x.getValue()); roaring_bitmap_add(rb, x.getValue());
} }
}
public: public:
bool isLarge() const { return rb != nullptr; } bool isLarge() const { return rb != nullptr || rb64 != nullptr; }
bool isSmall() const { return rb == nullptr; } bool isSmall() const { return rb == nullptr && rb64 == nullptr; }
bool isUInt64() const { return bUInt64; }
~RoaringBitmapWithSmallSet() ~RoaringBitmapWithSmallSet()
{ {
if (isLarge()) if (isLarge() && !isUInt64() )
roaring_bitmap_free(rb); roaring_bitmap_free(rb);
} }
@ -59,19 +72,35 @@ public:
else else
{ {
toLarge(); toLarge();
if ( isUInt64() )
rb64->add( static_cast<UInt64>(value) );
else
roaring_bitmap_add(rb, value); roaring_bitmap_add(rb, value);
} }
} }
} }
else
{
if ( isUInt64() )
rb64->add( static_cast<UInt64>(value) );
else else
roaring_bitmap_add(rb, value); roaring_bitmap_add(rb, value);
} }
}
UInt64 size() const UInt64 size() const
{ {
return isSmall() if (isSmall())
? small.size() {
: roaring_bitmap_get_cardinality(rb); return small.size();
}
else
{
if ( isUInt64() )
return rb64->cardinality();
else
return roaring_bitmap_get_cardinality(rb);
}
} }
void merge(const RoaringBitmapWithSmallSet & r1) void merge(const RoaringBitmapWithSmallSet & r1)
@ -81,6 +110,9 @@ public:
if (isSmall()) if (isSmall())
toLarge(); toLarge();
if ( isUInt64() )
*rb64 |= *r1.rb64;
else
roaring_bitmap_or_inplace(rb, r1.rb); roaring_bitmap_or_inplace(rb, r1.rb);
} }
else else
@ -92,39 +124,63 @@ public:
void read(DB::ReadBuffer & in) void read(DB::ReadBuffer & in)
{ {
bool is_large; // Container type: 0: SmallSet, 1: RoaringBitmap32, 2: RoaringBitmap64
readBinary(is_large, in); UInt8 containerType = 0;
readBinary(containerType, in);
if (is_large) if( 0 == containerType )
{ {
small.read(in);
} else if( 1 == containerType ) {
std::string s; std::string s;
readStringBinary(s,in); readStringBinary(s,in);
rb = roaring_bitmap_portable_deserialize(s.c_str()); rb = roaring_bitmap_portable_deserialize(s.c_str());
for (const auto & x : small) // merge from small // It has been persisted in the bitmap and does not need to merge from small
roaring_bitmap_add(rb, x.getValue()); // for (const auto & x : small) // merge from small
// roaring_bitmap_add(rb, x.getValue());
} else {
std::string s;
readStringBinary(s,in);
rb64 = std::make_shared<Roaring64Map>( Roaring64Map::read(s.c_str()) );
} }
else
small.read(in);
} }
void write(DB::WriteBuffer & out) const void write(DB::WriteBuffer & out) const
{ {
writeBinary(isLarge(), out); // Container type: 0: SmallSet, 1: RoaringBitmap32, 2: RoaringBitmap64
UInt8 containerType = 0;
if (isLarge())
{
if ( isUInt64() )
containerType = 2;
else
containerType = 1;
}
writeBinary(containerType, out);
if (isLarge()) if (isLarge())
{ {
if ( isUInt64() ){
uint32_t expectedsize = rb64->getSizeInBytes();
std::string s(expectedsize,0);
rb64->write(const_cast<char*>(s.data()));
writeStringBinary(s,out);
} else {
uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(rb); uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(rb);
std::string s(expectedsize,0); std::string s(expectedsize,0);
roaring_bitmap_portable_serialize(rb, const_cast<char*>(s.data())); roaring_bitmap_portable_serialize(rb, const_cast<char*>(s.data()));
writeStringBinary(s,out); writeStringBinary(s,out);
} }
}
else else
small.write(out); small.write(out);
} }
roaring_bitmap_t * getRb() const { return rb; } roaring_bitmap_t * getRb() const { return rb; }
std::shared_ptr<Roaring64Map> getRb64() const { return rb64; }
Small & getSmall() const { return small; } Small & getSmall() const { return small; }
/** /**
@ -138,6 +194,17 @@ public:
return smallRb; return smallRb;
} }
/**
* Get a new Roaring64Map from elements of small
*/
std::shared_ptr<Roaring64Map> getNewRb64FromSmall() const
{
std::shared_ptr<Roaring64Map> smallRb64 = std::make_shared<Roaring64Map>();
for (const auto & x : small)
smallRb64->add( static_cast<UInt64>(x.getValue()) );
return smallRb64;
}
/** /**
* Computes the intersection between two bitmaps * Computes the intersection between two bitmaps
*/ */
@ -162,8 +229,18 @@ public:
else if (isSmall() && r1.isLarge()) else if (isSmall() && r1.isLarge())
{ {
for (const auto & x : small) for (const auto & x : small)
{
if ( isUInt64() )
{
// This branch is only reached when *this is small (its own rb64 is null) and r1 is large,
// so membership has to be tested against r1's 64-bit bitmap, like the 32-bit path uses r1.rb.
if (r1.rb64->contains(static_cast<UInt64>(x.getValue())))
    buffer.push_back(x.getValue());
}
else
{
if (roaring_bitmap_contains(r1.rb, x.getValue())) if (roaring_bitmap_contains(r1.rb, x.getValue()))
buffer.push_back(x.getValue()); buffer.push_back(x.getValue());
}
}
// Clear out the original values // Clear out the original values
small.clear(); small.clear();
@ -174,6 +251,13 @@ public:
buffer.clear(); buffer.clear();
} }
else else
{
if ( isUInt64() )
{
std::shared_ptr<Roaring64Map> newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64();
*rb64 &= *newRb64;
}
else
{ {
roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
roaring_bitmap_and_inplace(rb, rb1); roaring_bitmap_and_inplace(rb, rb1);
@ -181,6 +265,7 @@ public:
roaring_bitmap_free(rb1); roaring_bitmap_free(rb1);
} }
} }
}
/** /**
* Computes the union between two bitmaps. * Computes the union between two bitmaps.
@ -194,11 +279,17 @@ public:
{ {
if (isSmall()) if (isSmall())
toLarge(); toLarge();
if ( isUInt64() ){
std::shared_ptr<Roaring64Map> newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64();
*rb64 ^= *newRb64;
} else {
roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
roaring_bitmap_xor_inplace(rb, rb1); roaring_bitmap_xor_inplace(rb, rb1);
if (r1.isSmall()) if (r1.isSmall())
roaring_bitmap_free(rb1); roaring_bitmap_free(rb1);
} }
}
/** /**
* Computes the difference (andnot) between two bitmaps * Computes the difference (andnot) between two bitmaps
@ -224,8 +315,18 @@ public:
else if (isSmall() && r1.isLarge()) else if (isSmall() && r1.isLarge())
{ {
for (const auto & x : small) for (const auto & x : small)
{
if ( isUInt64() )
{
// This branch is only reached when *this is small (its own rb64 is null) and r1 is large,
// so the "not contained" test has to consult r1's 64-bit bitmap, like the 32-bit path uses r1.rb.
if (!r1.rb64->contains(static_cast<UInt64>(x.getValue())))
    buffer.push_back(x.getValue());
}
else
{
if (!roaring_bitmap_contains(r1.rb, x.getValue())) if (!roaring_bitmap_contains(r1.rb, x.getValue()))
buffer.push_back(x.getValue()); buffer.push_back(x.getValue());
}
}
// Clear out the original values // Clear out the original values
small.clear(); small.clear();
@ -236,6 +337,13 @@ public:
buffer.clear(); buffer.clear();
} }
else else
{
if ( isUInt64() )
{
std::shared_ptr<Roaring64Map> newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64();
*rb64 -= *newRb64;
}
else
{ {
roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
roaring_bitmap_andnot_inplace(rb, rb1); roaring_bitmap_andnot_inplace(rb, rb1);
@ -243,6 +351,7 @@ public:
roaring_bitmap_free(rb1); roaring_bitmap_free(rb1);
} }
} }
}
/** /**
* Computes the cardinality of the intersection between two bitmaps. * Computes the cardinality of the intersection between two bitmaps.
@ -259,9 +368,24 @@ public:
else if (isSmall() && r1.isLarge()) else if (isSmall() && r1.isLarge())
{ {
for (const auto & x : small) for (const auto & x : small)
{
if ( isUInt64() )
{
// This branch is only reached when *this is small (its own rb64 is null) and r1 is large,
// so each small element is looked up in r1's 64-bit bitmap, like the 32-bit path uses r1.rb.
if (r1.rb64->contains(static_cast<UInt64>(x.getValue())))
    ++retSize;
} else {
if (roaring_bitmap_contains(r1.rb, x.getValue())) if (roaring_bitmap_contains(r1.rb, x.getValue()))
++retSize; ++retSize;
} }
}
}
else
{
if ( isUInt64() )
{
std::shared_ptr<Roaring64Map> newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64();
retSize = ( *rb64 & *newRb64 ).cardinality();
}
else else
{ {
roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
@ -269,6 +393,7 @@ public:
if (r1.isSmall()) if (r1.isSmall())
roaring_bitmap_free(rb1); roaring_bitmap_free(rb1);
} }
}
return retSize; return retSize;
} }
@ -311,10 +436,21 @@ public:
{ {
if (isSmall()) if (isSmall())
toLarge(); toLarge();
UInt8 is_true = 0;
if ( isUInt64() )
{
std::shared_ptr<Roaring64Map> newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64();
is_true = *rb64 == *newRb64;
}
else
{
roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
UInt8 is_true = roaring_bitmap_equals(rb, rb1); is_true = roaring_bitmap_equals(rb, rb1);
if (r1.isSmall()) if (r1.isSmall())
roaring_bitmap_free(rb1); roaring_bitmap_free(rb1);
}
return is_true; return is_true;
} }
@ -335,18 +471,49 @@ public:
else else
{ {
for (const auto & x : small) for (const auto & x : small)
{
if ( isUInt64() )
{
if( r1.rb64->contains( static_cast<UInt64>(x.getValue()) ) )
return 1;
}
else
{
if (roaring_bitmap_contains(r1.rb, x.getValue())) if (roaring_bitmap_contains(r1.rb, x.getValue()))
return 1; return 1;
} }
} }
}
}
else if (r1.isSmall()) else if (r1.isSmall())
{ {
for (const auto & x : r1.small) for (const auto & x : r1.small)
{
if ( isUInt64() )
{
if( rb64->contains( static_cast<UInt64>(x.getValue()) ) )
return 1;
}
else
{
if (roaring_bitmap_contains(rb, x.getValue())) if (roaring_bitmap_contains(rb, x.getValue()))
return 1; return 1;
} }
else if (roaring_bitmap_intersect(rb, r1.rb)) }
}
else
{
if ( isUInt64() )
{
if( ( *rb64 & *r1.rb64 ).cardinality() > 0 )
return 1; return 1;
}
else
{
if (roaring_bitmap_intersect(rb, r1.rb))
return 1;
}
}
return 0; return 0;
} }
@ -379,28 +546,72 @@ public:
// r1_size + number of not found elements, if this sum becomes // r1_size + number of not found elements, if this sum becomes
// greater then r1 is not a subset. // greater then r1 is not a subset.
for (const auto & x : small) for (const auto & x : small)
{
if ( isUInt64() )
{
if (!r1.rb64->contains( static_cast<UInt64>(x.getValue()) ) && ++r1_size > small.size())
return 0;
}
else
{
if (!roaring_bitmap_contains(r1.rb, x.getValue()) && ++r1_size > small.size()) if (!roaring_bitmap_contains(r1.rb, x.getValue()) && ++r1_size > small.size())
return 0; return 0;
} }
} }
}
}
else if (r1.isSmall()) else if (r1.isSmall())
{ {
for (const auto & x : r1.small) for (const auto & x : r1.small)
{
if ( isUInt64() )
{
if ( !rb64->contains( static_cast<UInt64>(x.getValue())) )
return 0;
}
else
{
if (!roaring_bitmap_contains(rb, x.getValue())) if (!roaring_bitmap_contains(rb, x.getValue()))
return 0; return 0;
} }
else if (!roaring_bitmap_is_subset(r1.rb, rb)) }
}
else
{
if ( isUInt64() )
{
if (!r1.rb64->isSubset(*rb64))
return 0; return 0;
}
else
{
if (!roaring_bitmap_is_subset(r1.rb, rb))
return 0;
}
}
return 1; return 1;
} }
/** /**
* Check whether this bitmap contains the argument. * Check whether this bitmap contains the argument.
*/ */
UInt8 rb_contains(const UInt32 x) const UInt8 rb_contains(const UInt64 x) const
{ {
return isSmall() ? small.find(x) != small.end() : roaring_bitmap_contains(rb, x); if (isSmall())
{
return small.find(x) != small.end();
}
else
{
if ( isUInt64() )
{
return rb64->contains(x);
}
else
{
return roaring_bitmap_contains(rb, x);
}
}
} }
/** /**
@ -410,6 +621,10 @@ public:
{ {
if (isSmall()) if (isSmall())
toLarge(); toLarge();
if ( isUInt64() )
rb64->remove( offsetid );
else
roaring_bitmap_remove(rb, offsetid); roaring_bitmap_remove(rb, offsetid);
} }
@ -423,6 +638,9 @@ public:
{ {
if (isSmall()) if (isSmall())
toLarge(); toLarge();
if ( isUInt64() )
rb64->flip( offsetstart, offsetend );
else
roaring_bitmap_flip_inplace(rb, offsetstart, offsetend); roaring_bitmap_flip_inplace(rb, offsetstart, offsetend);
} }
@ -433,6 +651,10 @@ public:
{ {
if (isSmall()) if (isSmall())
toLarge(); toLarge();
if ( isUInt64() )
return rb64->rank( offsetid );
else
return roaring_bitmap_rank(rb, offsetid); return roaring_bitmap_rank(rb, offsetid);
} }
@ -452,6 +674,15 @@ public:
} }
} }
else else
{
if ( isUInt64() )
{
for (Roaring64Map::const_iterator iterator = rb64->begin(); iterator != rb64->end(); iterator++) {
res_data.emplace_back( *iterator );
count++;
}
}
else
{ {
roaring_uint32_iterator_t iterator; roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator); roaring_init_iterator(rb, &iterator);
@ -462,13 +693,14 @@ public:
count++; count++;
} }
} }
}
return count; return count;
} }
/** /**
* Return new set with specified range (not include the range_end) * Return new set with specified range (not include the range_end)
*/ */
UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet & r1) const UInt64 rb_range(UInt64 range_start, UInt64 range_end, RoaringBitmapWithSmallSet & r1) const
{ {
UInt64 count = 0; UInt64 count = 0;
if (range_start >= range_end) if (range_start >= range_end)
@ -486,6 +718,24 @@ public:
} }
} }
else else
{
if ( isUInt64() )
{
for (Roaring64Map::const_iterator iterator = rb64->begin(); iterator != rb64->end(); iterator++)
{
if( *iterator < range_start )
continue;
if( *iterator < range_end)
{
r1.add( *iterator );
++count;
}
else
break;
}
}
else
{ {
roaring_uint32_iterator_t iterator; roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator); roaring_init_iterator(rb, &iterator);
@ -497,13 +747,14 @@ public:
++count; ++count;
} }
} }
}
return count; return count;
} }
/** /**
* Return new set of the smallest `limit` values in set which is no less than `range_start`. * Return new set of the smallest `limit` values in set which is no less than `range_start`.
*/ */
UInt64 rb_limit(UInt32 range_start, UInt32 limit, RoaringBitmapWithSmallSet & r1) const UInt64 rb_limit(UInt64 range_start, UInt64 limit, RoaringBitmapWithSmallSet & r1) const
{ {
UInt64 count = 0; UInt64 count = 0;
if (isSmall()) if (isSmall())
@ -525,6 +776,24 @@ public:
count = UInt64(limit); count = UInt64(limit);
} }
else else
{
if ( isUInt64() )
{
for (Roaring64Map::const_iterator iterator = rb64->begin(); iterator != rb64->end(); iterator++)
{
if( *iterator < range_start )
continue;
if( count < limit)
{
r1.add( *iterator );
++count;
}
else
break;
}
}
else
{ {
roaring_uint32_iterator_t iterator; roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator); roaring_init_iterator(rb, &iterator);
@ -536,6 +805,7 @@ public:
++count; ++count;
} }
} }
}
return count; return count;
} }
@ -555,6 +825,9 @@ public:
} }
else else
{ {
if ( isUInt64() )
min_val = rb64->minimum();
else
min_val = UInt64(roaring_bitmap_minimum(rb)); min_val = UInt64(roaring_bitmap_minimum(rb));
} }
return min_val; return min_val;
@ -576,6 +849,9 @@ public:
} }
else else
{ {
if ( isUInt64() )
max_val = rb64->maximum();
else
max_val = UInt64(roaring_bitmap_maximum(rb)); max_val = UInt64(roaring_bitmap_maximum(rb));
} }
return max_val; return max_val;
@ -592,12 +868,144 @@ public:
{ {
if (from_vals[i] == to_vals[i]) if (from_vals[i] == to_vals[i])
continue; continue;
if ( isUInt64() )
{
bool changed = rb64->removeChecked( from_vals[i] );
if (changed)
rb64->add(to_vals[i]);
}
else
{
bool changed = roaring_bitmap_remove_checked(rb, from_vals[i]); bool changed = roaring_bitmap_remove_checked(rb, from_vals[i]);
if (changed) if (changed)
roaring_bitmap_add(rb, to_vals[i]); roaring_bitmap_add(rb, to_vals[i]);
} }
} }
}
private:
/// To read and write the DB Buffer directly, migrate code from CRoaring
/**
 * Read `n_args` 32-bit values from `db_buf`, one at a time, and insert each of them into `r`.
 *
 * The container touched by the previous insertion is remembered; when the next value has the
 * same upper 16 bits as the previous one, the value is added to that container directly
 * instead of looking the container up again.
 *
 * Does nothing (and reads nothing from the buffer) when `n_args` is 0.
 * NOTE(review): presumably the values are expected to arrive roughly sorted so that the
 * cached container is reused often — confirm against the caller.
 */
void db_roaring_bitmap_add_many(DB::ReadBuffer & db_buf, roaring_bitmap_t * r, size_t n_args)
{
void * container = nullptr; // hold value of last container touched
uint8_t typecode = 0; // typecode of last container touched
uint32_t prev = 0; // previously inserted value
size_t i = 0; // index of value
int containerindex = 0; // index of the last container touched, filled in by containerptr_roaring_bitmap_add
if (n_args == 0)
return;
uint32_t val;
// The first value always goes through the lookup path to obtain an initial container.
readBinary(val, db_buf);
container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
prev = val;
++i;
for (; i < n_args; ++i)
{
readBinary(val, db_buf);
// Same upper 16 bits as the previous value => same container.
if (((prev ^ val) >> 16) == 0)
{ // no need to seek the container, it is at hand
// because we already have the container at hand, we can do the
// insertion
// automatically, bypassing the roaring_bitmap_add call
uint8_t newtypecode = typecode;
// Only the low 16 bits are stored inside the container.
void * container2 = container_add(container, val & 0xFFFF, typecode, &newtypecode);
// rare instance when we need to
if (container2 != container)
{
// change the container type
// container_add returned a different container: free the old one and install the new one in its slot.
container_free(container, typecode);
ra_set_container_at_index(&r->high_low_container, containerindex, container2, newtypecode);
typecode = newtypecode;
container = container2;
}
}
else
{
// Different upper 16 bits: look the container up again and refresh the cached state.
container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
}
prev = val;
}
}
/**
 * Write every value held in the roaring array `ra` to `db_buf`, container by container.
 *
 * Each container is written with a base equal to its 16-bit key shifted into the upper half
 * of a 32-bit value. The per-container element counts returned by the helper are not needed
 * here, so they are discarded (the previous running total was never read).
 */
void db_ra_to_uint32_array(DB::WriteBuffer & db_buf, roaring_array_t * ra) const
{
    for (Int32 i = 0; i < ra->size; ++i)
    {
        const UInt32 base = (static_cast<UInt32>(ra->keys[i])) << 16;
        db_container_to_uint32_array(db_buf, ra->containers[i], ra->typecodes[i], base);
    }
}
/**
 * Write the contents of one container to `db_buf`, dispatching on its type code.
 *
 * The container is first passed through container_unwrap_shared, which may update `typecode`.
 * Returns the count reported by the type-specific writer, or 0 for an unrecognised type code.
 */
UInt32 db_container_to_uint32_array(DB::WriteBuffer & db_buf, const void * container, uint8_t typecode, UInt32 base) const
{
    container = container_unwrap_shared(container, &typecode);

    if (typecode == BITSET_CONTAINER_TYPE_CODE)
        return db_bitset_container_to_uint32_array(db_buf, static_cast<const bitset_container_t *>(container), base);

    if (typecode == ARRAY_CONTAINER_TYPE_CODE)
        return db_array_container_to_uint32_array(db_buf, static_cast<const array_container_t *>(container), base);

    if (typecode == RUN_CONTAINER_TYPE_CODE)
        return db_run_container_to_uint32_array(db_buf, static_cast<const run_container_t *>(container), base);

    return 0;
}
/**
 * Write all values of a bitset container to `db_buf`, offset by `base`.
 * Returns the number of values written, narrowed to UInt32.
 */
UInt32 db_bitset_container_to_uint32_array(DB::WriteBuffer & db_buf, const bitset_container_t * cont, UInt32 base) const
{
    const size_t written = db_bitset_extract_setbits(db_buf, cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, base);
    return static_cast<UInt32>(written);
}
/**
 * Walk `length` 64-bit words of `bitset` and, for every set bit, write `base + bit position`
 * to `db_buf` as a UInt32. Bits are visited from least to most significant within each word,
 * and `base` advances by 64 per word.
 * Returns the number of values written.
 */
size_t db_bitset_extract_setbits(DB::WriteBuffer & db_buf, UInt64 * bitset, size_t length, UInt32 base) const
{
    UInt32 written = 0;
    for (size_t word_idx = 0; word_idx < length; ++word_idx, base += 64)
    {
        // `word &= word - 1` clears the lowest set bit, so the loop runs once per set bit.
        for (UInt64 word = bitset[word_idx]; word != 0; word &= word - 1)
        {
            const UInt32 bit_pos = __builtin_ctzll(word); // index of the lowest set bit
            const UInt32 val = bit_pos + base;
            writePODBinary(val, db_buf);
            ++written;
        }
    }
    return written;
}
/**
 * Write each element of an array container to `db_buf` as `base + element` (UInt32).
 * Returns the number of values written.
 */
int db_array_container_to_uint32_array(DB::WriteBuffer & db_buf, const array_container_t * cont, UInt32 base) const
{
    UInt32 emitted = 0;
    Int32 idx = 0;
    while (idx < cont->cardinality)
    {
        const UInt32 val = base + cont->array[idx];
        writePODBinary(val, db_buf);
        ++idx;
        ++emitted;
    }
    return emitted;
}
/**
 * Expand every run of a run container and write each covered value to `db_buf` as UInt32.
 * A run with start `value` and `length` yields the values base + value ... base + value + length,
 * both ends included. Returns the number of values written.
 */
int db_run_container_to_uint32_array(DB::WriteBuffer & db_buf, const run_container_t * cont, UInt32 base) const
{
    UInt32 emitted = 0;
    for (Int32 run_idx = 0; run_idx < cont->n_runs; ++run_idx)
    {
        const UInt32 first = base + cont->runs[run_idx].value;
        const UInt16 last_offset = cont->runs[run_idx].length;
        Int32 offset = 0;
        while (offset <= last_offset)
        {
            const UInt32 val = first + offset;
            writePODBinary(val, db_buf);
            ++emitted;
            ++offset;
        }
    }
    return emitted;
}
}; };
template <typename T> template <typename T>

View File

@ -0,0 +1,14 @@
2019-01-01 100
2019-01-02 110
2019-01-03 210
210
2019-01-01 100 [4294967296,4294967297,4294967298,4294967299,4294967300,4294967301,4294967302,4294967303,4294967304,4294967305,4294967306,4294967307,4294967308,4294967309,4294967310,4294967311,4294967312,4294967313,4294967314,4294967315,4294967316,4294967317,4294967318,4294967319,4294967320,4294967321,4294967322,4294967323,4294967324,4294967325,4294967326,4294967327,4294967328,4294967329,4294967330,4294967331,4294967332,4294967333,4294967334,4294967335,4294967336,4294967337,4294967338,4294967339,4294967340,4294967341,4294967342,4294967343,4294967344,4294967345,4294967346,4294967347,4294967348,4294967349,4294967350,4294967351,4294967352,4294967353,4294967354,4294967355,4294967356,4294967357,4294967358,4294967359,4294967360,4294967361,4294967362,4294967363,4294967364,4294967365,4294967366,4294967367,4294967368,4294967369,4294967370,4294967371,4294967372,4294967373,4294967374,4294967375,4294967376,4294967377,4294967378,4294967379,4294967380,4294967381,4294967382,4294967383,4294967384,4294967385,4294967386,4294967387,4294967388,4294967389,4294967390,4294967391,4294967392,4294967393,4294967394,4294967395]
2019-01-02 110 [4294967385,4294967386,4294967387,4294967388,4294967389,4294967390,4294967391,4294967392,4294967393,4294967394,4294967395,4294967396,4294967397,4294967398,4294967399,4294967400,4294967401,4294967402,4294967403,4294967404,4294967405,4294967406,4294967407,4294967408,4294967409,4294967410,4294967411,4294967412,4294967413,4294967414,4294967415,4294967416,4294967417,4294967418,4294967419,4294967420,4294967421,4294967422,4294967423,4294967424,4294967425,4294967426,4294967427,4294967428,4294967429,4294967430,4294967431,4294967432,4294967433,4294967434,4294967435,4294967436,4294967437,4294967438,4294967439,4294967440,4294967441,4294967442,4294967443,4294967444,4294967445,4294967446,4294967447,4294967448,4294967449,4294967450,4294967451,4294967452,4294967453,4294967454,4294967455,4294967456,4294967457,4294967458,4294967459,4294967460,4294967461,4294967462,4294967463,4294967464,4294967465,4294967466,4294967467,4294967468,4294967469,4294967470,4294967471,4294967472,4294967473,4294967474,4294967475,4294967476,4294967477,4294967478,4294967479,4294967480,4294967481,4294967482,4294967483,4294967484,4294967485,4294967486,4294967487,4294967488,4294967489,4294967490,4294967491,4294967492,4294967493,4294967494]
2019-01-03 210 [4294967296,4294967297,4294967298,4294967299,4294967300,4294967301,4294967302,4294967303,4294967304,4294967305,4294967306,4294967307,4294967308,4294967309,4294967310,4294967311,4294967312,4294967313,4294967314,4294967315,4294967316,4294967317,4294967318,4294967319,4294967320,4294967321,4294967322,4294967323,4294967324,4294967325,4294967326,4294967327,4294967328,4294967329,4294967330,4294967331,4294967332,4294967333,4294967334,4294967335,4294967336,4294967337,4294967338,4294967339,4294967340,4294967341,4294967342,4294967343,4294967344,4294967345,4294967346,4294967347,4294967348,4294967349,4294967350,4294967351,4294967352,4294967353,4294967354,4294967355,4294967356,4294967357,4294967358,4294967359,4294967360,4294967361,4294967362,4294967363,4294967364,4294967365,4294967366,4294967367,4294967368,4294967369,4294967370,4294967371,4294967372,4294967373,4294967374,4294967375,4294967376,4294967377,4294967378,4294967379,4294967380,4294967381,4294967382,4294967383,4294967384,4294967385,4294967386,4294967387,4294967388,4294967389,4294967390,4294967391,4294967392,4294967393,4294967394,4294967395,4294967396,4294967397,4294967398,4294967399,4294967400,4294967401,4294967402,4294967403,4294967404,4294967405,4294967406,4294967407,4294967408,4294967409,4294967410,4294967411,4294967412,4294967413,4294967414,4294967415,4294967416,4294967417,4294967418,4294967419,4294967420,4294967421,4294967422,4294967423,4294967424,4294967425,4294967426,4294967427,4294967428,4294967429,4294967430,4294967431,4294967432,4294967433,4294967434,4294967435,4294967436,4294967437,4294967438,4294967439,4294967440,4294967441,4294967442,4294967443,4294967444,4294967445,4294967446,4294967447,4294967448,4294967449,4294967450,4294967451,4294967452,4294967453,4294967454,4294967455,4294967456,4294967457,4294967458,4294967459,4294967460,4294967461,4294967462,4294967463,4294967464,4294967465,4294967466,4294967467,4294967468,4294967469,4294967470,4294967471,4294967472,4294967473,4294967474,4294967475,4294967476,4294967477,4294967478,4294967479,4294967480,4294967481,4294967482,4294967483,4294967484,4294967485,4294967486,4294967487,4294967488,4294967489,4294967490,4294967491,4294967492,4294967493,4294967494,4294967495,4294967496,4294967497,4294967498,4294967499,4294967500,4294967501,4294967502,4294967503,4294967504,4294967505]
110 100 199 11 99 188
110 100 199 11 99 188
211
211
209
[4294967296,4294967297,4294967298]
[4294967296,4294967297,4294967298]

View File

@ -0,0 +1,73 @@
-- Test of bitmap aggregate/scalar functions over UInt64 values that do not fit in 32 bits
-- (every uid is 4294967295 + number, i.e. above the UInt32 maximum).

-- Source table: three days of data with partially overlapping uid ranges.
DROP TABLE IF EXISTS bitmap_test;
CREATE TABLE bitmap_test(pickup_date Date, city_id UInt32, uid UInt64)ENGINE = Memory;
INSERT INTO bitmap_test SELECT '2019-01-01', 1, 4294967295 + number FROM numbers(1,100);
INSERT INTO bitmap_test SELECT '2019-01-02', 1, 4294967295 + number FROM numbers(90,110);
INSERT INTO bitmap_test SELECT '2019-01-03', 2, 4294967295 + number FROM numbers(1,210);

-- Table holding serialized groupBitmap states, to exercise state write/read and merge.
DROP TABLE IF EXISTS bitmap_state_test;
CREATE TABLE bitmap_state_test
(
pickup_date Date,
city_id UInt32,
uv AggregateFunction( groupBitmap, UInt64 )
)
ENGINE = AggregatingMergeTree() PARTITION BY toYYYYMM(pickup_date) ORDER BY (pickup_date, city_id);
INSERT INTO bitmap_state_test SELECT
pickup_date,
city_id,
groupBitmapState(uid) AS uv
FROM bitmap_test
GROUP BY pickup_date, city_id;

-- Cardinality from stored states, from raw values, and the materialized arrays per day.
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date;
SELECT groupBitmap( uid ) AS user_num FROM bitmap_test;
SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date;

-- Set-operation cardinalities computed directly by the *Cardinality functions.
SELECT
bitmapCardinality(day_today) AS today_users,
bitmapCardinality(day_before) AS before_users,
bitmapOrCardinality(day_today, day_before) AS all_users,
bitmapAndCardinality(day_today, day_before) AS old_users,
bitmapAndnotCardinality(day_today, day_before) AS new_users,
bitmapXorCardinality(day_today, day_before) AS diff_users
FROM
(
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
) js1
ALL LEFT JOIN
(
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
) js2
USING city_id;

-- Same figures obtained by materializing the result bitmap first and then counting it;
-- the output row is expected to match the query above.
SELECT
bitmapCardinality(day_today) AS today_users,
bitmapCardinality(day_before) AS before_users,
bitmapCardinality(bitmapOr(day_today, day_before)) AS all_users,
bitmapCardinality(bitmapAnd(day_today, day_before)) AS old_users,
bitmapCardinality(bitmapAndnot(day_today, day_before)) AS new_users,
bitmapCardinality(bitmapXor(day_today, day_before)) AS diff_users
FROM
(
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
) js1
ALL LEFT JOIN
(
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
) js2
USING city_id;

-- bitmapHasAny with the aggregated bitmap on either side of a single-element bitmap, plus the negated case.
SELECT count(*) FROM bitmap_test WHERE bitmapHasAny((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), bitmapBuild([uid]));
SELECT count(*) FROM bitmap_test WHERE bitmapHasAny(bitmapBuild([uid]), (SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'));
SELECT count(*) FROM bitmap_test WHERE 0 = bitmapHasAny((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), bitmapBuild([uid]));

-- Intersection of an aggregated bitmap with an explicitly built UInt64 bitmap.
SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([4294967296, 4294967297, 4294967298], 'Array(UInt64)')))) FROM bitmap_test GROUP BY city_id;