From ccb2fcc16124654350b2f8e765d9a263723b3152 Mon Sep 17 00:00:00 2001 From: Andy Yang Date: Mon, 7 Dec 2020 17:35:15 +0800 Subject: [PATCH] Support bitmap64 feature --- contrib/croaring-cmake/CMakeLists.txt | 1 + .../AggregateFunctionGroupBitmapData.h | 600 +++++++++++++++--- .../00829_bitmap64_function.reference | 14 + .../0_stateless/00829_bitmap64_function.sql | 73 +++ 4 files changed, 592 insertions(+), 96 deletions(-) create mode 100644 tests/queries/0_stateless/00829_bitmap64_function.reference create mode 100644 tests/queries/0_stateless/00829_bitmap64_function.sql diff --git a/contrib/croaring-cmake/CMakeLists.txt b/contrib/croaring-cmake/CMakeLists.txt index 3189795347b..8a8ca62e051 100644 --- a/contrib/croaring-cmake/CMakeLists.txt +++ b/contrib/croaring-cmake/CMakeLists.txt @@ -23,3 +23,4 @@ add_library(roaring ${SRCS}) target_include_directories(roaring PRIVATE ${LIBRARY_DIR}/include/roaring) target_include_directories(roaring SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include) +target_include_directories(roaring SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/cpp) diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index d80e5e81f19..5e93fbb60d8 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -12,6 +12,7 @@ // TODO: find out what it is. On github, they have proper interface headers like // this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h #include +#include namespace DB { @@ -26,25 +27,37 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable private: using Small = SmallSet; using ValueBuffer = std::vector; + bool bUInt64 = sizeof(T) >= 8; + Small small; roaring_bitmap_t * rb = nullptr; + std::shared_ptr rb64 = nullptr; void toLarge() { - rb = roaring_bitmap_create(); + if ( isUInt64() ) + rb64 = std::make_shared(); + else + rb = roaring_bitmap_create(); - for (const auto & x : small) - roaring_bitmap_add(rb, x.getValue()); + for (const auto & x : small){ + if ( isUInt64() ) + rb64->add( static_cast(x.getValue()) ); + else + roaring_bitmap_add(rb, x.getValue()); + } } public: - bool isLarge() const { return rb != nullptr; } + bool isLarge() const { return rb != nullptr || rb64 != nullptr; } - bool isSmall() const { return rb == nullptr; } + bool isSmall() const { return rb == nullptr && rb64 == nullptr; } + + bool isUInt64() const { return bUInt64; } ~RoaringBitmapWithSmallSet() { - if (isLarge()) + if (isLarge() && !isUInt64() ) roaring_bitmap_free(rb); } @@ -59,19 +72,35 @@ public: else { toLarge(); - roaring_bitmap_add(rb, value); + if ( isUInt64() ) + rb64->add( static_cast(value) ); + else + roaring_bitmap_add(rb, value); } } } else - roaring_bitmap_add(rb, value); + { + if ( isUInt64() ) + rb64->add( static_cast(value) ); + else + roaring_bitmap_add(rb, value); + } } UInt64 size() const { - return isSmall() - ? small.size() - : roaring_bitmap_get_cardinality(rb); + if (isSmall()) + { + return small.size(); + } + else + { + if ( isUInt64() ) + return rb64->cardinality(); + else + return roaring_bitmap_get_cardinality(rb); + } } void merge(const RoaringBitmapWithSmallSet & r1) @@ -81,7 +110,10 @@ public: if (isSmall()) toLarge(); - roaring_bitmap_or_inplace(rb, r1.rb); + if ( isUInt64() ) + *rb64 |= *r1.rb64; + else + roaring_bitmap_or_inplace(rb, r1.rb); } else { @@ -92,39 +124,63 @@ public: void read(DB::ReadBuffer & in) { - bool is_large; - readBinary(is_large, in); + // Container type: 0: SmallSet, 1: RoaringBitmap32, 2: RoaringBitmap64 + UInt8 containerType = 0; + readBinary(containerType, in); - if (is_large) + if( 0 == containerType ) { + small.read(in); + } else if( 1 == containerType ) { std::string s; readStringBinary(s,in); rb = roaring_bitmap_portable_deserialize(s.c_str()); - for (const auto & x : small) // merge from small - roaring_bitmap_add(rb, x.getValue()); + // It has been persisted in the bitmap and does not need to merge from small +// for (const auto & x : small) // merge from small +// roaring_bitmap_add(rb, x.getValue()); + } else { + std::string s; + readStringBinary(s,in); + rb64 = std::make_shared( Roaring64Map::read(s.c_str()) ); } - else - small.read(in); } void write(DB::WriteBuffer & out) const { - writeBinary(isLarge(), out); + // Container type: 0: SmallSet, 1: RoaringBitmap32, 2: RoaringBitmap64 + UInt8 containerType = 0; + if (isLarge()) + { + if ( isUInt64() ) + containerType = 2; + else + containerType = 1; + } + + writeBinary(containerType, out); if (isLarge()) { - uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(rb); - std::string s(expectedsize,0); - roaring_bitmap_portable_serialize(rb, const_cast(s.data())); - writeStringBinary(s,out); + if ( isUInt64() ){ + uint32_t expectedsize = rb64->getSizeInBytes(); + std::string s(expectedsize,0); + rb64->write(const_cast(s.data())); + writeStringBinary(s,out); + } else { + uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(rb); + std::string s(expectedsize,0); + roaring_bitmap_portable_serialize(rb, const_cast(s.data())); + writeStringBinary(s,out); + } } else small.write(out); } - roaring_bitmap_t * getRb() const { return rb; } + std::shared_ptr getRb64() const { return rb64; } + Small & getSmall() const { return small; } /** @@ -138,6 +194,17 @@ public: return smallRb; } + /** + * Get a new Roaring64Map from elements of small + */ + std::shared_ptr getNewRb64FromSmall() const + { + std::shared_ptr smallRb64 = std::make_shared(); + for (const auto & x : small) + smallRb64->add( static_cast(x.getValue()) ); + return smallRb64; + } + /** * Computes the intersection between two bitmaps */ @@ -162,8 +229,18 @@ public: else if (isSmall() && r1.isLarge()) { for (const auto & x : small) - if (roaring_bitmap_contains(r1.rb, x.getValue())) - buffer.push_back(x.getValue()); + { + if ( isUInt64() ) + { + if (rb64->contains(static_cast(x.getValue())) ) + buffer.push_back(x.getValue()); + } + else + { + if (roaring_bitmap_contains(r1.rb, x.getValue())) + buffer.push_back(x.getValue()); + } + } // Clear out the original values small.clear(); @@ -175,10 +252,18 @@ public: } else { - roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); - roaring_bitmap_and_inplace(rb, rb1); - if (r1.isSmall()) - roaring_bitmap_free(rb1); + if ( isUInt64() ) + { + std::shared_ptr newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64(); + *rb64 &= *newRb64; + } + else + { + roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); + roaring_bitmap_and_inplace(rb, rb1); + if (r1.isSmall()) + roaring_bitmap_free(rb1); + } } } @@ -194,10 +279,16 @@ public: { if (isSmall()) toLarge(); - roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); - roaring_bitmap_xor_inplace(rb, rb1); - if (r1.isSmall()) - roaring_bitmap_free(rb1); + + if ( isUInt64() ){ + std::shared_ptr newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64(); + *rb64 ^= *newRb64; + } else { + roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); + roaring_bitmap_xor_inplace(rb, rb1); + if (r1.isSmall()) + roaring_bitmap_free(rb1); + } } /** @@ -224,8 +315,18 @@ public: else if (isSmall() && r1.isLarge()) { for (const auto & x : small) - if (!roaring_bitmap_contains(r1.rb, x.getValue())) - buffer.push_back(x.getValue()); + { + if ( isUInt64() ) + { + if ( !rb64->contains(static_cast(x.getValue()))) + buffer.push_back(x.getValue()); + } + else + { + if (!roaring_bitmap_contains(r1.rb, x.getValue())) + buffer.push_back(x.getValue()); + } + } // Clear out the original values small.clear(); @@ -237,10 +338,18 @@ public: } else { - roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); - roaring_bitmap_andnot_inplace(rb, rb1); - if (r1.isSmall()) - roaring_bitmap_free(rb1); + if ( isUInt64() ) + { + std::shared_ptr newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64(); + *rb64 -= *newRb64; + } + else + { + roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); + roaring_bitmap_andnot_inplace(rb, rb1); + if (r1.isSmall()) + roaring_bitmap_free(rb1); + } } } @@ -259,15 +368,31 @@ public: else if (isSmall() && r1.isLarge()) { for (const auto & x : small) - if (roaring_bitmap_contains(r1.rb, x.getValue())) - ++retSize; + { + if ( isUInt64() ) + { + if ( rb64->contains(static_cast(x.getValue()))) + ++retSize; + } else { + if (roaring_bitmap_contains(r1.rb, x.getValue())) + ++retSize; + } + } } else { - roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); - retSize = roaring_bitmap_and_cardinality(rb, rb1); - if (r1.isSmall()) - roaring_bitmap_free(rb1); + if ( isUInt64() ) + { + std::shared_ptr newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64(); + retSize = ( *rb64 & *newRb64 ).cardinality(); + } + else + { + roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); + retSize = roaring_bitmap_and_cardinality(rb, rb1); + if (r1.isSmall()) + roaring_bitmap_free(rb1); + } } return retSize; } @@ -311,10 +436,21 @@ public: { if (isSmall()) toLarge(); - roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); - UInt8 is_true = roaring_bitmap_equals(rb, rb1); - if (r1.isSmall()) - roaring_bitmap_free(rb1); + + UInt8 is_true = 0; + if ( isUInt64() ) + { + std::shared_ptr newRb64 = r1.isSmall() ? r1.getNewRb64FromSmall() : r1.getRb64(); + is_true = *rb64 == *newRb64; + } + else + { + roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); + is_true = roaring_bitmap_equals(rb, rb1); + if (r1.isSmall()) + roaring_bitmap_free(rb1); + } + return is_true; } @@ -335,18 +471,49 @@ public: else { for (const auto & x : small) - if (roaring_bitmap_contains(r1.rb, x.getValue())) - return 1; + { + if ( isUInt64() ) + { + if( r1.rb64->contains( static_cast(x.getValue()) ) ) + return 1; + } + else + { + if (roaring_bitmap_contains(r1.rb, x.getValue())) + return 1; + } + } } } else if (r1.isSmall()) { for (const auto & x : r1.small) - if (roaring_bitmap_contains(rb, x.getValue())) - return 1; + { + if ( isUInt64() ) + { + if( rb64->contains( static_cast(x.getValue()) ) ) + return 1; + } + else + { + if (roaring_bitmap_contains(rb, x.getValue())) + return 1; + } + } + } + else + { + if ( isUInt64() ) + { + if( ( *rb64 & *r1.rb64 ).cardinality() > 0 ) + return 1; + } + else + { + if (roaring_bitmap_intersect(rb, r1.rb)) + return 1; + } } - else if (roaring_bitmap_intersect(rb, r1.rb)) - return 1; return 0; } @@ -379,28 +546,72 @@ public: // r1_size + number of not found elements, if this sum becomes // greater then r1 is not a subset. for (const auto & x : small) - if (!roaring_bitmap_contains(r1.rb, x.getValue()) && ++r1_size > small.size()) - return 0; + { + if ( isUInt64() ) + { + if (!r1.rb64->contains( static_cast(x.getValue()) ) && ++r1_size > small.size()) + return 0; + } + else + { + if (!roaring_bitmap_contains(r1.rb, x.getValue()) && ++r1_size > small.size()) + return 0; + } + } } } else if (r1.isSmall()) { for (const auto & x : r1.small) - if (!roaring_bitmap_contains(rb, x.getValue())) - return 0; + { + if ( isUInt64() ) + { + if ( !rb64->contains( static_cast(x.getValue())) ) + return 0; + } + else + { + if (!roaring_bitmap_contains(rb, x.getValue())) + return 0; + } + } + } + else + { + if ( isUInt64() ) + { + if (!r1.rb64->isSubset(*rb64)) + return 0; + } + else + { + if (!roaring_bitmap_is_subset(r1.rb, rb)) + return 0; + } } - else if (!roaring_bitmap_is_subset(r1.rb, rb)) - return 0; - return 1; } /** * Check whether this bitmap contains the argument. */ - UInt8 rb_contains(const UInt32 x) const + UInt8 rb_contains(const UInt64 x) const { - return isSmall() ? small.find(x) != small.end() : roaring_bitmap_contains(rb, x); + if (isSmall()) + { + return small.find(x) != small.end(); + } + else + { + if ( isUInt64() ) + { + return rb64->contains(x); + } + else + { + return roaring_bitmap_contains(rb, x); + } + } } /** @@ -410,7 +621,11 @@ public: { if (isSmall()) toLarge(); - roaring_bitmap_remove(rb, offsetid); + + if ( isUInt64() ) + rb64->remove( offsetid ); + else + roaring_bitmap_remove(rb, offsetid); } /** @@ -423,7 +638,10 @@ public: { if (isSmall()) toLarge(); - roaring_bitmap_flip_inplace(rb, offsetstart, offsetend); + if ( isUInt64() ) + rb64->flip( offsetstart, offsetend ); + else + roaring_bitmap_flip_inplace(rb, offsetstart, offsetend); } /** @@ -433,7 +651,11 @@ public: { if (isSmall()) toLarge(); - return roaring_bitmap_rank(rb, offsetid); + + if ( isUInt64() ) + return rb64->rank( offsetid ); + else + return roaring_bitmap_rank(rb, offsetid); } /** @@ -453,13 +675,23 @@ public: } else { - roaring_uint32_iterator_t iterator; - roaring_init_iterator(rb, &iterator); - while (iterator.has_value) + if ( isUInt64() ) { - res_data.emplace_back(iterator.current_value); - roaring_advance_uint32_iterator(&iterator); - count++; + for (Roaring64Map::const_iterator iterator = rb64->begin(); iterator != rb64->end(); iterator++) { + res_data.emplace_back( *iterator ); + count++; + } + } + else + { + roaring_uint32_iterator_t iterator; + roaring_init_iterator(rb, &iterator); + while (iterator.has_value) + { + res_data.emplace_back(iterator.current_value); + roaring_advance_uint32_iterator(&iterator); + count++; + } } } return count; @@ -468,7 +700,7 @@ public: /** * Return new set with specified range (not include the range_end) */ - UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet & r1) const + UInt64 rb_range(UInt64 range_start, UInt64 range_end, RoaringBitmapWithSmallSet & r1) const { UInt64 count = 0; if (range_start >= range_end) @@ -487,14 +719,33 @@ public: } else { - roaring_uint32_iterator_t iterator; - roaring_init_iterator(rb, &iterator); - roaring_move_uint32_iterator_equalorlarger(&iterator, range_start); - while (iterator.has_value && UInt32(iterator.current_value) < range_end) + if ( isUInt64() ) { - r1.add(iterator.current_value); - roaring_advance_uint32_iterator(&iterator); - ++count; + for (Roaring64Map::const_iterator iterator = rb64->begin(); iterator != rb64->end(); iterator++) + { + if( *iterator < range_start ) + continue; + + if( *iterator < range_end) + { + r1.add( *iterator ); + ++count; + } + else + break; + } + } + else + { + roaring_uint32_iterator_t iterator; + roaring_init_iterator(rb, &iterator); + roaring_move_uint32_iterator_equalorlarger(&iterator, range_start); + while (iterator.has_value && UInt32(iterator.current_value) < range_end) + { + r1.add(iterator.current_value); + roaring_advance_uint32_iterator(&iterator); + ++count; + } } } return count; @@ -503,7 +754,7 @@ public: /** * Return new set of the smallest `limit` values in set which is no less than `range_start`. */ - UInt64 rb_limit(UInt32 range_start, UInt32 limit, RoaringBitmapWithSmallSet & r1) const + UInt64 rb_limit(UInt64 range_start, UInt64 limit, RoaringBitmapWithSmallSet & r1) const { UInt64 count = 0; if (isSmall()) @@ -526,14 +777,33 @@ public: } else { - roaring_uint32_iterator_t iterator; - roaring_init_iterator(rb, &iterator); - roaring_move_uint32_iterator_equalorlarger(&iterator, range_start); - while (UInt32(count) < limit && iterator.has_value) + if ( isUInt64() ) { - r1.add(iterator.current_value); - roaring_advance_uint32_iterator(&iterator); - ++count; + for (Roaring64Map::const_iterator iterator = rb64->begin(); iterator != rb64->end(); iterator++) + { + if( *iterator < range_start ) + continue; + + if( count < limit) + { + r1.add( *iterator ); + ++count; + } + else + break; + } + } + else + { + roaring_uint32_iterator_t iterator; + roaring_init_iterator(rb, &iterator); + roaring_move_uint32_iterator_equalorlarger(&iterator, range_start); + while (UInt32(count) < limit && iterator.has_value) + { + r1.add(iterator.current_value); + roaring_advance_uint32_iterator(&iterator); + ++count; + } } } return count; @@ -555,7 +825,10 @@ public: } else { - min_val = UInt64(roaring_bitmap_minimum(rb)); + if ( isUInt64() ) + min_val = rb64->minimum(); + else + min_val = UInt64(roaring_bitmap_minimum(rb)); } return min_val; } @@ -576,7 +849,10 @@ public: } else { - max_val = UInt64(roaring_bitmap_maximum(rb)); + if ( isUInt64() ) + max_val = rb64->maximum(); + else + max_val = UInt64(roaring_bitmap_maximum(rb)); } return max_val; } @@ -592,12 +868,144 @@ public: { if (from_vals[i] == to_vals[i]) continue; - bool changed = roaring_bitmap_remove_checked(rb, from_vals[i]); - if (changed) - roaring_bitmap_add(rb, to_vals[i]); + + if ( isUInt64() ) + { + bool changed = rb64->removeChecked( from_vals[i] ); + if (changed) + rb64->add(to_vals[i]); + } + else + { + bool changed = roaring_bitmap_remove_checked(rb, from_vals[i]); + if (changed) + roaring_bitmap_add(rb, to_vals[i]); + } } } +private: + /// To read and write the DB Buffer directly, migrate code from CRoaring + void db_roaring_bitmap_add_many(DB::ReadBuffer & db_buf, roaring_bitmap_t * r, size_t n_args) + { + void * container = nullptr; // hold value of last container touched + uint8_t typecode = 0; // typecode of last container touched + uint32_t prev = 0; // previous valued inserted + size_t i = 0; // index of value + int containerindex = 0; + if (n_args == 0) + return; + uint32_t val; + readBinary(val, db_buf); + container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); + prev = val; + ++i; + for (; i < n_args; ++i) + { + readBinary(val, db_buf); + if (((prev ^ val) >> 16) == 0) + { // no need to seek the container, it is at hand + // because we already have the container at hand, we can do the + // insertion + // automatically, bypassing the roaring_bitmap_add call + uint8_t newtypecode = typecode; + void * container2 = container_add(container, val & 0xFFFF, typecode, &newtypecode); + // rare instance when we need to + if (container2 != container) + { + // change the container type + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, containerindex, container2, newtypecode); + typecode = newtypecode; + container = container2; + } + } + else + { + container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); + } + prev = val; + } + } + + void db_ra_to_uint32_array(DB::WriteBuffer & db_buf, roaring_array_t * ra) const + { + size_t ctr = 0; + for (Int32 i = 0; i < ra->size; ++i) + { + Int32 num_added = db_container_to_uint32_array(db_buf, ra->containers[i], ra->typecodes[i], (static_cast(ra->keys[i])) << 16); + ctr += num_added; + } + } + + UInt32 db_container_to_uint32_array(DB::WriteBuffer & db_buf, const void * container, uint8_t typecode, UInt32 base) const + { + container = container_unwrap_shared(container, &typecode); + switch (typecode) + { + case BITSET_CONTAINER_TYPE_CODE: + return db_bitset_container_to_uint32_array(db_buf, static_cast(container), base); + case ARRAY_CONTAINER_TYPE_CODE: + return db_array_container_to_uint32_array(db_buf, static_cast(container), base); + case RUN_CONTAINER_TYPE_CODE: + return db_run_container_to_uint32_array(db_buf, static_cast(container), base); + } + return 0; + } + + UInt32 db_bitset_container_to_uint32_array(DB::WriteBuffer & db_buf, const bitset_container_t * cont, UInt32 base) const + { + return static_cast(db_bitset_extract_setbits(db_buf, cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, base)); + } + + size_t db_bitset_extract_setbits(DB::WriteBuffer & db_buf, UInt64 * bitset, size_t length, UInt32 base) const + { + UInt32 outpos = 0; + for (size_t i = 0; i < length; ++i) + { + UInt64 w = bitset[i]; + while (w != 0) + { + UInt64 t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) + UInt32 r = __builtin_ctzll(w); // on x64, should compile to TZCNT + UInt32 val = r + base; + writePODBinary(val, db_buf); + outpos++; + w ^= t; + } + base += 64; + } + return outpos; + } + + int db_array_container_to_uint32_array(DB::WriteBuffer & db_buf, const array_container_t * cont, UInt32 base) const + { + UInt32 outpos = 0; + for (Int32 i = 0; i < cont->cardinality; ++i) + { + const UInt32 val = base + cont->array[i]; + writePODBinary(val, db_buf); + outpos++; + } + return outpos; + } + + int db_run_container_to_uint32_array(DB::WriteBuffer & db_buf, const run_container_t * cont, UInt32 base) const + { + UInt32 outpos = 0; + for (Int32 i = 0; i < cont->n_runs; ++i) + { + UInt32 run_start = base + cont->runs[i].value; + UInt16 le = cont->runs[i].length; + for (Int32 j = 0; j <= le; ++j) + { + UInt32 val = run_start + j; + writePODBinary(val, db_buf); + outpos++; + } + } + return outpos; + } }; template diff --git a/tests/queries/0_stateless/00829_bitmap64_function.reference b/tests/queries/0_stateless/00829_bitmap64_function.reference new file mode 100644 index 00000000000..20f5e19d780 --- /dev/null +++ b/tests/queries/0_stateless/00829_bitmap64_function.reference @@ -0,0 +1,14 @@ +2019-01-01 100 +2019-01-02 110 +2019-01-03 210 +210 +2019-01-01 100 [4294967296,4294967297,4294967298,4294967299,4294967300,4294967301,4294967302,4294967303,4294967304,4294967305,4294967306,4294967307,4294967308,4294967309,4294967310,4294967311,4294967312,4294967313,4294967314,4294967315,4294967316,4294967317,4294967318,4294967319,4294967320,4294967321,4294967322,4294967323,4294967324,4294967325,4294967326,4294967327,4294967328,4294967329,4294967330,4294967331,4294967332,4294967333,4294967334,4294967335,4294967336,4294967337,4294967338,4294967339,4294967340,4294967341,4294967342,4294967343,4294967344,4294967345,4294967346,4294967347,4294967348,4294967349,4294967350,4294967351,4294967352,4294967353,4294967354,4294967355,4294967356,4294967357,4294967358,4294967359,4294967360,4294967361,4294967362,4294967363,4294967364,4294967365,4294967366,4294967367,4294967368,4294967369,4294967370,4294967371,4294967372,4294967373,4294967374,4294967375,4294967376,4294967377,4294967378,4294967379,4294967380,4294967381,4294967382,4294967383,4294967384,4294967385,4294967386,4294967387,4294967388,4294967389,4294967390,4294967391,4294967392,4294967393,4294967394,4294967395] +2019-01-02 110 [4294967385,4294967386,4294967387,4294967388,4294967389,4294967390,4294967391,4294967392,4294967393,4294967394,4294967395,4294967396,4294967397,4294967398,4294967399,4294967400,4294967401,4294967402,4294967403,4294967404,4294967405,4294967406,4294967407,4294967408,4294967409,4294967410,4294967411,4294967412,4294967413,4294967414,4294967415,4294967416,4294967417,4294967418,4294967419,4294967420,4294967421,4294967422,4294967423,4294967424,4294967425,4294967426,4294967427,4294967428,4294967429,4294967430,4294967431,4294967432,4294967433,4294967434,4294967435,4294967436,4294967437,4294967438,4294967439,4294967440,4294967441,4294967442,4294967443,4294967444,4294967445,4294967446,4294967447,4294967448,4294967449,4294967450,4294967451,4294967452,4294967453,4294967454,4294967455,4294967456,4294967457,4294967458,4294967459,4294967460,4294967461,4294967462,4294967463,4294967464,4294967465,4294967466,4294967467,4294967468,4294967469,4294967470,4294967471,4294967472,4294967473,4294967474,4294967475,4294967476,4294967477,4294967478,4294967479,4294967480,4294967481,4294967482,4294967483,4294967484,4294967485,4294967486,4294967487,4294967488,4294967489,4294967490,4294967491,4294967492,4294967493,4294967494] +2019-01-03 210 [4294967296,4294967297,4294967298,4294967299,4294967300,4294967301,4294967302,4294967303,4294967304,4294967305,4294967306,4294967307,4294967308,4294967309,4294967310,4294967311,4294967312,4294967313,4294967314,4294967315,4294967316,4294967317,4294967318,4294967319,4294967320,4294967321,4294967322,4294967323,4294967324,4294967325,4294967326,4294967327,4294967328,4294967329,4294967330,4294967331,4294967332,4294967333,4294967334,4294967335,4294967336,4294967337,4294967338,4294967339,4294967340,4294967341,4294967342,4294967343,4294967344,4294967345,4294967346,4294967347,4294967348,4294967349,4294967350,4294967351,4294967352,4294967353,4294967354,4294967355,4294967356,4294967357,4294967358,4294967359,4294967360,4294967361,4294967362,4294967363,4294967364,4294967365,4294967366,4294967367,4294967368,4294967369,4294967370,4294967371,4294967372,4294967373,4294967374,4294967375,4294967376,4294967377,4294967378,4294967379,4294967380,4294967381,4294967382,4294967383,4294967384,4294967385,4294967386,4294967387,4294967388,4294967389,4294967390,4294967391,4294967392,4294967393,4294967394,4294967395,4294967396,4294967397,4294967398,4294967399,4294967400,4294967401,4294967402,4294967403,4294967404,4294967405,4294967406,4294967407,4294967408,4294967409,4294967410,4294967411,4294967412,4294967413,4294967414,4294967415,4294967416,4294967417,4294967418,4294967419,4294967420,4294967421,4294967422,4294967423,4294967424,4294967425,4294967426,4294967427,4294967428,4294967429,4294967430,4294967431,4294967432,4294967433,4294967434,4294967435,4294967436,4294967437,4294967438,4294967439,4294967440,4294967441,4294967442,4294967443,4294967444,4294967445,4294967446,4294967447,4294967448,4294967449,4294967450,4294967451,4294967452,4294967453,4294967454,4294967455,4294967456,4294967457,4294967458,4294967459,4294967460,4294967461,4294967462,4294967463,4294967464,4294967465,4294967466,4294967467,4294967468,4294967469,4294967470,4294967471,4294967472,4294967473,4294967474,4294967475,4294967476,4294967477,4294967478,4294967479,4294967480,4294967481,4294967482,4294967483,4294967484,4294967485,4294967486,4294967487,4294967488,4294967489,4294967490,4294967491,4294967492,4294967493,4294967494,4294967495,4294967496,4294967497,4294967498,4294967499,4294967500,4294967501,4294967502,4294967503,4294967504,4294967505] +110 100 199 11 99 188 +110 100 199 11 99 188 +211 +211 +209 +[4294967296,4294967297,4294967298] +[4294967296,4294967297,4294967298] diff --git a/tests/queries/0_stateless/00829_bitmap64_function.sql b/tests/queries/0_stateless/00829_bitmap64_function.sql new file mode 100644 index 00000000000..9d33dcefff9 --- /dev/null +++ b/tests/queries/0_stateless/00829_bitmap64_function.sql @@ -0,0 +1,73 @@ +DROP TABLE IF EXISTS bitmap_test; +CREATE TABLE bitmap_test(pickup_date Date, city_id UInt32, uid UInt64)ENGINE = Memory; +INSERT INTO bitmap_test SELECT '2019-01-01', 1, 4294967295 + number FROM numbers(1,100); +INSERT INTO bitmap_test SELECT '2019-01-02', 1, 4294967295 + number FROM numbers(90,110); +INSERT INTO bitmap_test SELECT '2019-01-03', 2, 4294967295 + number FROM numbers(1,210); + + +DROP TABLE IF EXISTS bitmap_state_test; +CREATE TABLE bitmap_state_test +( + pickup_date Date, + city_id UInt32, + uv AggregateFunction( groupBitmap, UInt64 ) +) +ENGINE = AggregatingMergeTree() PARTITION BY toYYYYMM(pickup_date) ORDER BY (pickup_date, city_id); + + +INSERT INTO bitmap_state_test SELECT + pickup_date, + city_id, + groupBitmapState(uid) AS uv +FROM bitmap_test +GROUP BY pickup_date, city_id; + +SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date; + +SELECT groupBitmap( uid ) AS user_num FROM bitmap_test; + +SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date; + +SELECT + bitmapCardinality(day_today) AS today_users, + bitmapCardinality(day_before) AS before_users, + bitmapOrCardinality(day_today, day_before) AS all_users, + bitmapAndCardinality(day_today, day_before) AS old_users, + bitmapAndnotCardinality(day_today, day_before) AS new_users, + bitmapXorCardinality(day_today, day_before) AS diff_users +FROM +( + SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id +) js1 +ALL LEFT JOIN +( + SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id +) js2 +USING city_id; + +SELECT + bitmapCardinality(day_today) AS today_users, + bitmapCardinality(day_before) AS before_users, + bitmapCardinality(bitmapOr(day_today, day_before))ll_users, + bitmapCardinality(bitmapAnd(day_today, day_before)) AS old_users, + bitmapCardinality(bitmapAndnot(day_today, day_before)) AS new_users, + bitmapCardinality(bitmapXor(day_today, day_before)) AS diff_users +FROM +( + SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id +) js1 +ALL LEFT JOIN +( + SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id +) js2 +USING city_id; + +SELECT count(*) FROM bitmap_test WHERE bitmapHasAny((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), bitmapBuild([uid])); + +SELECT count(*) FROM bitmap_test WHERE bitmapHasAny(bitmapBuild([uid]), (SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01')); + +SELECT count(*) FROM bitmap_test WHERE 0 = bitmapHasAny((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), bitmapBuild([uid])); + +SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([4294967296, 4294967297, 4294967298], 'Array(UInt64)')))) FROM bitmap_test GROUP BY city_id; + +