Post-PR fixes #2

Fixed style issues and build for clang-7; 64-bit buffer for BitReader and BitWriter; Fixed overflow and writing more bytes on flushing; Added maskLowBits() and tests for it.
2024-11-21 23:21:59 +00:00 · 2019-06-17 06:27:42 +03:00 · 2019-06-17 06:27:42 +03:00 · 81f9055d81
commit 81f9055d81
parent 0ebb145d32
6 changed files with 81 additions and 45 deletions
--- a/dbms/src/Common/BitHelpers.h
+++ b/dbms/src/Common/BitHelpers.h
@ -72,3 +72,12 @@ inline size_t getTrailingZeroBits(T x)
        return __builtin_ctzll(x);
    }
 }
+
+/** Returns a mask that has '1' for `bits` LSB set:
+ * maskLowBits<UInt8>(3) => 00000111
+ */
+template <typename T>
+inline T maskLowBits(unsigned char bits)
+{
+    return static_cast<T>(static_cast<T>(1) << bits) - 1;
+}
--- a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp
+++ b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp
@ -173,7 +173,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
    if (source < source_end)
    {
        prev_delta = unalignedLoad<DeltaType>(source);
-        prev_value = static_cast<DeltaType>(prev_value) + prev_delta;
+        prev_value = static_cast<T>(prev_value + prev_delta);
        unalignedStore(dest, prev_value);

        source += sizeof(prev_delta);
@ -229,7 +229,7 @@ UInt8 getDataBytesSize(DataTypePtr column_type)
    return data_bytes_size;
 }

-} // namespace
+}


 CompressionCodecDoubleDelta::CompressionCodecDoubleDelta(UInt8 data_bytes_size_)
--- a/dbms/src/Compression/CompressionCodecGorilla.cpp
+++ b/dbms/src/Compression/CompressionCodecGorilla.cpp
@ -230,7 +230,7 @@ UInt8 getDataBytesSize(DataTypePtr column_type)
    return delta_bytes_size;
 }

-} // namespace
+}


 CompressionCodecGorilla::CompressionCodecGorilla(UInt8 data_bytes_size_)
--- a/dbms/src/Compression/tests/gtest_compressionCodec.cpp
+++ b/dbms/src/Compression/tests/gtest_compressionCodec.cpp
@ -5,14 +5,19 @@
 #include <IO/WriteHelpers.h>
 #include <Common/PODArray.h>

-#include <gtest/gtest.h>
-
 #include <cmath>
 #include <initializer_list>
 #include <iomanip>
 #include <memory>
 #include <vector>

+#pragma GCC diagnostic ignored "-Wsign-compare"
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#pragma clang diagnostic ignored "-Wundef"
+#endif
+#include <gtest/gtest.h>
+
 namespace
 {
 using namespace DB;
--- a/dbms/src/IO/BitHelpers.h
+++ b/dbms/src/IO/BitHelpers.h
@ -3,6 +3,7 @@
 #include <IO/ReadBuffer.h>
 #include <IO/WriteBuffer.h>
 #include <Core/Types.h>
+#include <Common/BitHelpers.h>

 namespace DB
 {
@ -22,12 +23,11 @@ namespace DB
 *  r.readBit()    => 0b0
 **/

-
 class BitReader
 {
    ReadBuffer & buf;

-    UInt8 bits_buffer;
+    UInt64 bits_buffer;
    UInt8 bits_count;
    static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;

@ -59,13 +59,13 @@ public:
            }

            const auto to_read = std::min(bits, bits_count);
-            // read MSB bits from bits_bufer
-            const UInt8 v = bits_buffer >> (bits_count - to_read);
-            const UInt8 mask = static_cast<UInt8>(~(~0U << to_read));
-            const UInt8 value = v & mask;
+
+            const UInt64 v = bits_buffer >> (bits_count - to_read);
+            const UInt64 mask = maskLowBits<UInt64>(to_read);
+            const UInt64 value = v & mask;
            result |= value;

-            // unset MSB that were read
+            // unset bits that were read
            bits_buffer &= ~(mask << (bits_count - to_read));
            bits_count -= to_read;
            bits -= to_read;
@ -95,6 +95,9 @@ private:
    void fillBuffer()
    {
        auto read = buf.read(reinterpret_cast<char *>(&bits_buffer), BIT_BUFFER_SIZE / 8);
+        bits_buffer = be64toh(bits_buffer);
+        bits_buffer >>= BIT_BUFFER_SIZE - read * 8;
+
        bits_count = static_cast<UInt8>(read) * 8;
    }
 };
@ -103,7 +106,7 @@ class BitWriter
 {
    WriteBuffer & buf;

-    UInt8 bits_buffer;
+    UInt64 bits_buffer;
    UInt8 bits_count;

    static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;
@ -132,13 +135,11 @@ public:
            const UInt8 capacity = BIT_BUFFER_SIZE - bits_count;
            if (capacity < bits)
            {
-                // write MSB:
                v >>= bits - capacity;
                to_write = capacity;
            }

-
-            const UInt64 mask = (1 << to_write) - 1;
+            const UInt64 mask = maskLowBits<UInt64>(to_write);
            v &= mask;
 //            assert(v <= 255);

@ -166,7 +167,8 @@ public:
 private:
    void doFlush()
    {
-        buf.write(reinterpret_cast<const char *>(&bits_buffer), BIT_BUFFER_SIZE / 8);
+        bits_buffer = htobe64(bits_buffer);
+        buf.write(reinterpret_cast<const char *>(&bits_buffer), (bits_count + 7) / 8);

        bits_count = 0;
        bits_buffer = 0;
--- a/dbms/src/IO/tests/gtest_bit_io.cpp
+++ b/dbms/src/IO/tests/gtest_bit_io.cpp
@ -3,29 +3,32 @@
 #include <Core/Types.h>
 #include <IO/MemoryReadWriteBuffer.h>
 #include <IO/ReadBufferFromMemory.h>
-
-#include <gtest/gtest.h>
+#include <Common/BitHelpers.h>
+#include <Common/PODArray.h>

 #include <memory>
 #include <iostream>
 #include <iomanip>
 #include <bitset>

-//#pragma GCC diagnostic push
-//#pragma GCC diagnostic ignored "-Wunused-const-variable"
-//#pragma GCC diagnostic ignored "-Wunused-variable"
-//#pragma GCC diagnostic ignored "-Wunused-function"
+#pragma GCC diagnostic ignored "-Wsign-compare"
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#pragma clang diagnostic ignored "-Wundef"
+#endif
+
+#include <gtest/gtest.h>
+

 namespace
 {
 using namespace DB;

-// Intentionally asymetric both byte and word-size to detect read and write inconsistencies
+// Intentionally asymmetric both byte and word-size to detect read and write inconsistencies
 // each prime bit is set to 0.
 //                              v-61     v-53   v-47  v-41 v-37   v-31     v-23  v-17   v-11   v-5
 const UInt64 BIT_PATTERN = 0b11101011'11101111'10111010'11101111'10101111'10111010'11101011'10101001;
 const UInt8 PRIMES[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61};
-const UInt8 REPEAT_TIMES = 11;

 template <typename T>
 std::string bin(const T & value, size_t bits = sizeof(T)*8)
@ -44,8 +47,9 @@ T getBits(UInt8 bits, const T & value)
    return value & mask;
 }

-std::ostream & dumpBuffer(const char * begin,
-                          const char * end,
+template <typename T>
+std::ostream & dumpBuffer(const T begin,
+                          const T end,
                          std::ostream * destination,
                          const char* col_sep = " ",
                          const char* row_sep = "\n",
@ -70,14 +74,15 @@ std::ostream & dumpBuffer(const char * begin,
    return *destination;
 }

-std::string dumpBufferContents(BufferBase & buf,
-                               const char* col_sep = " ",
-                               const char* row_sep = "\n",
-                               const size_t cols_in_row = 8)
+template <typename T>
+std::string dumpContents(const T& container,
+                         const char* col_sep = " ",
+                         const char* row_sep = "\n",
+                         const size_t cols_in_row = 8)

 {
    std::stringstream sstr;
-    dumpBuffer(buf.buffer().begin(), buf.buffer().end(), &sstr, col_sep, row_sep, cols_in_row);
+    dumpBuffer(std::begin(container), std::end(container), &sstr, col_sep, row_sep, cols_in_row);

    return sstr.str();
 }
@ -107,13 +112,14 @@ TEST_P(BitIO, WriteAndRead)
    {
        max_buffer_size += bv.first;
    }
-    max_buffer_size = (max_buffer_size + 8) / 8;
+    max_buffer_size = (max_buffer_size + 7) / 8;
    SCOPED_TRACE(max_buffer_size);

-    MemoryWriteBuffer memory_write_buffer(max_buffer_size * 2, max_buffer_size, 1.5, max_buffer_size);
+    PODArray<char> data(max_buffer_size);

    {
-        BitWriter writer(memory_write_buffer);
+        WriteBuffer write_buffer(data.data(), data.size());
+        BitWriter writer(write_buffer);
        for (const auto & bv : bits_and_vals)
        {
            writer.writeBits(bv.first, bv.second);
@ -122,25 +128,27 @@ TEST_P(BitIO, WriteAndRead)
    }

    {
-        auto memory_read_buffer = memory_write_buffer.tryGetReadBuffer();
+        ReadBufferFromMemory read_buffer(data.data(), data.size());
+//        auto memory_read_buffer = memory_write_buffer.tryGetReadBuffer();

        if (expected_buffer_binary != std::string{})
        {
-            const auto actual_buffer_binary = dumpBufferContents(*memory_read_buffer, " ", " ");
+            const auto actual_buffer_binary = dumpContents(data, " ", " ");
            ASSERT_EQ(expected_buffer_binary, actual_buffer_binary);
        }

-        BitReader reader(*memory_read_buffer);
+        BitReader reader(read_buffer);

        int item = 0;
        for (const auto & bv : bits_and_vals)
        {
-            const auto actual_value = reader.readBits(bv.first);
+            SCOPED_TRACE(::testing::Message()
+                         << "item #" << item << ", width: " << static_cast<UInt32>(bv.first)
+                         << ", value: " << bin(bv.second)
+                         << ".\n\n\nBuffer memory:\n" << dumpContents(data));

-            ASSERT_EQ(getBits(bv.first, bv.second), actual_value)
-                    << "item #" << item << ", width: " << static_cast<UInt32>(bv.first)
-                    << ", value: " << bin(bv.second)
-                    << ".\n\n\nBuffer memory:\n" << dumpBufferContents(*memory_read_buffer);
+            //EXPECT_EQ(getBits(bv.first, bv.second), reader.peekBits(bv.first));
+            EXPECT_EQ(getBits(bv.first, bv.second), reader.readBits(bv.first));

            ++item;
        }
@ -157,7 +165,9 @@ INSTANTIATE_TEST_CASE_P(Simple,
                {{7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {3, 0xFFFF}},
                "01111110 11111101 11111011 11110111 11101111 11011111 10111111 01111111 11000000 "),
            TestCaseParameter({{33, 0xFF110d0b07050300}, {33, 0xAAEE29251f1d1713}}),
-            TestCaseParameter({{33, BIT_PATTERN}, {33, BIT_PATTERN}})
+            TestCaseParameter({{33, BIT_PATTERN}, {33, BIT_PATTERN}}),
+            TestCaseParameter({{24, 0xFFFFFFFF}},
+                "11111111 11111111 11111111 ")
 ));

 TestCaseParameter primes_case(UInt8 repeat_times, UInt64 pattern)
@ -184,5 +194,15 @@ INSTANTIATE_TEST_CASE_P(Primes,
            primes_case(11, BIT_PATTERN)
 ));

-} // namespace
+}

+TEST(BitHelpers, MaskLowBits)
+{
+    EXPECT_EQ(0b00000111, ::maskLowBits<UInt8>(3));
+    EXPECT_EQ(0b01111111, ::maskLowBits<UInt8>(7));
+    EXPECT_EQ(0b0000000001111111, ::maskLowBits<UInt16>(7));
+    EXPECT_EQ(0b0001111111111111, ::maskLowBits<UInt16>(13));
+    EXPECT_EQ(0b00000111111111111111111111111111, ::maskLowBits<UInt32>(27));
+    EXPECT_EQ(0b111111111111111111111111111111111, ::maskLowBits<UInt64>(33));
+    EXPECT_EQ(0b11111111111111111111111111111111111, ::maskLowBits<UInt64>(35));
+}