Post-PR fixes #2

Fixed style issues and build for clang-7;
64-bit buffer for BitReader and BitWriter;
Fixed overflow and writing more bytes on flushing;
Added maskLowBits() and tests for it.
This commit is contained in:
Vasily Nemkov 2019-06-17 06:27:42 +03:00
parent 0ebb145d32
commit 81f9055d81
6 changed files with 81 additions and 45 deletions

View File

@ -72,3 +72,12 @@ inline size_t getTrailingZeroBits(T x)
return __builtin_ctzll(x);
}
}
/** Returns a mask that has '1' for `bits` LSB set:
* maskLowBits<UInt8>(3) => 00000111
*/
template <typename T>
inline T maskLowBits(unsigned char bits)
{
return static_cast<T>(static_cast<T>(1) << bits) - 1;
}

View File

@ -173,7 +173,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
if (source < source_end)
{
prev_delta = unalignedLoad<DeltaType>(source);
prev_value = static_cast<DeltaType>(prev_value) + prev_delta;
prev_value = static_cast<T>(prev_value + prev_delta);
unalignedStore(dest, prev_value);
source += sizeof(prev_delta);
@ -229,7 +229,7 @@ UInt8 getDataBytesSize(DataTypePtr column_type)
return data_bytes_size;
}
} // namespace
}
CompressionCodecDoubleDelta::CompressionCodecDoubleDelta(UInt8 data_bytes_size_)

View File

@ -230,7 +230,7 @@ UInt8 getDataBytesSize(DataTypePtr column_type)
return delta_bytes_size;
}
} // namespace
}
CompressionCodecGorilla::CompressionCodecGorilla(UInt8 data_bytes_size_)

View File

@ -5,14 +5,19 @@
#include <IO/WriteHelpers.h>
#include <Common/PODArray.h>
#include <gtest/gtest.h>
#include <cmath>
#include <initializer_list>
#include <iomanip>
#include <memory>
#include <vector>
#pragma GCC diagnostic ignored "-Wsign-compare"
#ifdef __clang__
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#pragma clang diagnostic ignored "-Wundef"
#endif
#include <gtest/gtest.h>
namespace
{
using namespace DB;

View File

@ -3,6 +3,7 @@
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <Core/Types.h>
#include <Common/BitHelpers.h>
namespace DB
{
@ -22,12 +23,11 @@ namespace DB
* r.readBit() => 0b0
**/
class BitReader
{
ReadBuffer & buf;
UInt8 bits_buffer;
UInt64 bits_buffer;
UInt8 bits_count;
static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;
@ -59,13 +59,13 @@ public:
}
const auto to_read = std::min(bits, bits_count);
// read MSB bits from bits_bufer
const UInt8 v = bits_buffer >> (bits_count - to_read);
const UInt8 mask = static_cast<UInt8>(~(~0U << to_read));
const UInt8 value = v & mask;
const UInt64 v = bits_buffer >> (bits_count - to_read);
const UInt64 mask = maskLowBits<UInt64>(to_read);
const UInt64 value = v & mask;
result |= value;
// unset MSB that were read
// unset bits that were read
bits_buffer &= ~(mask << (bits_count - to_read));
bits_count -= to_read;
bits -= to_read;
@ -95,6 +95,9 @@ private:
void fillBuffer()
{
auto read = buf.read(reinterpret_cast<char *>(&bits_buffer), BIT_BUFFER_SIZE / 8);
bits_buffer = be64toh(bits_buffer);
bits_buffer >>= BIT_BUFFER_SIZE - read * 8;
bits_count = static_cast<UInt8>(read) * 8;
}
};
@ -103,7 +106,7 @@ class BitWriter
{
WriteBuffer & buf;
UInt8 bits_buffer;
UInt64 bits_buffer;
UInt8 bits_count;
static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;
@ -132,13 +135,11 @@ public:
const UInt8 capacity = BIT_BUFFER_SIZE - bits_count;
if (capacity < bits)
{
// write MSB:
v >>= bits - capacity;
to_write = capacity;
}
const UInt64 mask = (1 << to_write) - 1;
const UInt64 mask = maskLowBits<UInt64>(to_write);
v &= mask;
// assert(v <= 255);
@ -166,7 +167,8 @@ public:
private:
void doFlush()
{
buf.write(reinterpret_cast<const char *>(&bits_buffer), BIT_BUFFER_SIZE / 8);
bits_buffer = htobe64(bits_buffer);
buf.write(reinterpret_cast<const char *>(&bits_buffer), (bits_count + 7) / 8);
bits_count = 0;
bits_buffer = 0;

View File

@ -3,29 +3,32 @@
#include <Core/Types.h>
#include <IO/MemoryReadWriteBuffer.h>
#include <IO/ReadBufferFromMemory.h>
#include <gtest/gtest.h>
#include <Common/BitHelpers.h>
#include <Common/PODArray.h>
#include <memory>
#include <iostream>
#include <iomanip>
#include <bitset>
//#pragma GCC diagnostic push
//#pragma GCC diagnostic ignored "-Wunused-const-variable"
//#pragma GCC diagnostic ignored "-Wunused-variable"
//#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wsign-compare"
#ifdef __clang__
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#pragma clang diagnostic ignored "-Wundef"
#endif
#include <gtest/gtest.h>
namespace
{
using namespace DB;
// Intentionally asymetric both byte and word-size to detect read and write inconsistencies
// Intentionally asymmetric both byte and word-size to detect read and write inconsistencies
// each prime bit is set to 0.
// v-61 v-53 v-47 v-41 v-37 v-31 v-23 v-17 v-11 v-5
const UInt64 BIT_PATTERN = 0b11101011'11101111'10111010'11101111'10101111'10111010'11101011'10101001;
const UInt8 PRIMES[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61};
const UInt8 REPEAT_TIMES = 11;
template <typename T>
std::string bin(const T & value, size_t bits = sizeof(T)*8)
@ -44,8 +47,9 @@ T getBits(UInt8 bits, const T & value)
return value & mask;
}
std::ostream & dumpBuffer(const char * begin,
const char * end,
template <typename T>
std::ostream & dumpBuffer(const T begin,
const T end,
std::ostream * destination,
const char* col_sep = " ",
const char* row_sep = "\n",
@ -70,14 +74,15 @@ std::ostream & dumpBuffer(const char * begin,
return *destination;
}
std::string dumpBufferContents(BufferBase & buf,
const char* col_sep = " ",
const char* row_sep = "\n",
const size_t cols_in_row = 8)
template <typename T>
std::string dumpContents(const T& container,
const char* col_sep = " ",
const char* row_sep = "\n",
const size_t cols_in_row = 8)
{
std::stringstream sstr;
dumpBuffer(buf.buffer().begin(), buf.buffer().end(), &sstr, col_sep, row_sep, cols_in_row);
dumpBuffer(std::begin(container), std::end(container), &sstr, col_sep, row_sep, cols_in_row);
return sstr.str();
}
@ -107,13 +112,14 @@ TEST_P(BitIO, WriteAndRead)
{
max_buffer_size += bv.first;
}
max_buffer_size = (max_buffer_size + 8) / 8;
max_buffer_size = (max_buffer_size + 7) / 8;
SCOPED_TRACE(max_buffer_size);
MemoryWriteBuffer memory_write_buffer(max_buffer_size * 2, max_buffer_size, 1.5, max_buffer_size);
PODArray<char> data(max_buffer_size);
{
BitWriter writer(memory_write_buffer);
WriteBuffer write_buffer(data.data(), data.size());
BitWriter writer(write_buffer);
for (const auto & bv : bits_and_vals)
{
writer.writeBits(bv.first, bv.second);
@ -122,25 +128,27 @@ TEST_P(BitIO, WriteAndRead)
}
{
auto memory_read_buffer = memory_write_buffer.tryGetReadBuffer();
ReadBufferFromMemory read_buffer(data.data(), data.size());
// auto memory_read_buffer = memory_write_buffer.tryGetReadBuffer();
if (expected_buffer_binary != std::string{})
{
const auto actual_buffer_binary = dumpBufferContents(*memory_read_buffer, " ", " ");
const auto actual_buffer_binary = dumpContents(data, " ", " ");
ASSERT_EQ(expected_buffer_binary, actual_buffer_binary);
}
BitReader reader(*memory_read_buffer);
BitReader reader(read_buffer);
int item = 0;
for (const auto & bv : bits_and_vals)
{
const auto actual_value = reader.readBits(bv.first);
SCOPED_TRACE(::testing::Message()
<< "item #" << item << ", width: " << static_cast<UInt32>(bv.first)
<< ", value: " << bin(bv.second)
<< ".\n\n\nBuffer memory:\n" << dumpContents(data));
ASSERT_EQ(getBits(bv.first, bv.second), actual_value)
<< "item #" << item << ", width: " << static_cast<UInt32>(bv.first)
<< ", value: " << bin(bv.second)
<< ".\n\n\nBuffer memory:\n" << dumpBufferContents(*memory_read_buffer);
//EXPECT_EQ(getBits(bv.first, bv.second), reader.peekBits(bv.first));
EXPECT_EQ(getBits(bv.first, bv.second), reader.readBits(bv.first));
++item;
}
@ -157,7 +165,9 @@ INSTANTIATE_TEST_CASE_P(Simple,
{{7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {3, 0xFFFF}},
"01111110 11111101 11111011 11110111 11101111 11011111 10111111 01111111 11000000 "),
TestCaseParameter({{33, 0xFF110d0b07050300}, {33, 0xAAEE29251f1d1713}}),
TestCaseParameter({{33, BIT_PATTERN}, {33, BIT_PATTERN}})
TestCaseParameter({{33, BIT_PATTERN}, {33, BIT_PATTERN}}),
TestCaseParameter({{24, 0xFFFFFFFF}},
"11111111 11111111 11111111 ")
));
TestCaseParameter primes_case(UInt8 repeat_times, UInt64 pattern)
@ -184,5 +194,15 @@ INSTANTIATE_TEST_CASE_P(Primes,
primes_case(11, BIT_PATTERN)
));
} // namespace
}
TEST(BitHelpers, MaskLowBits)
{
EXPECT_EQ(0b00000111, ::maskLowBits<UInt8>(3));
EXPECT_EQ(0b01111111, ::maskLowBits<UInt8>(7));
EXPECT_EQ(0b0000000001111111, ::maskLowBits<UInt16>(7));
EXPECT_EQ(0b0001111111111111, ::maskLowBits<UInt16>(13));
EXPECT_EQ(0b00000111111111111111111111111111, ::maskLowBits<UInt32>(27));
EXPECT_EQ(0b111111111111111111111111111111111, ::maskLowBits<UInt64>(33));
EXPECT_EQ(0b11111111111111111111111111111111111, ::maskLowBits<UInt64>(35));
}