mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Post-PR fixes #2
Fixed style issues and build for clang-7; 64-bit buffer for BitReader and BitWriter; Fixed overflow and writing more bytes on flushing; Added maskLowBits() and tests for it.
This commit is contained in:
parent
0ebb145d32
commit
81f9055d81
@ -72,3 +72,12 @@ inline size_t getTrailingZeroBits(T x)
|
||||
return __builtin_ctzll(x);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns a mask that has '1' for `bits` LSB set:
|
||||
* maskLowBits<UInt8>(3) => 00000111
|
||||
*/
|
||||
template <typename T>
|
||||
inline T maskLowBits(unsigned char bits)
|
||||
{
|
||||
return static_cast<T>(static_cast<T>(1) << bits) - 1;
|
||||
}
|
||||
|
@ -173,7 +173,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
if (source < source_end)
|
||||
{
|
||||
prev_delta = unalignedLoad<DeltaType>(source);
|
||||
prev_value = static_cast<DeltaType>(prev_value) + prev_delta;
|
||||
prev_value = static_cast<T>(prev_value + prev_delta);
|
||||
unalignedStore(dest, prev_value);
|
||||
|
||||
source += sizeof(prev_delta);
|
||||
@ -229,7 +229,7 @@ UInt8 getDataBytesSize(DataTypePtr column_type)
|
||||
return data_bytes_size;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
}
|
||||
|
||||
|
||||
CompressionCodecDoubleDelta::CompressionCodecDoubleDelta(UInt8 data_bytes_size_)
|
||||
|
@ -230,7 +230,7 @@ UInt8 getDataBytesSize(DataTypePtr column_type)
|
||||
return delta_bytes_size;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
}
|
||||
|
||||
|
||||
CompressionCodecGorilla::CompressionCodecGorilla(UInt8 data_bytes_size_)
|
||||
|
@ -5,14 +5,19 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <initializer_list>
|
||||
#include <iomanip>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wsign-compare"
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#pragma clang diagnostic ignored "-Wundef"
|
||||
#endif
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
using namespace DB;
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -22,12 +23,11 @@ namespace DB
|
||||
* r.readBit() => 0b0
|
||||
**/
|
||||
|
||||
|
||||
class BitReader
|
||||
{
|
||||
ReadBuffer & buf;
|
||||
|
||||
UInt8 bits_buffer;
|
||||
UInt64 bits_buffer;
|
||||
UInt8 bits_count;
|
||||
static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;
|
||||
|
||||
@ -59,13 +59,13 @@ public:
|
||||
}
|
||||
|
||||
const auto to_read = std::min(bits, bits_count);
|
||||
// read MSB bits from bits_bufer
|
||||
const UInt8 v = bits_buffer >> (bits_count - to_read);
|
||||
const UInt8 mask = static_cast<UInt8>(~(~0U << to_read));
|
||||
const UInt8 value = v & mask;
|
||||
|
||||
const UInt64 v = bits_buffer >> (bits_count - to_read);
|
||||
const UInt64 mask = maskLowBits<UInt64>(to_read);
|
||||
const UInt64 value = v & mask;
|
||||
result |= value;
|
||||
|
||||
// unset MSB that were read
|
||||
// unset bits that were read
|
||||
bits_buffer &= ~(mask << (bits_count - to_read));
|
||||
bits_count -= to_read;
|
||||
bits -= to_read;
|
||||
@ -95,6 +95,9 @@ private:
|
||||
void fillBuffer()
|
||||
{
|
||||
auto read = buf.read(reinterpret_cast<char *>(&bits_buffer), BIT_BUFFER_SIZE / 8);
|
||||
bits_buffer = be64toh(bits_buffer);
|
||||
bits_buffer >>= BIT_BUFFER_SIZE - read * 8;
|
||||
|
||||
bits_count = static_cast<UInt8>(read) * 8;
|
||||
}
|
||||
};
|
||||
@ -103,7 +106,7 @@ class BitWriter
|
||||
{
|
||||
WriteBuffer & buf;
|
||||
|
||||
UInt8 bits_buffer;
|
||||
UInt64 bits_buffer;
|
||||
UInt8 bits_count;
|
||||
|
||||
static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;
|
||||
@ -132,13 +135,11 @@ public:
|
||||
const UInt8 capacity = BIT_BUFFER_SIZE - bits_count;
|
||||
if (capacity < bits)
|
||||
{
|
||||
// write MSB:
|
||||
v >>= bits - capacity;
|
||||
to_write = capacity;
|
||||
}
|
||||
|
||||
|
||||
const UInt64 mask = (1 << to_write) - 1;
|
||||
const UInt64 mask = maskLowBits<UInt64>(to_write);
|
||||
v &= mask;
|
||||
// assert(v <= 255);
|
||||
|
||||
@ -166,7 +167,8 @@ public:
|
||||
private:
|
||||
void doFlush()
|
||||
{
|
||||
buf.write(reinterpret_cast<const char *>(&bits_buffer), BIT_BUFFER_SIZE / 8);
|
||||
bits_buffer = htobe64(bits_buffer);
|
||||
buf.write(reinterpret_cast<const char *>(&bits_buffer), (bits_count + 7) / 8);
|
||||
|
||||
bits_count = 0;
|
||||
bits_buffer = 0;
|
||||
|
@ -3,29 +3,32 @@
|
||||
#include <Core/Types.h>
|
||||
#include <IO/MemoryReadWriteBuffer.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <bitset>
|
||||
|
||||
//#pragma GCC diagnostic push
|
||||
//#pragma GCC diagnostic ignored "-Wunused-const-variable"
|
||||
//#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||
//#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wsign-compare"
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#pragma clang diagnostic ignored "-Wundef"
|
||||
#endif
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
using namespace DB;
|
||||
|
||||
// Intentionally asymetric both byte and word-size to detect read and write inconsistencies
|
||||
// Intentionally asymmetric both byte and word-size to detect read and write inconsistencies
|
||||
// each prime bit is set to 0.
|
||||
// v-61 v-53 v-47 v-41 v-37 v-31 v-23 v-17 v-11 v-5
|
||||
const UInt64 BIT_PATTERN = 0b11101011'11101111'10111010'11101111'10101111'10111010'11101011'10101001;
|
||||
const UInt8 PRIMES[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61};
|
||||
const UInt8 REPEAT_TIMES = 11;
|
||||
|
||||
template <typename T>
|
||||
std::string bin(const T & value, size_t bits = sizeof(T)*8)
|
||||
@ -44,8 +47,9 @@ T getBits(UInt8 bits, const T & value)
|
||||
return value & mask;
|
||||
}
|
||||
|
||||
std::ostream & dumpBuffer(const char * begin,
|
||||
const char * end,
|
||||
template <typename T>
|
||||
std::ostream & dumpBuffer(const T begin,
|
||||
const T end,
|
||||
std::ostream * destination,
|
||||
const char* col_sep = " ",
|
||||
const char* row_sep = "\n",
|
||||
@ -70,14 +74,15 @@ std::ostream & dumpBuffer(const char * begin,
|
||||
return *destination;
|
||||
}
|
||||
|
||||
std::string dumpBufferContents(BufferBase & buf,
|
||||
const char* col_sep = " ",
|
||||
const char* row_sep = "\n",
|
||||
const size_t cols_in_row = 8)
|
||||
template <typename T>
|
||||
std::string dumpContents(const T& container,
|
||||
const char* col_sep = " ",
|
||||
const char* row_sep = "\n",
|
||||
const size_t cols_in_row = 8)
|
||||
|
||||
{
|
||||
std::stringstream sstr;
|
||||
dumpBuffer(buf.buffer().begin(), buf.buffer().end(), &sstr, col_sep, row_sep, cols_in_row);
|
||||
dumpBuffer(std::begin(container), std::end(container), &sstr, col_sep, row_sep, cols_in_row);
|
||||
|
||||
return sstr.str();
|
||||
}
|
||||
@ -107,13 +112,14 @@ TEST_P(BitIO, WriteAndRead)
|
||||
{
|
||||
max_buffer_size += bv.first;
|
||||
}
|
||||
max_buffer_size = (max_buffer_size + 8) / 8;
|
||||
max_buffer_size = (max_buffer_size + 7) / 8;
|
||||
SCOPED_TRACE(max_buffer_size);
|
||||
|
||||
MemoryWriteBuffer memory_write_buffer(max_buffer_size * 2, max_buffer_size, 1.5, max_buffer_size);
|
||||
PODArray<char> data(max_buffer_size);
|
||||
|
||||
{
|
||||
BitWriter writer(memory_write_buffer);
|
||||
WriteBuffer write_buffer(data.data(), data.size());
|
||||
BitWriter writer(write_buffer);
|
||||
for (const auto & bv : bits_and_vals)
|
||||
{
|
||||
writer.writeBits(bv.first, bv.second);
|
||||
@ -122,25 +128,27 @@ TEST_P(BitIO, WriteAndRead)
|
||||
}
|
||||
|
||||
{
|
||||
auto memory_read_buffer = memory_write_buffer.tryGetReadBuffer();
|
||||
ReadBufferFromMemory read_buffer(data.data(), data.size());
|
||||
// auto memory_read_buffer = memory_write_buffer.tryGetReadBuffer();
|
||||
|
||||
if (expected_buffer_binary != std::string{})
|
||||
{
|
||||
const auto actual_buffer_binary = dumpBufferContents(*memory_read_buffer, " ", " ");
|
||||
const auto actual_buffer_binary = dumpContents(data, " ", " ");
|
||||
ASSERT_EQ(expected_buffer_binary, actual_buffer_binary);
|
||||
}
|
||||
|
||||
BitReader reader(*memory_read_buffer);
|
||||
BitReader reader(read_buffer);
|
||||
|
||||
int item = 0;
|
||||
for (const auto & bv : bits_and_vals)
|
||||
{
|
||||
const auto actual_value = reader.readBits(bv.first);
|
||||
SCOPED_TRACE(::testing::Message()
|
||||
<< "item #" << item << ", width: " << static_cast<UInt32>(bv.first)
|
||||
<< ", value: " << bin(bv.second)
|
||||
<< ".\n\n\nBuffer memory:\n" << dumpContents(data));
|
||||
|
||||
ASSERT_EQ(getBits(bv.first, bv.second), actual_value)
|
||||
<< "item #" << item << ", width: " << static_cast<UInt32>(bv.first)
|
||||
<< ", value: " << bin(bv.second)
|
||||
<< ".\n\n\nBuffer memory:\n" << dumpBufferContents(*memory_read_buffer);
|
||||
//EXPECT_EQ(getBits(bv.first, bv.second), reader.peekBits(bv.first));
|
||||
EXPECT_EQ(getBits(bv.first, bv.second), reader.readBits(bv.first));
|
||||
|
||||
++item;
|
||||
}
|
||||
@ -157,7 +165,9 @@ INSTANTIATE_TEST_CASE_P(Simple,
|
||||
{{7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {3, 0xFFFF}},
|
||||
"01111110 11111101 11111011 11110111 11101111 11011111 10111111 01111111 11000000 "),
|
||||
TestCaseParameter({{33, 0xFF110d0b07050300}, {33, 0xAAEE29251f1d1713}}),
|
||||
TestCaseParameter({{33, BIT_PATTERN}, {33, BIT_PATTERN}})
|
||||
TestCaseParameter({{33, BIT_PATTERN}, {33, BIT_PATTERN}}),
|
||||
TestCaseParameter({{24, 0xFFFFFFFF}},
|
||||
"11111111 11111111 11111111 ")
|
||||
));
|
||||
|
||||
TestCaseParameter primes_case(UInt8 repeat_times, UInt64 pattern)
|
||||
@ -184,5 +194,15 @@ INSTANTIATE_TEST_CASE_P(Primes,
|
||||
primes_case(11, BIT_PATTERN)
|
||||
));
|
||||
|
||||
} // namespace
|
||||
}
|
||||
|
||||
TEST(BitHelpers, MaskLowBits)
|
||||
{
|
||||
EXPECT_EQ(0b00000111, ::maskLowBits<UInt8>(3));
|
||||
EXPECT_EQ(0b01111111, ::maskLowBits<UInt8>(7));
|
||||
EXPECT_EQ(0b0000000001111111, ::maskLowBits<UInt16>(7));
|
||||
EXPECT_EQ(0b0001111111111111, ::maskLowBits<UInt16>(13));
|
||||
EXPECT_EQ(0b00000111111111111111111111111111, ::maskLowBits<UInt32>(27));
|
||||
EXPECT_EQ(0b111111111111111111111111111111111, ::maskLowBits<UInt64>(33));
|
||||
EXPECT_EQ(0b11111111111111111111111111111111111, ::maskLowBits<UInt64>(35));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user