Merge remote-tracking branch 'main/master' into h3-integration

This commit is contained in:
Ivan Remen 2019-06-26 14:52:46 +03:00
commit 93523416d7
32 changed files with 2106 additions and 64 deletions

View File

@ -14,5 +14,6 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse on HighLoad++ Siberia](https://www.highload.ru/siberia/2019/abstracts/5348) on June 24-25.
* [ClickHouse Meetup in Novosibirsk](https://events.yandex.ru/events/ClickHouse/26-June-2019/) on June 26.
* [ClickHouse Meetup in Minsk](https://yandex.ru/promo/metrica/clickhouse-minsk) on July 11.
* [ClickHouse Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20.
* [ClickHouse Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27.

View File

@ -1,6 +1,7 @@
#pragma once
#include <cstddef>
#include <type_traits>
/** Returns log2 of number, rounded down.
@ -30,3 +31,64 @@ inline size_t roundUpToPowerOfTwoOrZero(size_t n)
return n;
}
template <typename T>
inline size_t getLeadingZeroBits(T x)
{
if (!x)
return sizeof(x) * 8;
if constexpr (sizeof(T) <= sizeof(unsigned int))
{
return __builtin_clz(x);
}
else if constexpr (sizeof(T) <= sizeof(unsigned long int))
{
return __builtin_clzl(x);
}
else
{
return __builtin_clzll(x);
}
}
template <typename T>
inline size_t getTrailingZeroBits(T x)
{
if (!x)
return sizeof(x) * 8;
if constexpr (sizeof(T) <= sizeof(unsigned int))
{
return __builtin_ctz(x);
}
else if constexpr (sizeof(T) <= sizeof(unsigned long int))
{
return __builtin_ctzl(x);
}
else
{
return __builtin_ctzll(x);
}
}
/** Returns a mask that has '1' for `bits` LSB set:
* maskLowBits<UInt8>(3) => 00000111
*/
template <typename T>
inline T maskLowBits(unsigned char bits)
{
if (bits == 0)
{
return 0;
}
T result = static_cast<T>(~T{0});
if (bits < sizeof(T) * 8)
{
result = static_cast<T>(result >> (sizeof(T) * 8 - bits));
}
return result;
}
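A minimal usage sketch of the three helpers above (an editor's addition, not part of the commit; it assumes the GCC/Clang builtins used in this header):

#include <cassert>
#include <cstdint>

int main()
{
    assert(getLeadingZeroBits<uint32_t>(1) == 31);  // only bit 0 is set
    assert(getTrailingZeroBits<uint32_t>(8) == 3);  // 0b1000
    assert(getLeadingZeroBits<uint64_t>(0) == 64);  // zero is special-cased to full width
    assert(maskLowBits<uint8_t>(3) == 0b00000111);
    assert(maskLowBits<uint16_t>(16) == 0xFFFF);    // full-width mask avoids an undefined shift
}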

View File

@ -0,0 +1,325 @@
#include <Compression/CompressionCodecDoubleDelta.h>
#include <Compression/CompressionInfo.h>
#include <Compression/CompressionFactory.h>
#include <common/unaligned.h>
#include <Parsers/IAST_fwd.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/BitHelpers.h>
#include <IO/WriteHelpers.h>
#include <string.h>
#include <algorithm>
#include <cstdlib>
#include <type_traits>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
}
namespace
{
UInt32 getDeltaTypeByteSize(UInt8 data_bytes_size)
{
// Both delta and double delta can be twice the size of the data item, but not less than 32 bits and not more than 64.
return std::min(64/8, std::max(32/8, data_bytes_size * 2));
}
UInt32 getCompressedHeaderSize(UInt8 data_bytes_size)
{
const UInt8 items_count_size = 4;
return items_count_size + data_bytes_size + getDeltaTypeByteSize(data_bytes_size);
}
UInt32 getCompressedDataSize(UInt8 data_bytes_size, UInt32 uncompressed_size)
{
const UInt32 items_count = uncompressed_size / data_bytes_size;
// 11111 + max 64 bits of double delta.
const UInt32 max_item_size_bits = 5 + getDeltaTypeByteSize(data_bytes_size) * 8;
// + 8 is to round up to the next byte.
return (items_count * max_item_size_bits + 8) / 8;
}
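For example (worked numbers, not part of the diff): with data_bytes_size = 4 and uncompressed_size = 400 there are 100 items, the delta type is 8 bytes wide, so max_item_size_bits = 5 + 64 = 69 and the bound is (100 * 69 + 8) / 8 = 863 bytes.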
struct WriteSpec
{
const UInt8 prefix_bits;
const UInt8 prefix;
const UInt8 data_bits;
};
template <typename T>
WriteSpec getWriteSpec(const T & value)
{
if (value > -63 && value < 64)
{
return WriteSpec{2, 0b10, 7};
}
else if (value > -255 && value < 256)
{
return WriteSpec{3, 0b110, 9};
}
else if (value > -2047 && value < 2048)
{
return WriteSpec{4, 0b1110, 12};
}
else if (value > std::numeric_limits<Int32>::min() && value < std::numeric_limits<Int32>::max())
{
return WriteSpec{5, 0b11110, 32};
}
else
{
return WriteSpec{5, 0b11111, 64};
}
}
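To make the variable-length scheme concrete (an editor's summary, not text from the diff), the specs above, plus the zero case handled directly in compressDataForType, give:

prefix 0     : double delta == 0,  1 bit total
prefix 10    : |dd| < 64,    2 + 1 sign + 6 bits  = 9
prefix 110   : |dd| < 256,   3 + 1 sign + 8 bits  = 12
prefix 1110  : |dd| < 2048,  4 + 1 sign + 11 bits = 16
prefix 11110 : 32-bit range, 5 + 1 sign + 31 bits = 37
prefix 11111 : 64-bit range, 5 + 1 sign + 63 bits = 69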
template <typename T, typename DeltaType>
UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
{
using UnsignedDeltaType = typename std::make_unsigned<DeltaType>::type;
if (source_size % sizeof(T) != 0)
throw Exception("Cannot compress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_COMPRESS);
const char * source_end = source + source_size;
const UInt32 items_count = source_size / sizeof(T);
unalignedStore(dest, items_count);
dest += sizeof(items_count);
T prev_value{};
DeltaType prev_delta{};
if (source < source_end)
{
prev_value = unalignedLoad<T>(source);
unalignedStore(dest, prev_value);
source += sizeof(prev_value);
dest += sizeof(prev_value);
}
if (source < source_end)
{
const T curr_value = unalignedLoad<T>(source);
prev_delta = static_cast<DeltaType>(curr_value - prev_value);
unalignedStore(dest, prev_delta);
source += sizeof(curr_value);
dest += sizeof(prev_delta);
prev_value = curr_value;
}
WriteBuffer buffer(dest, getCompressedDataSize(sizeof(T), source_size - sizeof(T)*2));
BitWriter writer(buffer);
for (; source < source_end; source += sizeof(T))
{
const T curr_value = unalignedLoad<T>(source);
const auto delta = curr_value - prev_value;
const DeltaType double_delta = static_cast<DeltaType>(delta - static_cast<T>(prev_delta));
prev_delta = delta;
prev_value = curr_value;
if (double_delta == 0)
{
writer.writeBits(1, 0);
}
else
{
const auto sign = std::signbit(double_delta);
const auto abs_value = static_cast<UnsignedDeltaType>(std::abs(double_delta));
const auto write_spec = getWriteSpec(double_delta);
writer.writeBits(write_spec.prefix_bits, write_spec.prefix);
writer.writeBits(1, sign);
writer.writeBits(write_spec.data_bits - 1, abs_value);
}
}
writer.flush();
return sizeof(items_count) + sizeof(prev_value) + sizeof(prev_delta) + buffer.count();
}
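Worked example (not part of the diff): for input values 3, 5, 9, 15, the first value 3 and the first delta 5 - 3 = 2 are stored raw; the loop then encodes the double deltas (9 - 5) - 2 = 2 and (15 - 9) - 4 = 2, each taking 9 bits under the 0b10 spec.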
template <typename T, typename DeltaType>
void decompressDataForType(const char * source, UInt32 source_size, char * dest)
{
const char * source_end = source + source_size;
const UInt32 items_count = unalignedLoad<UInt32>(source);
source += sizeof(items_count);
T prev_value{};
DeltaType prev_delta{};
if (source < source_end)
{
prev_value = unalignedLoad<T>(source);
unalignedStore(dest, prev_value);
source += sizeof(prev_value);
dest += sizeof(prev_value);
}
if (source < source_end)
{
prev_delta = unalignedLoad<DeltaType>(source);
prev_value = static_cast<T>(prev_value + prev_delta);
unalignedStore(dest, prev_value);
source += sizeof(prev_delta);
dest += sizeof(prev_value);
}
ReadBufferFromMemory buffer(source, source_size - sizeof(prev_value) - sizeof(prev_delta) - sizeof(items_count));
BitReader reader(buffer);
// Since the data is tightly packed (as few as 1 bit per value) and the last byte is padded with zeroes,
// we have to keep track of the number of items to avoid reading more than there is.
for (UInt32 items_read = 2; items_read < items_count && !reader.eof(); ++items_read)
{
DeltaType double_delta = 0;
if (reader.readBit() == 1)
{
const UInt8 data_sizes[] = {6, 8, 11, 31, 63};
UInt8 i = 0;
for (; i < sizeof(data_sizes) - 1; ++i)
{
const auto next_bit = reader.readBit();
if (next_bit == 0)
break;
}
const UInt8 sign = reader.readBit();
double_delta = static_cast<DeltaType>(reader.readBits(data_sizes[i]));
if (sign)
{
double_delta *= -1;
}
}
// else if first bit is zero, no need to read more data.
const T curr_value = static_cast<T>(prev_value + prev_delta + double_delta);
unalignedStore(dest, curr_value);
dest += sizeof(curr_value);
prev_delta = curr_value - prev_value;
prev_value = curr_value;
}
}
UInt8 getDataBytesSize(DataTypePtr column_type)
{
UInt8 data_bytes_size = 1;
if (column_type && column_type->haveMaximumSizeOfValue())
{
size_t max_size = column_type->getSizeOfValueInMemory();
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
data_bytes_size = static_cast<UInt8>(max_size);
}
return data_bytes_size;
}
}
CompressionCodecDoubleDelta::CompressionCodecDoubleDelta(UInt8 data_bytes_size_)
: data_bytes_size(data_bytes_size_)
{
}
UInt8 CompressionCodecDoubleDelta::getMethodByte() const
{
return static_cast<UInt8>(CompressionMethodByte::DoubleDelta);
}
String CompressionCodecDoubleDelta::getCodecDesc() const
{
return "DoubleDelta";
}
UInt32 CompressionCodecDoubleDelta::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
const auto result = 2 // common header
+ data_bytes_size // max bytes skipped if source is not properly aligned.
+ getCompressedHeaderSize(data_bytes_size) // data-specific header
+ getCompressedDataSize(data_bytes_size, uncompressed_size);
return result;
}
UInt32 CompressionCodecDoubleDelta::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
UInt8 bytes_to_skip = source_size % data_bytes_size;
dest[0] = data_bytes_size;
dest[1] = bytes_to_skip;
memcpy(&dest[2], source, bytes_to_skip);
size_t start_pos = 2 + bytes_to_skip;
UInt32 compressed_size = 0;
switch (data_bytes_size)
{
case 1:
compressed_size = compressDataForType<UInt8, Int16>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case 2:
compressed_size = compressDataForType<UInt16, Int32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case 4:
compressed_size = compressDataForType<UInt32, Int64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case 8:
compressed_size = compressDataForType<UInt64, Int64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
}
return 1 + 1 + bytes_to_skip + compressed_size;
}
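For reference, a sketch of the compressed block layout produced above (an editor's summary, not text from the diff):

byte 0             : data_bytes_size (1, 2, 4 or 8)
byte 1             : bytes_to_skip = source_size % data_bytes_size
bytes 2 .. 2+s-1   : the unaligned tail of source, copied verbatim (s = bytes_to_skip)
bytes 2+s onward   : items_count (UInt32), first value, first delta, then the bit-packed double deltas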
void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 /* uncompressed_size */) const
{
UInt8 bytes_size = source[0];
UInt8 bytes_to_skip = source[1];
memcpy(dest, &source[2], bytes_to_skip);
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
switch (bytes_size)
{
case 1:
decompressDataForType<UInt8, Int16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
case 2:
decompressDataForType<UInt16, Int32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
case 4:
decompressDataForType<UInt32, Int64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
case 8:
decompressDataForType<UInt64, Int64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
}
}
void CompressionCodecDoubleDelta::useInfoAboutType(DataTypePtr data_type)
{
data_bytes_size = getDataBytesSize(data_type);
}
void registerCodecDoubleDelta(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::DoubleDelta);
factory.registerCompressionCodecWithType("DoubleDelta", method_code, [&](const ASTPtr &, DataTypePtr column_type) -> CompressionCodecPtr
{
UInt8 delta_bytes_size = getDataBytesSize(column_type);
return std::make_shared<CompressionCodecDoubleDelta>(delta_bytes_size);
});
}
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <Compression/ICompressionCodec.h>
namespace DB
{
class CompressionCodecDoubleDelta : public ICompressionCodec
{
public:
CompressionCodecDoubleDelta(UInt8 data_bytes_size_);
UInt8 getMethodByte() const override;
String getCodecDesc() const override;
void useInfoAboutType(DataTypePtr data_type) override;
protected:
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
private:
UInt8 data_bytes_size;
};
}

View File

@ -0,0 +1,326 @@
#include <Compression/CompressionCodecGorilla.h>
#include <Compression/CompressionInfo.h>
#include <Compression/CompressionFactory.h>
#include <common/unaligned.h>
#include <Parsers/IAST_fwd.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/BitHelpers.h>
#include <string.h>
#include <algorithm>
#include <cstdlib>
#include <type_traits>
#include <bitset>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int ILLEGAL_CODEC_PARAMETER;
}
namespace
{
constexpr inline UInt8 getBitLengthOfLength(UInt8 data_bytes_size)
{
// A 1-byte value is 8 bits, and we need 4 bits to represent the length 8 (0b1000);
// 2-byte => 16 bits => 5,
// 4-byte => 32 bits => 6,
// 8-byte => 64 bits => 7.
const UInt8 bit_lengths[] = {0, 4, 5, 0, 6, 0, 0, 0, 7};
assert(data_bytes_size >= 1 && data_bytes_size < sizeof(bit_lengths) && bit_lengths[data_bytes_size] != 0);
return bit_lengths[data_bytes_size];
}
UInt32 getCompressedHeaderSize(UInt8 data_bytes_size)
{
const UInt8 items_count_size = 4;
return items_count_size + data_bytes_size;
}
UInt32 getCompressedDataSize(UInt8 data_bytes_size, UInt32 uncompressed_size)
{
const UInt32 items_count = uncompressed_size / data_bytes_size;
static const auto DATA_BIT_LENGTH = getBitLengthOfLength(data_bytes_size);
// -1 since there must be at least 1 non-zero bit.
static const auto LEADING_ZEROES_BIT_LENGTH = DATA_BIT_LENGTH - 1;
// Worst case (for a 32-bit value):
// 0b11 prefix + 5 bits of leading-zeroes count + 6 bits of data size + up to 32 non-zero data bits.
const UInt32 max_item_size_bits = 2 + LEADING_ZEROES_BIT_LENGTH + DATA_BIT_LENGTH + data_bytes_size * 8;
// + 8 is to round up to the next byte.
return (items_count * max_item_size_bits + 8) / 8;
}
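Worked numbers (not part of the diff): for data_bytes_size = 4, DATA_BIT_LENGTH = 6 and LEADING_ZEROES_BIT_LENGTH = 5, so max_item_size_bits = 2 + 5 + 6 + 32 = 45; 100 such items are then bounded by (100 * 45 + 8) / 8 = 563 bytes.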
struct binary_value_info
{
UInt8 leading_zero_bits;
UInt8 data_bits;
UInt8 trailing_zero_bits;
};
template <typename T>
binary_value_info getLeadingAndTrailingBits(const T & value)
{
constexpr UInt8 bit_size = sizeof(T) * 8;
const UInt8 lz = getLeadingZeroBits(value);
const UInt8 tz = getTrailingZeroBits(value);
const UInt8 data_size = value == 0 ? 0 : static_cast<UInt8>(bit_size - lz - tz);
return binary_value_info{lz, data_size, tz};
}
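Example (not from the diff): for the 32-bit XORed value 0x00000C00 (only bits 10 and 11 set), lz = 20, tz = 10 and data_size = 2, so just two meaningful bits plus their position metadata need to be stored.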
template <typename T>
UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
{
static const auto DATA_BIT_LENGTH = getBitLengthOfLength(sizeof(T));
// -1 since there must be at least 1 non-zero bit.
static const auto LEADING_ZEROES_BIT_LENGTH = DATA_BIT_LENGTH - 1;
if (source_size % sizeof(T) != 0)
throw Exception("Cannot compress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_COMPRESS);
const char * source_end = source + source_size;
const UInt32 items_count = source_size / sizeof(T);
unalignedStore(dest, items_count);
dest += sizeof(items_count);
T prev_value{};
// A zero-initialized spec causes the first XORed value to be written in full.
binary_value_info prev_xored_info{0, 0, 0};
if (source < source_end)
{
prev_value = unalignedLoad<T>(source);
unalignedStore(dest, prev_value);
source += sizeof(prev_value);
dest += sizeof(prev_value);
}
WriteBuffer buffer(dest, getCompressedDataSize(sizeof(T), source_size - sizeof(items_count) - sizeof(prev_value)));
BitWriter writer(buffer);
while (source < source_end)
{
const T curr_value = unalignedLoad<T>(source);
source += sizeof(curr_value);
const auto xored_data = curr_value ^ prev_value;
const binary_value_info curr_xored_info = getLeadingAndTrailingBits(xored_data);
if (xored_data == 0)
{
writer.writeBits(1, 0);
}
else if (prev_xored_info.data_bits != 0
&& prev_xored_info.leading_zero_bits <= curr_xored_info.leading_zero_bits
&& prev_xored_info.trailing_zero_bits <= curr_xored_info.trailing_zero_bits)
{
writer.writeBits(2, 0b10);
writer.writeBits(prev_xored_info.data_bits, xored_data >> prev_xored_info.trailing_zero_bits);
}
else
{
writer.writeBits(2, 0b11);
writer.writeBits(LEADING_ZEROES_BIT_LENGTH, curr_xored_info.leading_zero_bits);
writer.writeBits(DATA_BIT_LENGTH, curr_xored_info.data_bits);
writer.writeBits(curr_xored_info.data_bits, xored_data >> curr_xored_info.trailing_zero_bits);
prev_xored_info = curr_xored_info;
}
prev_value = curr_value;
}
writer.flush();
return sizeof(items_count) + sizeof(prev_value) + buffer.count();
}
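Worked example (not from the diff): as Float64 bit patterns, 1.5 = 0x3FF8000000000000 and 2.0 = 0x4000000000000000, so their XOR is 0x7FF8000000000000 with 1 leading zero, 12 meaningful bits and 51 trailing zeros; the 0b11 branch then writes 2 + 6 + 7 + 12 = 27 bits instead of a raw 64.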
template <typename T>
void decompressDataForType(const char * source, UInt32 source_size, char * dest)
{
static const auto DATA_BIT_LENGTH = getBitLengthOfLength(sizeof(T));
// -1 since there must be at least 1 non-zero bit.
static const auto LEADING_ZEROES_BIT_LENGTH = DATA_BIT_LENGTH - 1;
const char * source_end = source + source_size;
const UInt32 items_count = unalignedLoad<UInt32>(source);
source += sizeof(items_count);
T prev_value{};
if (source < source_end)
{
prev_value = unalignedLoad<T>(source);
unalignedStore(dest, prev_value);
source += sizeof(prev_value);
dest += sizeof(prev_value);
}
ReadBufferFromMemory buffer(source, source_size - sizeof(items_count) - sizeof(prev_value));
BitReader reader(buffer);
binary_value_info prev_xored_info{0, 0, 0};
// Since the data is tightly packed (as few as 1 bit per value) and the last byte is padded with zeroes,
// we have to keep track of the number of items to avoid reading more than there is.
for (UInt32 items_read = 1; items_read < items_count && !reader.eof(); ++items_read)
{
T curr_value = prev_value;
binary_value_info curr_xored_info = prev_xored_info;
T xored_data{};
if (reader.readBit() == 1)
{
if (reader.readBit() == 1)
{
// 0b11 prefix
curr_xored_info.leading_zero_bits = reader.readBits(LEADING_ZEROES_BIT_LENGTH);
curr_xored_info.data_bits = reader.readBits(DATA_BIT_LENGTH);
curr_xored_info.trailing_zero_bits = sizeof(T) * 8 - curr_xored_info.leading_zero_bits - curr_xored_info.data_bits;
}
// else: 0b10 prefix - use prev_xored_info
if (curr_xored_info.leading_zero_bits == 0
&& curr_xored_info.data_bits == 0
&& curr_xored_info.trailing_zero_bits == 0)
{
throw Exception("Cannot decompress gorilla-encoded data: corrupted input data.",
ErrorCodes::CANNOT_DECOMPRESS);
}
xored_data = reader.readBits(curr_xored_info.data_bits);
xored_data <<= curr_xored_info.trailing_zero_bits;
curr_value = prev_value ^ xored_data;
}
// else: 0b0 prefix - use prev_value
unalignedStore(dest, curr_value);
dest += sizeof(curr_value);
prev_xored_info = curr_xored_info;
prev_value = curr_value;
}
}
UInt8 getDataBytesSize(DataTypePtr column_type)
{
UInt8 delta_bytes_size = 1;
if (column_type && column_type->haveMaximumSizeOfValue())
{
size_t max_size = column_type->getSizeOfValueInMemory();
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
delta_bytes_size = static_cast<UInt8>(max_size);
}
return delta_bytes_size;
}
}
CompressionCodecGorilla::CompressionCodecGorilla(UInt8 data_bytes_size_)
: data_bytes_size(data_bytes_size_)
{
}
UInt8 CompressionCodecGorilla::getMethodByte() const
{
return static_cast<UInt8>(CompressionMethodByte::Gorilla);
}
String CompressionCodecGorilla::getCodecDesc() const
{
return "Gorilla";
}
UInt32 CompressionCodecGorilla::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
const auto result = 2 // common header
+ data_bytes_size // max bytes skipped if source is not properly aligned.
+ getCompressedHeaderSize(data_bytes_size) // data-specific header
+ getCompressedDataSize(data_bytes_size, uncompressed_size);
return result;
}
UInt32 CompressionCodecGorilla::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
UInt8 bytes_to_skip = source_size % data_bytes_size;
dest[0] = data_bytes_size;
dest[1] = bytes_to_skip;
memcpy(&dest[2], source, bytes_to_skip);
size_t start_pos = 2 + bytes_to_skip;
UInt32 compressed_size = 0;
switch (data_bytes_size)
{
case 1:
compressed_size = compressDataForType<UInt8>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case 2:
compressed_size = compressDataForType<UInt16>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case 4:
compressed_size = compressDataForType<UInt32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case 8:
compressed_size = compressDataForType<UInt64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
}
return 1 + 1 + bytes_to_skip + compressed_size;
}
void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 /* uncompressed_size */) const
{
UInt8 bytes_size = source[0];
UInt8 bytes_to_skip = source[1];
memcpy(dest, &source[2], bytes_to_skip);
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
switch (bytes_size)
{
case 1:
decompressDataForType<UInt8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
case 2:
decompressDataForType<UInt16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
case 4:
decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
case 8:
decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
}
}
void CompressionCodecGorilla::useInfoAboutType(DataTypePtr data_type)
{
data_bytes_size = getDataBytesSize(data_type);
}
void registerCodecGorilla(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::Gorilla);
factory.registerCompressionCodecWithType("Gorilla", method_code, [&](const ASTPtr &, DataTypePtr column_type) -> CompressionCodecPtr
{
UInt8 delta_bytes_size = getDataBytesSize(column_type);
return std::make_shared<CompressionCodecGorilla>(delta_bytes_size);
});
}
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <Compression/ICompressionCodec.h>
namespace DB
{
class CompressionCodecGorilla : public ICompressionCodec
{
public:
CompressionCodecGorilla(UInt8 data_bytes_size_);
UInt8 getMethodByte() const override;
String getCodecDesc() const override;
void useInfoAboutType(DataTypePtr data_type) override;
protected:
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
private:
UInt8 data_bytes_size;
};
}

View File

@ -138,6 +138,8 @@ void registerCodecMultiple(CompressionCodecFactory & factory);
void registerCodecLZ4HC(CompressionCodecFactory & factory);
void registerCodecDelta(CompressionCodecFactory & factory);
void registerCodecT64(CompressionCodecFactory & factory);
void registerCodecDoubleDelta(CompressionCodecFactory & factory);
void registerCodecGorilla(CompressionCodecFactory & factory);
CompressionCodecFactory::CompressionCodecFactory()
{
@ -149,6 +151,8 @@ CompressionCodecFactory::CompressionCodecFactory()
registerCodecLZ4HC(*this);
registerCodecDelta(*this);
registerCodecT64(*this);
registerCodecDoubleDelta(*this);
registerCodecGorilla(*this);
}
}

View File

@ -41,6 +41,8 @@ enum class CompressionMethodByte : uint8_t
Multiple = 0x91,
Delta = 0x92,
T64 = 0x93,
DoubleDelta = 0x94,
Gorilla = 0x95,
};
}

View File

@ -29,7 +29,7 @@ namespace ErrorCodes
}
UInt32 ICompressionCodec::compress(char * source, UInt32 source_size, char * dest) const
UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char * dest) const
{
dest[0] = getMethodByte();
UInt8 header_size = getHeaderSize();
@ -41,7 +41,7 @@ UInt32 ICompressionCodec::compress(char * source, UInt32 source_size, char * des
}
UInt32 ICompressionCodec::decompress(char * source, UInt32 source_size, char * dest) const
UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, char * dest) const
{
UInt8 method = source[0];
if (method != getMethodByte())

View File

@ -35,10 +35,10 @@ public:
virtual String getCodecDesc() const = 0;
/// Compressed bytes from uncompressed source to dest. Dest should preallocate memory
virtual UInt32 compress(char * source, UInt32 source_size, char * dest) const;
virtual UInt32 compress(const char * source, UInt32 source_size, char * dest) const;
/// Decompress bytes from compressed source to dest. Dest should preallocate memory
virtual UInt32 decompress(char * source, UInt32 source_size, char * dest) const;
virtual UInt32 decompress(const char * source, UInt32 source_size, char * dest) const;
/// Number of bytes, that will be used to compress uncompressed_size bytes with current codec
virtual UInt32 getCompressedReserveSize(UInt32 uncompressed_size) const { return getHeaderSize() + getMaxCompressedDataSize(uncompressed_size); }

View File

@ -0,0 +1,385 @@
#include <Compression/CompressionCodecDoubleDelta.h>
#include <Compression/CompressionCodecGorilla.h>
#include <Core/Types.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromMemory.h>
#include <Common/PODArray.h>
#include <boost/format.hpp>
#include <cmath>
#include <initializer_list>
#include <iomanip>
#include <memory>
#include <vector>
#include <typeinfo>
#include <iterator>
#include <optional>
#include <iostream>
#include <bitset>
#include <string.h>
#pragma GCC diagnostic ignored "-Wsign-compare"
#ifdef __clang__
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#pragma clang diagnostic ignored "-Wundef"
#endif
#include <gtest/gtest.h>
using namespace DB;
template <typename T>
std::string bin(const T & value, size_t bits = sizeof(T)*8)
{
static const UInt8 MAX_BITS = sizeof(T)*8;
assert(bits <= MAX_BITS);
return std::bitset<sizeof(T) * 8>(static_cast<unsigned long long>(value))
.to_string().substr(MAX_BITS - bits, bits);
}
template <typename T>
const char* type_name()
{
return typeid(T).name();
}
template <>
const char* type_name<UInt32>()
{
return "uint32";
}
template <>
const char* type_name<Int32>()
{
return "int32";
}
template <>
const char* type_name<UInt64>()
{
return "uint64";
}
template <>
const char* type_name<Int64>()
{
return "int64";
}
template <>
const char* type_name<Float32>()
{
return "float";
}
template <>
const char* type_name<Float64>()
{
return "double";
}
template <typename T, typename ContainerLeft, typename ContainerRight>
::testing::AssertionResult EqualByteContainersAs(const ContainerLeft & left, const ContainerRight & right)
{
static_assert(sizeof(typename ContainerLeft::value_type) == 1, "Expected byte-container");
static_assert(sizeof(typename ContainerRight::value_type) == 1, "Expected byte-container");
::testing::AssertionResult result = ::testing::AssertionSuccess();
ReadBufferFromMemory left_read_buffer(left.data(), left.size());
ReadBufferFromMemory right_read_buffer(right.data(), right.size());
const auto l_size = left.size() / sizeof(T);
const auto r_size = right.size() / sizeof(T);
const auto size = std::min(l_size, r_size);
if (l_size != r_size)
{
result = ::testing::AssertionFailure() << "size mismatch, expected: " << l_size << ", got: " << r_size;
}
const auto MAX_MISMATCHING_ITEMS = 5;
int mismatching_items = 0;
for (int i = 0; i < size; ++i)
{
T left_value{};
left_read_buffer.readStrict(reinterpret_cast<char*>(&left_value), sizeof(left_value));
T right_value{};
right_read_buffer.readStrict(reinterpret_cast<char*>(&right_value), sizeof(right_value));
if (left_value != right_value)
{
if (result)
{
result = ::testing::AssertionFailure();
}
result << "mismatching " << sizeof(T) << "-byte item #" << i
<< "\nexpected: " << bin(left_value)
<< "\ngot : " << bin(right_value)
<< std::endl;
if (++mismatching_items >= MAX_MISMATCHING_ITEMS)
{
result << "..." << std::endl;
break;
}
}
}
return result;
}
struct CodecTestParam
{
std::vector<char> source_data;
UInt8 data_byte_size;
std::string case_name;
};
std::ostream & operator<<(std::ostream & ostr, const CodecTestParam & param)
{
return ostr << "name: " << param.case_name
<< "\nbyte size: " << static_cast<UInt32>(param.data_byte_size)
<< "\ndata size: " << param.source_data.size();
}
template <typename T, typename... Args>
CodecTestParam makeParam(Args && ... args)
{
std::initializer_list<T> vals{static_cast<T>(args)...};
std::vector<char> data(sizeof(T) * std::size(vals));
char * write_pos = data.data();
for (const auto & v : vals)
{
unalignedStore<T>(write_pos, v);
write_pos += sizeof(v);
}
return CodecTestParam{std::move(data), sizeof(T),
(boost::format("%1% %2%") % (sizeof(T) * std::size(vals)) % " predefined values").str()};
}
template <typename T, size_t Begin = 1, size_t End = 10000, typename Generator>
CodecTestParam generateParam(Generator gen, const char* gen_name)
{
static_assert (End >= Begin, "End must not be less than Begin");
std::vector<char> data(sizeof(T) * (End - Begin));
char * write_pos = data.data();
for (size_t i = Begin; i < End; ++i)
{
const T v = gen(static_cast<T>(i));
unalignedStore<T>(write_pos, v);
write_pos += sizeof(v);
}
return CodecTestParam{std::move(data), sizeof(T),
(boost::format("%1% from %2% (%3% => %4%)") % type_name<T>() % gen_name % Begin % End).str()};
}
void TestTranscoding(ICompressionCodec * codec, const CodecTestParam & param)
{
const auto & source_data = param.source_data;
const UInt32 encoded_max_size = codec->getCompressedReserveSize(source_data.size());
PODArray<char> encoded(encoded_max_size);
const UInt32 encoded_size = codec->compress(source_data.data(), source_data.size(), encoded.data());
encoded.resize(encoded_size);
PODArray<char> decoded(source_data.size());
const UInt32 decoded_size = codec->decompress(encoded.data(), encoded.size(), decoded.data());
decoded.resize(decoded_size);
switch (param.data_byte_size)
{
case 1:
ASSERT_TRUE(EqualByteContainersAs<UInt8>(source_data, decoded));
break;
case 2:
ASSERT_TRUE(EqualByteContainersAs<UInt16>(source_data, decoded));
break;
case 4:
ASSERT_TRUE(EqualByteContainersAs<UInt32>(source_data, decoded));
break;
case 8:
ASSERT_TRUE(EqualByteContainersAs<UInt64>(source_data, decoded));
break;
default:
FAIL() << "Invalid data_byte_size: " << param.data_byte_size;
}
}
class CodecTest : public ::testing::TestWithParam<CodecTestParam>
{
public:
static void SetUpTestCase()
{
// Make rand() predictable to avoid tests failing "out of the blue".
srand(0);
}
};
TEST_P(CodecTest, DoubleDelta)
{
const auto & param = GetParam();
auto codec = std::make_unique<CompressionCodecDoubleDelta>(param.data_byte_size);
TestTranscoding(codec.get(), param);
}
TEST_P(CodecTest, Gorilla)
{
const auto & param = GetParam();
auto codec = std::make_unique<CompressionCodecGorilla>(param.data_byte_size);
TestTranscoding(codec.get(), param);
}
auto SameValueGenerator = [](auto value)
{
return [=](auto i)
{
return static_cast<decltype(i)>(value);
};
};
auto SequentialGenerator = [](auto stride = 1)
{
return [=](auto i)
{
using ValueType = decltype(i);
return static_cast<ValueType>(stride * i);
};
};
template <typename T>
struct MonotonicGenerator
{
MonotonicGenerator(T stride = 1, size_t max_step = 10)
: prev_value{},
stride(stride),
max_step(max_step)
{}
template <typename U>
U operator()(U i)
{
if (!prev_value.has_value())
{
prev_value = i * stride;
}
const U result = *prev_value + static_cast<T>(stride * (rand() % max_step));
prev_value = result;
return result;
}
std::optional<T> prev_value;
const T stride;
const size_t max_step;
};
auto MinMaxGenerator = [](auto i)
{
if (i % 2 == 0)
{
return std::numeric_limits<decltype(i)>::min();
}
else
{
return std::numeric_limits<decltype(i)>::max();
}
};
auto RandomGenerator = [](auto i) {return static_cast<decltype(i)>(rand());};
auto RandomishGenerator = [](auto i)
{
return static_cast<decltype(i)>(sin(static_cast<double>(i) * i) * i);
};
INSTANTIATE_TEST_CASE_P(Basic,
CodecTest,
::testing::Values(
makeParam<UInt32>(1, 2, 3, 4),
makeParam<UInt64>(1, 2, 3, 4),
makeParam<Float32>(1.1, 2.2, 3.3, 4.4),
makeParam<Float64>(1.1, 2.2, 3.3, 4.4)
),
);
#define G(generator) generator, #generator
INSTANTIATE_TEST_CASE_P(Same,
CodecTest,
::testing::Values(
generateParam<UInt32>(G(SameValueGenerator(1000))),
generateParam<Int32>(G(SameValueGenerator(-1000))),
generateParam<UInt64>(G(SameValueGenerator(1000))),
generateParam<Int64>(G(SameValueGenerator(-1000))),
generateParam<Float32>(G(SameValueGenerator(M_E))),
generateParam<Float64>(G(SameValueGenerator(M_E)))
),
);
INSTANTIATE_TEST_CASE_P(Sequential,
CodecTest,
::testing::Values(
generateParam<UInt32>(G(SequentialGenerator(1))),
generateParam<Int32>(G(SequentialGenerator(-1))),
generateParam<UInt64>(G(SequentialGenerator(1))),
generateParam<Int64>(G(SequentialGenerator(-1))),
generateParam<Float32>(G(SequentialGenerator(M_E))),
generateParam<Float64>(G(SequentialGenerator(M_E)))
),
);
INSTANTIATE_TEST_CASE_P(Monotonic,
CodecTest,
::testing::Values(
generateParam<UInt32>(G(MonotonicGenerator<UInt32>(1, 5))),
generateParam<Int32>(G(MonotonicGenerator<Int32>(-1, 5))),
generateParam<UInt64>(G(MonotonicGenerator<UInt64>(1, 5))),
generateParam<Int64>(G(MonotonicGenerator<Int64>(-1, 5))),
generateParam<Float32>(G(MonotonicGenerator<Float32>(M_E, 5))),
generateParam<Float64>(G(MonotonicGenerator<Float64>(M_E, 5)))
),
);
INSTANTIATE_TEST_CASE_P(Random,
CodecTest,
::testing::Values(
generateParam<UInt32>(G(RandomGenerator)),
generateParam<UInt64>(G(RandomGenerator))
),
);
INSTANTIATE_TEST_CASE_P(RandomLike,
CodecTest,
::testing::Values(
generateParam<Int32>(G(RandomishGenerator)),
generateParam<Int64>(G(RandomishGenerator)),
generateParam<Float32>(G(RandomishGenerator)),
generateParam<Float64>(G(RandomishGenerator))
),
);
INSTANTIATE_TEST_CASE_P(Overflow,
CodecTest,
::testing::Values(
generateParam<UInt32>(G(MinMaxGenerator)),
generateParam<Int32>(G(MinMaxGenerator)),
generateParam<UInt64>(G(MinMaxGenerator)),
generateParam<Int64>(G(MinMaxGenerator))
),
);

dbms/src/IO/BitHelpers.h (new file, 178 lines)
View File

@ -0,0 +1,178 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <Core/Types.h>
#include <Common/BitHelpers.h>
namespace DB
{
/** Reads data from underlying ReadBuffer bit by bit, max 64 bits at once.
*
* Reads MSB first. Imagine that you have the data:
* 11110000 10101010 00100100 11111110
*
* Given that r is a BitReader created with a ReadBuffer that reads from the data above:
* r.readBits(3) => 0b111
* r.readBit() => 0b1
* r.readBits(8) => 0b1010 // 4 leading zero-bits are not shown
* r.readBit() => 0b1
* r.readBit() => 0b0
* r.readBits(17) => 0b10001001001111111
* r.readBit() => 0b0
**/
class BitReader
{
ReadBuffer & buf;
UInt64 bits_buffer;
UInt8 bits_count;
static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;
public:
BitReader(ReadBuffer & buf_)
: buf(buf_),
bits_buffer(0),
bits_count(0)
{}
~BitReader()
{}
inline UInt64 readBits(UInt8 bits)
{
UInt64 result = 0;
bits = std::min(static_cast<UInt8>(sizeof(result) * 8), bits);
while (bits != 0)
{
if (bits_count == 0)
{
fillBuffer();
if (bits_count == 0)
{
// EOF.
break;
}
}
const auto to_read = std::min(bits, bits_count);
const UInt64 v = bits_buffer >> (bits_count - to_read);
const UInt64 mask = maskLowBits<UInt64>(to_read);
const UInt64 value = v & mask;
result |= value;
// unset bits that were read
bits_buffer &= ~(mask << (bits_count - to_read));
bits_count -= to_read;
bits -= to_read;
result <<= std::min(bits, BIT_BUFFER_SIZE);
}
return result;
}
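// Stub: peeking is not implemented and always returns 0 (its only intended use in the unit tests is commented out).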
inline UInt64 peekBits(UInt8 /*bits*/)
{
return 0;
}
inline UInt8 readBit()
{
return static_cast<UInt8>(readBits(1));
}
inline bool eof() const
{
return bits_count == 0 && buf.eof();
}
private:
void fillBuffer()
{
auto read = buf.read(reinterpret_cast<char *>(&bits_buffer), BIT_BUFFER_SIZE / 8);
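// The first byte read must land in the most significant bits (MSB-first order),
// so convert from big-endian and right-align the bytes actually read,
// dropping the garbage left over from a short read at the end of the buffer.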
bits_buffer = be64toh(bits_buffer);
bits_buffer >>= BIT_BUFFER_SIZE - read * 8;
bits_count = static_cast<UInt8>(read) * 8;
}
};
class BitWriter
{
WriteBuffer & buf;
UInt64 bits_buffer;
UInt8 bits_count;
static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;
public:
BitWriter(WriteBuffer & buf_)
: buf(buf_),
bits_buffer(0),
bits_count(0)
{}
~BitWriter()
{
flush();
}
inline void writeBits(UInt8 bits, UInt64 value)
{
bits = std::min(static_cast<UInt8>(sizeof(value) * 8), bits);
while (bits > 0)
{
auto v = value;
auto to_write = bits;
const UInt8 capacity = BIT_BUFFER_SIZE - bits_count;
if (capacity < bits)
{
v >>= bits - capacity;
to_write = capacity;
}
const UInt64 mask = maskLowBits<UInt64>(to_write);
v &= mask;
// assert(v <= 255);
bits_buffer <<= to_write;
bits_buffer |= v;
bits_count += to_write;
if (bits_count < BIT_BUFFER_SIZE)
break;
doFlush();
bits -= to_write;
}
}
inline void flush()
{
if (bits_count != 0)
{
bits_buffer <<= (BIT_BUFFER_SIZE - bits_count);
doFlush();
}
}
private:
void doFlush()
{
bits_buffer = htobe64(bits_buffer);
buf.write(reinterpret_cast<const char *>(&bits_buffer), (bits_count + 7) / 8);
bits_count = 0;
bits_buffer = 0;
}
};
}
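A minimal round-trip sketch for the two classes above (an editor's addition; WriteBuffer and ReadBufferFromMemory are used the same way in the unit tests further below):

#include <IO/BitHelpers.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/WriteBuffer.h>
#include <cassert>
#include <vector>

int main()
{
    std::vector<char> data(8, 0);
    {
        DB::WriteBuffer out(data.data(), data.size());
        DB::BitWriter writer(out);
        writer.writeBits(3, 0b101); // MSB-first: the buffer now starts 101.....
        writer.writeBits(7, 0x55);
        writer.flush();             // pads the last byte with zeroes
    }
    DB::ReadBufferFromMemory in(data.data(), data.size());
    DB::BitReader reader(in);
    assert(reader.readBits(3) == 0b101);
    assert(reader.readBits(7) == 0x55);
}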

View File

@ -0,0 +1,213 @@
#include <string.h>
#include <IO/BitHelpers.h>
#include <Core/Types.h>
#include <IO/MemoryReadWriteBuffer.h>
#include <IO/ReadBufferFromMemory.h>
#include <Common/BitHelpers.h>
#include <Common/PODArray.h>
#include <cmath>
#include <iomanip>
#include <memory>
#include <bitset>
#include <string>
#include <vector>
#include <typeinfo>
#include <iostream>
#pragma GCC diagnostic ignored "-Wsign-compare"
#ifdef __clang__
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#pragma clang diagnostic ignored "-Wundef"
#endif
#include <gtest/gtest.h>
using namespace DB;
// Intentionally asymmetric in both byte and word size to detect read/write inconsistencies;
// each bit at a prime position is set to 0.
// v-61 v-53 v-47 v-41 v-37 v-31 v-23 v-17 v-11 v-5
const UInt64 BIT_PATTERN = 0b11101011'11101111'10111010'11101111'10101111'10111010'11101011'10101001;
const UInt8 PRIMES[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61};
template <typename T>
std::string bin(const T & value, size_t bits = sizeof(T)*8)
{
static const UInt8 MAX_BITS = sizeof(T)*8;
assert(bits <= MAX_BITS);
return std::bitset<sizeof(T) * 8>(static_cast<unsigned long long>(value))
.to_string().substr(MAX_BITS - bits, bits);
}
template <typename T>
T getBits(UInt8 bits, const T & value)
{
const T mask = ((static_cast<T>(1) << static_cast<T>(bits)) - 1);
return value & mask;
}
template <typename T>
std::ostream & dumpBuffer(const T begin,
const T end,
std::ostream * destination,
const char* col_sep = " ",
const char* row_sep = "\n",
const size_t cols_in_row = 8,
UInt32 max_bytes = 0xFFFFFFFF)
{
size_t col = 0;
for (auto p = begin; p < end && p - begin < max_bytes; ++p)
{
*destination << bin(*p);
if (++col % cols_in_row == 0)
{
if (row_sep)
*destination << row_sep;
}
else if (col_sep)
{
*destination << col_sep;
}
}
return *destination;
}
template <typename T>
std::string dumpContents(const T& container,
const char* col_sep = " ",
const char* row_sep = "\n",
const size_t cols_in_row = 8)
{
std::stringstream sstr;
dumpBuffer(std::begin(container), std::end(container), &sstr, col_sep, row_sep, cols_in_row);
return sstr.str();
}
struct TestCaseParameter
{
std::vector<std::pair<UInt8, UInt64>> bits_and_vals;
std::string expected_buffer_binary;
explicit TestCaseParameter(std::vector<std::pair<UInt8, UInt64>> vals, std::string binary = std::string{})
: bits_and_vals(std::move(vals)),
expected_buffer_binary(binary)
{}
};
class BitIO : public ::testing::TestWithParam<TestCaseParameter>
{};
TEST_P(BitIO, WriteAndRead)
{
const auto & param = GetParam();
const auto & bits_and_vals = param.bits_and_vals;
const auto & expected_buffer_binary = param.expected_buffer_binary;
UInt64 max_buffer_size = 0;
for (const auto & bv : bits_and_vals)
{
max_buffer_size += bv.first;
}
max_buffer_size = (max_buffer_size + 7) / 8;
SCOPED_TRACE(max_buffer_size);
PODArray<char> data(max_buffer_size);
{
WriteBuffer write_buffer(data.data(), data.size());
BitWriter writer(write_buffer);
for (const auto & bv : bits_and_vals)
{
writer.writeBits(bv.first, bv.second);
}
writer.flush();
}
{
ReadBufferFromMemory read_buffer(data.data(), data.size());
// auto memory_read_buffer = memory_write_buffer.tryGetReadBuffer();
if (expected_buffer_binary != std::string{})
{
const auto actual_buffer_binary = dumpContents(data, " ", " ");
ASSERT_EQ(expected_buffer_binary, actual_buffer_binary);
}
BitReader reader(read_buffer);
int item = 0;
for (const auto & bv : bits_and_vals)
{
SCOPED_TRACE(::testing::Message()
<< "item #" << item << ", width: " << static_cast<UInt32>(bv.first)
<< ", value: " << bin(bv.second)
<< ".\n\n\nBuffer memory:\n" << dumpContents(data));
//EXPECT_EQ(getBits(bv.first, bv.second), reader.peekBits(bv.first));
EXPECT_EQ(getBits(bv.first, bv.second), reader.readBits(bv.first));
++item;
}
}
}
INSTANTIATE_TEST_CASE_P(Simple,
BitIO,
::testing::Values(
TestCaseParameter(
{{9, 0xFFFFFFFF}, {9, 0x00}, {9, 0xFFFFFFFF}, {9, 0x00}, {9, 0xFFFFFFFF}},
"11111111 10000000 00111111 11100000 00001111 11111000 "),
TestCaseParameter(
{{7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {3, 0xFFFF}},
"01111110 11111101 11111011 11110111 11101111 11011111 10111111 01111111 11000000 "),
TestCaseParameter({{33, 0xFF110d0b07050300}, {33, 0xAAEE29251f1d1713}}),
TestCaseParameter({{33, BIT_PATTERN}, {33, BIT_PATTERN}}),
TestCaseParameter({{24, 0xFFFFFFFF}},
"11111111 11111111 11111111 ")
),);
TestCaseParameter primes_case(UInt8 repeat_times, UInt64 pattern)
{
std::vector<std::pair<UInt8, UInt64>> test_data;
{
for (UInt8 r = 0; r < repeat_times; ++r)
{
for (const auto p : PRIMES)
{
test_data.emplace_back(p, pattern);
}
}
}
return TestCaseParameter(test_data);
}
INSTANTIATE_TEST_CASE_P(Primes,
BitIO,
::testing::Values(
primes_case(11, 0xFFFFFFFFFFFFFFFFULL),
primes_case(11, BIT_PATTERN)
),);
TEST(BitHelpers, maskLowBits)
{
EXPECT_EQ(0b00000111, ::maskLowBits<UInt8>(3));
EXPECT_EQ(0b01111111, ::maskLowBits<UInt8>(7));
EXPECT_EQ(0b0000000001111111, ::maskLowBits<UInt16>(7));
EXPECT_EQ(0b0001111111111111, ::maskLowBits<UInt16>(13));
EXPECT_EQ(0b00000111111111111111111111111111, ::maskLowBits<UInt32>(27));
EXPECT_EQ(0b111111111111111111111111111111111, ::maskLowBits<UInt64>(33));
EXPECT_EQ(0b11111111111111111111111111111111111, ::maskLowBits<UInt64>(35));
EXPECT_EQ(0xFF, ::maskLowBits<UInt8>(8));
EXPECT_EQ(0xFFFF, ::maskLowBits<UInt16>(16));
EXPECT_EQ(0xFFFFFFFF, ::maskLowBits<UInt32>(32));
EXPECT_EQ(0xFFFFFFFFFFFFFFFF, ::maskLowBits<UInt64>(64));
}

View File

@ -14,9 +14,9 @@ Don't use Docker from your system repository.
* [pip](https://pypi.python.org/pypi/pip). To install: `sudo apt-get install python-pip`
* [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest`
* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf`
* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf pytest-timeout`
(strongly discouraged) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python-pytest python-dicttoxml python-docker python-pymysql python-pymongo python-tzlocal python-kazoo python-psycopg2 python-kafka`
(strongly discouraged) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python-pytest python-dicttoxml python-docker python-pymysql python-pymongo python-tzlocal python-kazoo python-psycopg2 python-kafka python-pytest-timeout`
If you want to run the tests under a non-privileged user, you must add this user to `docker` group: `sudo usermod -aG docker $USER` and re-login.
(You must close all your sessions; for example, by restarting your computer.)

View File

@ -34,7 +34,7 @@ RUN apt-get update \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike
RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 17.09.1-ce

View File

@ -1,3 +1,4 @@
[pytest]
python_files = test.py
norecursedirs = _instances
timeout = 600

View File

@ -260,24 +260,31 @@ def execute_task(task, cmd_options):
# Tests
@pytest.mark.skip(reason="Fails under asan")
def test_copy1_simple(started_cluster):
execute_task(Task1(started_cluster), [])
@pytest.mark.skip(reason="Fails under asan")
def test_copy1_with_recovering(started_cluster):
execute_task(Task1(started_cluster), ['--copy-fault-probability', str(COPYING_FAIL_PROBABILITY)])
@pytest.mark.skip(reason="Fails under asan")
def test_copy_month_to_week_partition(started_cluster):
execute_task(Task2(started_cluster), [])
@pytest.mark.skip(reason="Fails under asan")
def test_copy_month_to_week_partition_with_recovering(started_cluster):
execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.3)])
@pytest.mark.skip(reason="Fails under asan")
def test_block_size(started_cluster):
execute_task(Task_test_block_size(started_cluster), [])
@pytest.mark.skip(reason="Fails under asan")
def test_no_index(started_cluster):
execute_task(Task_no_index(started_cluster), [])
@pytest.mark.skip(reason="Fails under asan")
def test_no_arg(started_cluster):
execute_task(Task_no_arg(started_cluster), [])

View File

@ -51,6 +51,7 @@ def test_deduplication_window_in_seconds(started_cluster):
# Currently this test just reproduces incorrect behavior that should be fixed
@pytest.mark.skip(reason="Flapping test")
def test_deduplication_works_in_case_of_intensive_inserts(started_cluster):
inserters = []
fetchers = []

View File

@ -78,7 +78,7 @@ def test_reconnect(started_cluster):
assert remote.query("SELECT count(*) FROM local1").strip() == '3'
@pytest.mark.skip(reason="Flapping test")
def test_inserts_batching(started_cluster):
instance = instance_test_inserts_batching

View File

@ -318,7 +318,7 @@ def test_kafka_materialized_view(kafka_cluster):
DROP TABLE test.view;
''')
@pytest.mark.skip(reason="Hungs")
def test_kafka_flush_on_big_message(kafka_cluster):
# Create batchs of messages of size ~100Kb
kafka_messages = 10000

View File

@ -0,0 +1,67 @@
<test>
<name>DoubleDelta Codec</name>
<type>once</type>
<stop_conditions>
<any_of>
<average_speed_not_changing_for_ms>1000</average_speed_not_changing_for_ms>
<total_time_ms>2000</total_time_ms>
</any_of>
</stop_conditions>
<substitutions>
<substitution>
<name>table_suffix</name>
<values>
<value>dd</value>
<value>lz4</value>
<value>dd_lz4</value>
</values>
</substitution>
</substitutions>
<create_query>CREATE TABLE IF NOT EXISTS seq_dd (n UInt64 CODEC(DoubleDelta, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
<create_query>CREATE TABLE IF NOT EXISTS mon_dd AS seq_dd;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS rnd_dd AS seq_dd;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS seq_lz4 (n UInt64 CODEC(LZ4, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
<create_query>CREATE TABLE IF NOT EXISTS mon_lz4 AS seq_lz4;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS rnd_lz4 AS seq_lz4;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS seq_dd_lz4 (n UInt64 CODEC(DoubleDelta, LZ4, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
<create_query>CREATE TABLE IF NOT EXISTS mon_dd_lz4 AS seq_dd_lz4;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS rnd_dd_lz4 AS seq_dd_lz4;</create_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT number FROM system.numbers LIMIT 100000</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT number*67+(rand()%67) FROM system.numbers LIMIT 100000</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT rand() FROM system.numbers LIMIT 100000</fill_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
<query>INSERT INTO seq_{table_suffix} (n) SELECT number FROM system.numbers SETTINGS max_threads=1</query>
<query>INSERT INTO mon_{table_suffix} (n) SELECT number*67+(rand()%67) FROM system.numbers SETTINGS max_threads=1</query>
<query>INSERT INTO rnd_{table_suffix} (n) SELECT rand() FROM system.numbers SETTINGS max_threads=1</query>
<query>SELECT count(n) FROM seq_{table_suffix} SETTINGS max_threads=1</query>
<query>SELECT count(n) FROM mon_{table_suffix} SETTINGS max_threads=1</query>
<query>SELECT count(n) FROM rnd_{table_suffix} SETTINGS max_threads=1</query>
<drop_query>DROP TABLE IF EXISTS seq_{table_suffix}</drop_query>
<drop_query>DROP TABLE IF EXISTS mon_{table_suffix}</drop_query>
<drop_query>DROP TABLE IF EXISTS rnd_{table_suffix}</drop_query>
</test>

View File

@ -0,0 +1,67 @@
<test>
<name>Gorilla Codec</name>
<type>once</type>
<stop_conditions>
<any_of>
<average_speed_not_changing_for_ms>1000</average_speed_not_changing_for_ms>
<total_time_ms>2000</total_time_ms>
</any_of>
</stop_conditions>
<substitutions>
<substitution>
<name>table_suffix</name>
<values>
<value>g</value>
<value>lz4</value>
<value>g_lz4</value>
</values>
</substitution>
</substitutions>
<create_query>CREATE TABLE IF NOT EXISTS seq_g (n UInt64 CODEC(Gorilla, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
<create_query>CREATE TABLE IF NOT EXISTS mon_g AS seq_g;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS rnd_g AS seq_g;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS seq_lz4 (n UInt64 CODEC(LZ4, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
<create_query>CREATE TABLE IF NOT EXISTS mon_lz4 AS seq_lz4;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS rnd_lz4 AS seq_lz4;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS seq_g_lz4 (n UInt64 CODEC(Gorilla, LZ4, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
<create_query>CREATE TABLE IF NOT EXISTS mon_g_lz4 AS seq_g_lz4;</create_query>
<create_query>CREATE TABLE IF NOT EXISTS rnd_g_lz4 AS seq_g_lz4;</create_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT number/pi() FROM system.numbers LIMIT 100000</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT number+sin(number) FROM system.numbers LIMIT 100000</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT (rand() - 4294967295)/pi() FROM system.numbers LIMIT 100000</fill_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
<query>INSERT INTO seq_{table_suffix} (n) SELECT number/pi() FROM system.numbers SETTINGS max_threads=1</query>
<query>INSERT INTO mon_{table_suffix} (n) SELECT number+sin(number) FROM system.numbers SETTINGS max_threads=1</query>
<query>INSERT INTO rnd_{table_suffix} (n) SELECT (rand() - 4294967295)/pi() FROM system.numbers SETTINGS max_threads=1</query>
<query>SELECT count(n) FROM seq_{table_suffix} SETTINGS max_threads=1</query>
<query>SELECT count(n) FROM mon_{table_suffix} SETTINGS max_threads=1</query>
<query>SELECT count(n) FROM rnd_{table_suffix} SETTINGS max_threads=1</query>
<drop_query>DROP TABLE IF EXISTS seq_{table_suffix}</drop_query>
<drop_query>DROP TABLE IF EXISTS mon_{table_suffix}</drop_query>
<drop_query>DROP TABLE IF EXISTS rnd_{table_suffix}</drop_query>
</test>

View File

@ -0,0 +1,10 @@
U64
U32
U16
U8
I64
I32
I16
I8
DT
D

View File

@ -0,0 +1,151 @@
USE test;
DROP TABLE IF EXISTS codecTest;
CREATE TABLE codecTest (
key UInt64,
ref_valueU64 UInt64,
ref_valueU32 UInt32,
ref_valueU16 UInt16,
ref_valueU8 UInt8,
ref_valueI64 Int64,
ref_valueI32 Int32,
ref_valueI16 Int16,
ref_valueI8 Int8,
ref_valueDT DateTime,
ref_valueD Date,
valueU64 UInt64 CODEC(DoubleDelta),
valueU32 UInt32 CODEC(DoubleDelta),
valueU16 UInt16 CODEC(DoubleDelta),
valueU8 UInt8 CODEC(DoubleDelta),
valueI64 Int64 CODEC(DoubleDelta),
valueI32 Int32 CODEC(DoubleDelta),
valueI16 Int16 CODEC(DoubleDelta),
valueI8 Int8 CODEC(DoubleDelta),
valueDT DateTime CODEC(DoubleDelta),
valueD Date CODEC(DoubleDelta)
) Engine = MergeTree ORDER BY key;
-- checking for overflow
INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueI64, valueI64)
VALUES (101, 18446744073709551615, 18446744073709551615, 9223372036854775807, 9223372036854775807), (202, 0, 0, -9223372036854775808, -9223372036854775808), (203, 18446744073709551615, 18446744073709551615, 9223372036854775807, 9223372036854775807);
-- n^3 covers all double delta storage cases, from a small difference between neighbouring values (stride) to a big one.
INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_valueU16, valueU16, ref_valueU8, valueU8, ref_valueI64, valueI64, ref_valueI32, valueI32, ref_valueI16, valueI16, ref_valueI8, valueI8, ref_valueDT, valueDT, ref_valueD, valueD)
SELECT number as n, n * n * n as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v)
FROM system.numbers LIMIT 101, 100;
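-- For f(n) = n^3 the first difference is 3*n^2 - 3*n + 1 and the second difference
-- (the double delta) is 6*n - 6, so successive items grow linearly and exercise
-- progressively wider prefix widths.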
-- best case - constant stride
INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_valueU16, valueU16, ref_valueU8, valueU8, ref_valueI64, valueI64, ref_valueI32, valueI32, ref_valueI16, valueI16, ref_valueI8, valueI8, ref_valueDT, valueDT, ref_valueD, valueD)
SELECT number as n, n as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v)
FROM system.numbers LIMIT 201, 100;
-- worst case - random stride
INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_valueU16, valueU16, ref_valueU8, valueU8, ref_valueI64, valueI64, ref_valueI32, valueI32, ref_valueI16, valueI16, ref_valueI8, valueI8, ref_valueDT, valueDT, ref_valueD, valueD)
SELECT number as n, n + (rand64() - 9223372036854775807)/1000 as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v)
FROM system.numbers LIMIT 301, 100;
SELECT 'U64';
SELECT
key,
ref_valueU64, valueU64, ref_valueU64 - valueU64 as dU64
FROM codecTest
WHERE
dU64 != 0
LIMIT 10;
SELECT 'U32';
SELECT
key,
ref_valueU32, valueU32, ref_valueU32 - valueU32 as dU32
FROM codecTest
WHERE
dU32 != 0
LIMIT 10;
SELECT 'U16';
SELECT
key,
ref_valueU16, valueU16, ref_valueU16 - valueU16 as dU16
FROM codecTest
WHERE
dU16 != 0
LIMIT 10;
SELECT 'U8';
SELECT
key,
ref_valueU8, valueU8, ref_valueU8 - valueU8 as dU8
FROM codecTest
WHERE
dU8 != 0
LIMIT 10;
SELECT 'I64';
SELECT
key,
ref_valueI64, valueI64, ref_valueI64 - valueI64 as dI64
FROM codecTest
WHERE
dI64 != 0
LIMIT 10;
SELECT 'I32';
SELECT
key,
ref_valueI32, valueI32, ref_valueI32 - valueI32 as dI32
FROM codecTest
WHERE
dI32 != 0
LIMIT 10;
SELECT 'I16';
SELECT
key,
ref_valueI16, valueI16, ref_valueI16 - valueI16 as dI16
FROM codecTest
WHERE
dI16 != 0
LIMIT 10;
SELECT 'I8';
SELECT
key,
ref_valueI8, valueI8, ref_valueI8 - valueI8 as dI8
FROM codecTest
WHERE
dI8 != 0
LIMIT 10;
SELECT 'DT';
SELECT
key,
ref_valueDT, valueDT, ref_valueDT - valueDT as dDT
FROM codecTest
WHERE
dDT != 0
LIMIT 10;
SELECT 'D';
SELECT
key,
ref_valueD, valueD, ref_valueD - valueD as dD
FROM codecTest
WHERE
dD != 0
LIMIT 10;
DROP TABLE IF EXISTS codecTest;

View File

@ -0,0 +1,2 @@
F64
F32

View File

@ -0,0 +1,63 @@
USE test;
DROP TABLE IF EXISTS codecTest;
CREATE TABLE codecTest (
key UInt64,
name String,
ref_valueF64 Float64,
ref_valueF32 Float32,
valueF64 Float64 CODEC(Gorilla),
valueF32 Float32 CODEC(Gorilla)
) Engine = MergeTree ORDER BY key;
-- best case - same value
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
SELECT number AS n, 'e()', e() AS v, v, v, v FROM system.numbers LIMIT 1, 100;
-- good case - values that grow slowly
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
SELECT number AS n, 'log2(n)', log2(n) AS v, v, v, v FROM system.numbers LIMIT 101, 100;
-- bad case - values differ significantly
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
SELECT number AS n, 'n*sqrt(n)', n*sqrt(n) AS v, v, v, v FROM system.numbers LIMIT 201, 100;
-- worst case - almost random values
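-- (consecutive sin(n^5) values have essentially uncorrelated bit patterns, so the XOR of
--  neighbouring values keeps many significant bits and there is little to truncate)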
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
SELECT number AS n, 'sin(n*n*n*n*n)', sin(n * n * n * n * n) AS v, v, v, v FROM system.numbers LIMIT 301, 100;
-- These floating-point values are expected to be binary-equal, so comparing them by value is OK here.
-- Referencing the previous row's key, value, and case name to simplify debugging.
SELECT 'F64';
SELECT
c1.key, c1.name,
c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64,
'prev:',
c2.key, c2.ref_valueF64
FROM
codecTest as c1, codecTest as c2
WHERE
dF64 != 0
AND
c2.key = c1.key - 1
LIMIT 10;
SELECT 'F32';
SELECT
c1.key, c1.name,
c1.ref_valueF32, c1.valueF32, c1.ref_valueF32 - c1.valueF32 AS dF32,
'prev:',
c2.key, c2.ref_valueF32
FROM
codecTest as c1, codecTest as c2
WHERE
dF32 != 0
AND
c2.key = c1.key - 1
LIMIT 10;
DROP TABLE IF EXISTS codecTest;

View File

@ -1,6 +1,6 @@
#!/bin/sh -e
. /usr/share/debconf/confmodule
test -f /usr/share/debconf/confmodule && . /usr/share/debconf/confmodule
db_fget clickhouse-server/default-password seen || true
password_seen="$RET"

View File

@ -11,17 +11,25 @@ CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
OS=${OS=`lsb_release -is 2>/dev/null || uname -s ||:`}
OS=${OS=`lsb_release -is 2>/dev/null ||:`}
if [ -z "$OS" ]; then
test -f /etc/os-release && . /etc/os-release && OS=$ID
fi
OS=${OS=`uname -s ||:`}
. /usr/share/debconf/confmodule
test -f /usr/share/debconf/confmodule && . /usr/share/debconf/confmodule
test -f /etc/default/clickhouse && . /etc/default/clickhouse
if [ "$1" = configure ]; then
if [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ] || [ "$OS" = "CentOS" ] || [ "$OS" = "Fedora" ]; then
is_rh=1
fi
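# RPM scriptlets receive numeric arguments rather than "configure", so on Red Hat-family
# systems the configure block also runs unconditionally (assumed rationale for the is_rh check).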
if [ "$1" = configure ] || [ -n "$is_rh" ]; then
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
# if old rc.d service present - remove it
if [ -x "/etc/init.d/clickhouse-server" ]; then
update-rc.d clickhouse-server remove
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server remove
echo "ClickHouse init script has migrated to systemd. Please manually stop old server and restart the service: sudo killall clickhouse-server && sleep 5 && sudo service clickhouse-server restart"
fi
@ -30,17 +38,17 @@ if [ "$1" = configure ]; then
else
# If you are downgrading to a version older than 1.1.54336, run: systemctl disable clickhouse-server
if [ -x "/etc/init.d/clickhouse-server" ]; then
if [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]; then
echo # TODO
if [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
else
update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
echo # TODO [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]
fi
fi
fi
# Make sure the administrative user exists
if ! getent passwd ${CLICKHOUSE_USER} > /dev/null; then
if [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]; then
if [ -n "$is_rh" ]; then
adduser --system --no-create-home --home /nonexistent \
--shell /bin/false ${CLICKHOUSE_USER} > /dev/null
else
@ -123,14 +131,16 @@ Please fix this and reinstall this package." >&2
rm -f ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.cpp ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.so ||:
fi
db_get clickhouse-server/default-password
defaultpassword="$RET"
if [ -n "$defaultpassword" ]; then
echo "<yandex><users><default><password>$defaultpassword</password></default></users></yandex>" > ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
fi
if [ -f /usr/share/debconf/confmodule ]; then
db_get clickhouse-server/default-password
defaultpassword="$RET"
if [ -n "$defaultpassword" ]; then
echo "<yandex><users><default><password>$defaultpassword</password></default></users></yandex>" > ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
fi
# everything went well, so now let's reset the password
db_set clickhouse-server/default-password ""
# ... done with debconf here
db_stop
# everything went well, so now let's reset the password
db_set clickhouse-server/default-password ""
# ... done with debconf here
db_stop
fi
fi

View File

@ -101,6 +101,8 @@ Possible `level` range: \[3, 12\]. Default value: 9. Greater values stands for b
Greater values stand for better compression and higher CPU usage.
- `Delta(delta_bytes)` - compression approach where raw values are replaced with the difference of two neighbouring values. Up to `delta_bytes` are used for storing the delta value.
Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if it equals 1, 2, 4, or 8, and 1 otherwise.
- `DoubleDelta` - stores the delta of deltas in compact binary form, compressing values down to 1 bit (in the best case). The best compression rates are achieved on monotonic sequences with a constant stride, e.g. time samples. Can be used with any fixed-width type. The implementation is based on the [Gorilla paper](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf), extended to support 64-bit types. The drawback is 1 extra bit per encoded delta compared to the original scheme: 5-bit prefixes instead of 4-bit prefixes, needed to support 64-bit deltas.
- `Gorilla` - stores (parts of) XORed values in compact binary form, compressing values down to 1 bit (in the best case). The best compression rate is achieved when neighbouring values are binary-equal. The basic use case is floating-point data that does not change rapidly. The implementation is based on the [Gorilla paper](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf), extended to support 64-bit types.
Syntax example:
```
CREATE TABLE timeseries_example -- hypothetical table, for illustration only
(
    dt DateTime CODEC(DoubleDelta),
    value Float32 CODEC(Gorilla)
) Engine = MergeTree ORDER BY dt;
```
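To make the DoubleDelta intuition concrete, here is a minimal illustration using `arrayDifference` applied twice (i.e. the delta of deltas); the expected results are shown in the comments:
```
-- Constant stride: the double deltas are all zero after the warm-up values (the best case).
SELECT arrayDifference(arrayDifference([10, 12, 14, 16, 18])); -- [0, 2, 0, 0, 0]
-- Cubic growth: the double deltas grow linearly, exercising progressively wider prefixes.
SELECT arrayDifference(arrayDifference([1, 8, 27, 64, 125])); -- [0, 7, 12, 18, 24]
```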
View File

@ -31,8 +31,8 @@
set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd $CURDIR
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd $CUR_DIR
source "./utils/release/release_lib.sh"
@ -68,6 +68,12 @@ do
# Wrong but fast pbuilder mode: create base package with all depends
EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake ninja-build gcc-7 g++-7 libc6-dev libicu-dev libreadline-dev psmisc bash expect python python-lxml python-termcolor python-requests curl perl sudo openssl netcat-openbsd"
shift
elif [[ $1 == '--rpm' ]]; then
MAKE_RPM=1
shift
elif [[ $1 == '--tgz' ]]; then
MAKE_TGZ=1
shift
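# --rpm and --tgz post-process the freshly built .deb packages with alien;
# see make_rpm and make_tgz in utils/release/release_lib.sh.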
else
echo "Unknown option $1"
exit 2
@ -109,40 +115,50 @@ echo -e "\nCurrent version is $VERSION_STRING"
gen_changelog "$VERSION_STRING" "" "$AUTHOR" ""
if [ -z "$USE_PBUILDER" ] ; then
DEB_CC=${DEB_CC:=`which gcc-7 gcc-8 gcc | head -n1`}
DEB_CXX=${DEB_CXX:=`which g++-7 g++-8 g++ | head -n1`}
# Build (only binary packages).
debuild --preserve-env -e PATH \
-e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e CMAKE_FLAGS="$CMAKE_FLAGS" \
-b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS}
else
export DIST=${DIST:=bionic}
export SET_BUILDRESULT=${SET_BUILDRESULT:=$CURDIR/..}
if [ -z "$NO_BUILD" ] ; then
if [ -z "$USE_PBUILDER" ] ; then
DEB_CC=${DEB_CC:=`which gcc-7 gcc-8 gcc | head -n1`}
DEB_CXX=${DEB_CXX:=`which g++-7 g++-8 g++ | head -n1`}
# Build (only binary packages).
debuild --preserve-env -e PATH \
-e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e CMAKE_FLAGS="$CMAKE_FLAGS" \
-b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS}
else
export DIST=${DIST:=bionic}
export SET_BUILDRESULT=${SET_BUILDRESULT:=$CUR_DIR/..}
if [[ -z `which pbuilder` ]] ; then
sudo apt install -y pbuilder devscripts ccache fakeroot debhelper debian-archive-keyring debian-keyring lsb-release
fi
. $CURDIR/debian/.pbuilderrc
if [[ ! -e "/usr/share/debootstrap/scripts/${DIST}" ]] ; then
sudo ln -s gutsy /usr/share/debootstrap/scripts/${DIST}
fi
if [[ -n "$FORCE_PBUILDER_CREATE" || ! -e "$BASETGZ" ]] ; then
echo Creating base system $BASETGZ
[ ! -e "/usr/share/debootstrap/scripts/${DIST}" ] && sudo ln -s gutsy /usr/share/debootstrap/scripts/${DIST}
sudo --preserve-env bash -x pbuilder create --configfile $CURDIR/debian/.pbuilderrc $PBUILDER_OPT
fi
if [ "$PBUILDER_AUTOUPDATE" -gt 0 ]; then
# Update every 3 days (60*24*3 minutes)
if [[ -n "$PBUILDER_UPDATE" ]] || test `find "$BASETGZ" -mmin +$PBUILDER_AUTOUPDATE` ; then
echo Updating base system $BASETGZ
sudo --preserve-env pbuilder update --configfile $CURDIR/debian/.pbuilderrc $PBUILDER_OPT
if [[ -z `which pbuilder` ]] ; then
sudo apt install -y pbuilder devscripts ccache fakeroot debhelper debian-archive-keyring debian-keyring lsb-release
fi
fi
pdebuild --configfile $CURDIR/debian/.pbuilderrc -- $PBUILDER_OPT
. $CUR_DIR/debian/.pbuilderrc
if [[ ! -e "/usr/share/debootstrap/scripts/${DIST}" ]] ; then
sudo ln -s gutsy /usr/share/debootstrap/scripts/${DIST}
fi
if [[ -n "$FORCE_PBUILDER_CREATE" || ! -e "$BASETGZ" ]] ; then
echo Creating base system $BASETGZ
[ ! -e "/usr/share/debootstrap/scripts/${DIST}" ] && sudo ln -s gutsy /usr/share/debootstrap/scripts/${DIST}
sudo --preserve-env bash -x pbuilder create --configfile $CUR_DIR/debian/.pbuilderrc $PBUILDER_OPT
fi
if [ "$PBUILDER_AUTOUPDATE" -gt 0 ]; then
# Update every 3 days (60*24*3 minutes)
if [[ -n "$PBUILDER_UPDATE" ]] || test `find "$BASETGZ" -mmin +$PBUILDER_AUTOUPDATE` ; then
echo Updating base system $BASETGZ
sudo --preserve-env pbuilder update --configfile $CUR_DIR/debian/.pbuilderrc $PBUILDER_OPT
fi
fi
pdebuild --configfile $CUR_DIR/debian/.pbuilderrc -- $PBUILDER_OPT
fi
fi
if [ -n "$MAKE_RPM" ]; then
make_rpm
fi
if [ -n "$MAKE_TGZ" ]; then
make_tgz
fi

View File

@ -179,3 +179,92 @@ function gen_dockerfiles {
VERSION_STRING="$1"
ls -1 docker/*/Dockerfile | xargs sed -i -r -e 's/ARG version=.+$/ARG version='$VERSION_STRING'/'
}
function make_rpm {
get_version
VERSION_STRING+=$VERSION_POSTFIX
VERSION=$VERSION_STRING
PACKAGE_DIR=../
function deb_unpack {
rm -rf $PACKAGE-$VERSION
alien --verbose --generate --to-rpm --scripts ${PACKAGE_DIR}${PACKAGE}_${VERSION}_${ARCH}.deb
cd $PACKAGE-$VERSION
mv ${PACKAGE}-$VERSION-2.spec ${PACKAGE}-$VERSION-2.spec.tmp
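# Strip %dir entries for standard system directories from the alien-generated spec:
# claiming ownership of /, /usr and friends would conflict with the distribution's own packages.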
cat ${PACKAGE}-$VERSION-2.spec.tmp \
| grep -vF '%dir "/"' \
| grep -vF '%dir "/usr/"' \
| grep -vF '%dir "/usr/bin/"' \
| grep -vF '%dir "/usr/lib/"' \
| grep -vF '%dir "/usr/lib/debug/"' \
| grep -vF '%dir "/usr/lib/.build-id/"' \
| grep -vF '%dir "/usr/share/"' \
| grep -vF '%dir "/usr/share/doc/"' \
| grep -vF '%dir "/lib/"' \
| grep -vF '%dir "/lib/systemd/"' \
| grep -vF '%dir "/lib/systemd/system/"' \
| grep -vF '%dir "/etc/"' \
| grep -vF '%dir "/etc/security/"' \
| grep -vF '%dir "/etc/security/limits.d/"' \
| grep -vF '%dir "/etc/init.d/"' \
| grep -vF '%dir "/etc/cron.d/"' \
| grep -vF '%dir "/etc/systemd/system/"' \
| grep -vF '%dir "/etc/systemd/"' \
> ${PACKAGE}-$VERSION-2.spec
}
function rpm_pack {
rpmbuild --buildroot="$CUR_DIR/${PACKAGE}-$VERSION" -bb --target ${TARGET} "${PACKAGE}-$VERSION-2.spec"
cd $CUR_DIR
}
function unpack_pack {
deb_unpack
rpm_pack
}
PACKAGE=clickhouse-server
ARCH=all
TARGET=noarch
unpack_pack
PACKAGE=clickhouse-client
ARCH=all
TARGET=noarch
unpack_pack
PACKAGE=clickhouse-test
ARCH=all
TARGET=noarch
deb_unpack
mv ${PACKAGE}-$VERSION-2.spec ${PACKAGE}-$VERSION-2.spec_tmp
echo "Requires: python2" >> ${PACKAGE}-$VERSION-2.spec
#echo "Requires: python2-termcolor" >> ${PACKAGE}-$VERSION-2.spec
cat ${PACKAGE}-$VERSION-2.spec_tmp >> ${PACKAGE}-$VERSION-2.spec
rpm_pack
PACKAGE=clickhouse-common-static
ARCH=amd64
TARGET=x86_64
unpack_pack
PACKAGE=clickhouse-common-static-dbg
ARCH=amd64
TARGET=x86_64
unpack_pack
mv clickhouse-*-${VERSION_STRING}-2.*.rpm ${PACKAGE_DIR}
}
function make_tgz {
get_version
VERSION_STRING+=$VERSION_POSTFIX
VERSION=$VERSION_STRING
PACKAGE_DIR=../
for PACKAGE in clickhouse-server clickhouse-client clickhouse-test clickhouse-common-static clickhouse-common-static-dbg; do
alien --verbose --to-tgz ${PACKAGE_DIR}${PACKAGE}_${VERSION}_*.deb
done
mv clickhouse-*-${VERSION_STRING}.tgz ${PACKAGE_DIR}
}

View File

@ -94,7 +94,7 @@
</div>
<div id="announcement" class="colored-block">
<div class="page">
Upcoming Meetups: <a class="announcement-link" href="https://events.yandex.ru/events/ClickHouse/26-June-2019/" rel="external nofollow" target="_blank">Novosibirsk</a> on June 26 and <a class="announcement-link" href="https://www.huodongxing.com/event/3483759917300" rel="external nofollow" target="_blank">Shenzhen</a> on October 20
Upcoming Meetups: <a class="announcement-link" href="https://yandex.ru/promo/metrica/clickhouse-minsk" rel="external nofollow" target="_blank">Minsk</a> on July 11 and <a class="announcement-link" href="https://www.huodongxing.com/event/3483759917300" rel="external nofollow" target="_blank">Shenzhen</a> on October 20
</div>
</div>
<div class="page">