mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge remote-tracking branch 'main/master' into h3-integration
This commit is contained in:
commit
93523416d7
@ -14,5 +14,6 @@ ClickHouse is an open-source column-oriented database management system that all
|
||||
## Upcoming Events
|
||||
* [ClickHouse on HighLoad++ Siberia](https://www.highload.ru/siberia/2019/abstracts/5348) on June 24-25.
|
||||
* [ClickHouse Meetup in Novosibirsk](https://events.yandex.ru/events/ClickHouse/26-June-2019/) on June 26.
|
||||
* [ClickHouse Meetup in Minsk](https://yandex.ru/promo/metrica/clickhouse-minsk) on July 11.
|
||||
* [ClickHouse Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20.
|
||||
* [ClickHouse Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27.
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
|
||||
/** Returns log2 of number, rounded down.
|
||||
@ -30,3 +31,64 @@ inline size_t roundUpToPowerOfTwoOrZero(size_t n)
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline size_t getLeadingZeroBits(T x)
|
||||
{
|
||||
if (!x)
|
||||
return sizeof(x) * 8;
|
||||
|
||||
if constexpr (sizeof(T) <= sizeof(unsigned int))
|
||||
{
|
||||
return __builtin_clz(x);
|
||||
}
|
||||
else if constexpr (sizeof(T) <= sizeof(unsigned long int))
|
||||
{
|
||||
return __builtin_clzl(x);
|
||||
}
|
||||
else
|
||||
{
|
||||
return __builtin_clzll(x);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline size_t getTrailingZeroBits(T x)
|
||||
{
|
||||
if (!x)
|
||||
return sizeof(x) * 8;
|
||||
|
||||
if constexpr (sizeof(T) <= sizeof(unsigned int))
|
||||
{
|
||||
return __builtin_ctz(x);
|
||||
}
|
||||
else if constexpr (sizeof(T) <= sizeof(unsigned long int))
|
||||
{
|
||||
return __builtin_ctzl(x);
|
||||
}
|
||||
else
|
||||
{
|
||||
return __builtin_ctzll(x);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns a mask that has '1' for `bits` LSB set:
|
||||
* maskLowBits<UInt8>(3) => 00000111
|
||||
*/
|
||||
template <typename T>
|
||||
inline T maskLowBits(unsigned char bits)
|
||||
{
|
||||
if (bits == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
T result = static_cast<T>(~T{0});
|
||||
if (bits < sizeof(T) * 8)
|
||||
{
|
||||
result = static_cast<T>(result >> (sizeof(T) * 8 - bits));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
325
dbms/src/Compression/CompressionCodecDoubleDelta.cpp
Normal file
325
dbms/src/Compression/CompressionCodecDoubleDelta.cpp
Normal file
@ -0,0 +1,325 @@
|
||||
#include <Compression/CompressionCodecDoubleDelta.h>
|
||||
#include <Compression/CompressionInfo.h>
|
||||
#include <Compression/CompressionFactory.h>
|
||||
#include <common/unaligned.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <IO/BitHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <type_traits>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_COMPRESS;
|
||||
extern const int CANNOT_DECOMPRESS;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
UInt32 getDeltaTypeByteSize(UInt8 data_bytes_size)
|
||||
{
|
||||
// both delta and double delta can be twice the size of data item, but not less than 32 bits and not more that 64.
|
||||
return std::min(64/8, std::max(32/8, data_bytes_size * 2));
|
||||
}
|
||||
|
||||
UInt32 getCompressedHeaderSize(UInt8 data_bytes_size)
|
||||
{
|
||||
const UInt8 items_count_size = 4;
|
||||
|
||||
return items_count_size + data_bytes_size + getDeltaTypeByteSize(data_bytes_size);
|
||||
}
|
||||
|
||||
UInt32 getCompressedDataSize(UInt8 data_bytes_size, UInt32 uncompressed_size)
|
||||
{
|
||||
const UInt32 items_count = uncompressed_size / data_bytes_size;
|
||||
|
||||
// 11111 + max 64 bits of double delta.
|
||||
const UInt32 max_item_size_bits = 5 + getDeltaTypeByteSize(data_bytes_size) * 8;
|
||||
|
||||
// + 8 is to round up to next byte.
|
||||
return (items_count * max_item_size_bits + 8) / 8;
|
||||
}
|
||||
|
||||
struct WriteSpec
|
||||
{
|
||||
const UInt8 prefix_bits;
|
||||
const UInt8 prefix;
|
||||
const UInt8 data_bits;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
WriteSpec getWriteSpec(const T & value)
|
||||
{
|
||||
if (value > -63 && value < 64)
|
||||
{
|
||||
return WriteSpec{2, 0b10, 7};
|
||||
}
|
||||
else if (value > -255 && value < 256)
|
||||
{
|
||||
return WriteSpec{3, 0b110, 9};
|
||||
}
|
||||
else if (value > -2047 && value < 2048)
|
||||
{
|
||||
return WriteSpec{4, 0b1110, 12};
|
||||
}
|
||||
else if (value > std::numeric_limits<Int32>::min() && value < std::numeric_limits<Int32>::max())
|
||||
{
|
||||
return WriteSpec{5, 0b11110, 32};
|
||||
}
|
||||
else
|
||||
{
|
||||
return WriteSpec{5, 0b11111, 64};
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename DeltaType>
|
||||
UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
{
|
||||
using UnsignedDeltaType = typename std::make_unsigned<DeltaType>::type;
|
||||
|
||||
if (source_size % sizeof(T) != 0)
|
||||
throw Exception("Cannot compress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_COMPRESS);
|
||||
const char * source_end = source + source_size;
|
||||
|
||||
const UInt32 items_count = source_size / sizeof(T);
|
||||
unalignedStore(dest, items_count);
|
||||
dest += sizeof(items_count);
|
||||
|
||||
T prev_value{};
|
||||
DeltaType prev_delta{};
|
||||
|
||||
if (source < source_end)
|
||||
{
|
||||
prev_value = unalignedLoad<T>(source);
|
||||
unalignedStore(dest, prev_value);
|
||||
|
||||
source += sizeof(prev_value);
|
||||
dest += sizeof(prev_value);
|
||||
}
|
||||
|
||||
if (source < source_end)
|
||||
{
|
||||
const T curr_value = unalignedLoad<T>(source);
|
||||
prev_delta = static_cast<DeltaType>(curr_value - prev_value);
|
||||
unalignedStore(dest, prev_delta);
|
||||
|
||||
source += sizeof(curr_value);
|
||||
dest += sizeof(prev_delta);
|
||||
prev_value = curr_value;
|
||||
}
|
||||
|
||||
WriteBuffer buffer(dest, getCompressedDataSize(sizeof(T), source_size - sizeof(T)*2));
|
||||
BitWriter writer(buffer);
|
||||
|
||||
for (; source < source_end; source += sizeof(T))
|
||||
{
|
||||
const T curr_value = unalignedLoad<T>(source);
|
||||
|
||||
const auto delta = curr_value - prev_value;
|
||||
const DeltaType double_delta = static_cast<DeltaType>(delta - static_cast<T>(prev_delta));
|
||||
|
||||
prev_delta = delta;
|
||||
prev_value = curr_value;
|
||||
|
||||
if (double_delta == 0)
|
||||
{
|
||||
writer.writeBits(1, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto sign = std::signbit(double_delta);
|
||||
const auto abs_value = static_cast<UnsignedDeltaType>(std::abs(double_delta));
|
||||
const auto write_spec = getWriteSpec(double_delta);
|
||||
|
||||
writer.writeBits(write_spec.prefix_bits, write_spec.prefix);
|
||||
writer.writeBits(1, sign);
|
||||
writer.writeBits(write_spec.data_bits - 1, abs_value);
|
||||
}
|
||||
}
|
||||
|
||||
writer.flush();
|
||||
|
||||
return sizeof(items_count) + sizeof(prev_value) + sizeof(prev_delta) + buffer.count();
|
||||
}
|
||||
|
||||
template <typename T, typename DeltaType>
|
||||
void decompressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
{
|
||||
const char * source_end = source + source_size;
|
||||
|
||||
const UInt32 items_count = unalignedLoad<UInt32>(source);
|
||||
source += sizeof(items_count);
|
||||
|
||||
T prev_value{};
|
||||
DeltaType prev_delta{};
|
||||
|
||||
if (source < source_end)
|
||||
{
|
||||
prev_value = unalignedLoad<T>(source);
|
||||
unalignedStore(dest, prev_value);
|
||||
|
||||
source += sizeof(prev_value);
|
||||
dest += sizeof(prev_value);
|
||||
}
|
||||
|
||||
if (source < source_end)
|
||||
{
|
||||
prev_delta = unalignedLoad<DeltaType>(source);
|
||||
prev_value = static_cast<T>(prev_value + prev_delta);
|
||||
unalignedStore(dest, prev_value);
|
||||
|
||||
source += sizeof(prev_delta);
|
||||
dest += sizeof(prev_value);
|
||||
}
|
||||
|
||||
ReadBufferFromMemory buffer(source, source_size - sizeof(prev_value) - sizeof(prev_delta) - sizeof(items_count));
|
||||
BitReader reader(buffer);
|
||||
|
||||
// since data is tightly packed, up to 1 bit per value, and last byte is padded with zeroes,
|
||||
// we have to keep track of items to avoid reading more that there is.
|
||||
for (UInt32 items_read = 2; items_read < items_count && !reader.eof(); ++items_read)
|
||||
{
|
||||
DeltaType double_delta = 0;
|
||||
if (reader.readBit() == 1)
|
||||
{
|
||||
const UInt8 data_sizes[] = {6, 8, 11, 31, 63};
|
||||
UInt8 i = 0;
|
||||
for (; i < sizeof(data_sizes) - 1; ++i)
|
||||
{
|
||||
const auto next_bit = reader.readBit();
|
||||
if (next_bit == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
const UInt8 sign = reader.readBit();
|
||||
double_delta = static_cast<DeltaType>(reader.readBits(data_sizes[i]));
|
||||
if (sign)
|
||||
{
|
||||
double_delta *= -1;
|
||||
}
|
||||
}
|
||||
// else if first bit is zero, no need to read more data.
|
||||
|
||||
const T curr_value = static_cast<T>(prev_value + prev_delta + double_delta);
|
||||
unalignedStore(dest, curr_value);
|
||||
dest += sizeof(curr_value);
|
||||
|
||||
prev_delta = curr_value - prev_value;
|
||||
prev_value = curr_value;
|
||||
}
|
||||
}
|
||||
|
||||
UInt8 getDataBytesSize(DataTypePtr column_type)
|
||||
{
|
||||
UInt8 data_bytes_size = 1;
|
||||
if (column_type && column_type->haveMaximumSizeOfValue())
|
||||
{
|
||||
size_t max_size = column_type->getSizeOfValueInMemory();
|
||||
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
|
||||
data_bytes_size = static_cast<UInt8>(max_size);
|
||||
}
|
||||
return data_bytes_size;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
CompressionCodecDoubleDelta::CompressionCodecDoubleDelta(UInt8 data_bytes_size_)
|
||||
: data_bytes_size(data_bytes_size_)
|
||||
{
|
||||
}
|
||||
|
||||
UInt8 CompressionCodecDoubleDelta::getMethodByte() const
|
||||
{
|
||||
return static_cast<UInt8>(CompressionMethodByte::DoubleDelta);
|
||||
}
|
||||
|
||||
String CompressionCodecDoubleDelta::getCodecDesc() const
|
||||
{
|
||||
return "DoubleDelta";
|
||||
}
|
||||
|
||||
UInt32 CompressionCodecDoubleDelta::getMaxCompressedDataSize(UInt32 uncompressed_size) const
|
||||
{
|
||||
const auto result = 2 // common header
|
||||
+ data_bytes_size // max bytes skipped if source is not properly aligned.
|
||||
+ getCompressedHeaderSize(data_bytes_size) // data-specific header
|
||||
+ getCompressedDataSize(data_bytes_size, uncompressed_size);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
UInt32 CompressionCodecDoubleDelta::doCompressData(const char * source, UInt32 source_size, char * dest) const
|
||||
{
|
||||
UInt8 bytes_to_skip = source_size % data_bytes_size;
|
||||
dest[0] = data_bytes_size;
|
||||
dest[1] = bytes_to_skip;
|
||||
memcpy(&dest[2], source, bytes_to_skip);
|
||||
size_t start_pos = 2 + bytes_to_skip;
|
||||
UInt32 compressed_size = 0;
|
||||
switch (data_bytes_size)
|
||||
{
|
||||
case 1:
|
||||
compressed_size = compressDataForType<UInt8, Int16>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
|
||||
break;
|
||||
case 2:
|
||||
compressed_size = compressDataForType<UInt16, Int32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
|
||||
break;
|
||||
case 4:
|
||||
compressed_size = compressDataForType<UInt32, Int64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
|
||||
break;
|
||||
case 8:
|
||||
compressed_size = compressDataForType<UInt64, Int64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
|
||||
break;
|
||||
}
|
||||
|
||||
return 1 + 1 + compressed_size;
|
||||
}
|
||||
|
||||
void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 /* uncompressed_size */) const
|
||||
{
|
||||
UInt8 bytes_size = source[0];
|
||||
UInt8 bytes_to_skip = source[1];
|
||||
|
||||
memcpy(dest, &source[2], bytes_to_skip);
|
||||
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
|
||||
switch (bytes_size)
|
||||
{
|
||||
case 1:
|
||||
decompressDataForType<UInt8, Int16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
||||
break;
|
||||
case 2:
|
||||
decompressDataForType<UInt16, Int32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
||||
break;
|
||||
case 4:
|
||||
decompressDataForType<UInt32, Int64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
||||
break;
|
||||
case 8:
|
||||
decompressDataForType<UInt64, Int64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void CompressionCodecDoubleDelta::useInfoAboutType(DataTypePtr data_type)
|
||||
{
|
||||
data_bytes_size = getDataBytesSize(data_type);
|
||||
}
|
||||
|
||||
void registerCodecDoubleDelta(CompressionCodecFactory & factory)
|
||||
{
|
||||
UInt8 method_code = UInt8(CompressionMethodByte::DoubleDelta);
|
||||
factory.registerCompressionCodecWithType("DoubleDelta", method_code, [&](const ASTPtr &, DataTypePtr column_type) -> CompressionCodecPtr
|
||||
{
|
||||
UInt8 delta_bytes_size = getDataBytesSize(column_type);
|
||||
return std::make_shared<CompressionCodecDoubleDelta>(delta_bytes_size);
|
||||
});
|
||||
}
|
||||
}
|
30
dbms/src/Compression/CompressionCodecDoubleDelta.h
Normal file
30
dbms/src/Compression/CompressionCodecDoubleDelta.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <Compression/ICompressionCodec.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class CompressionCodecDoubleDelta : public ICompressionCodec
|
||||
{
|
||||
public:
|
||||
CompressionCodecDoubleDelta(UInt8 data_bytes_size_);
|
||||
|
||||
UInt8 getMethodByte() const override;
|
||||
|
||||
String getCodecDesc() const override;
|
||||
|
||||
void useInfoAboutType(DataTypePtr data_type) override;
|
||||
|
||||
protected:
|
||||
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
|
||||
|
||||
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
|
||||
|
||||
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
|
||||
|
||||
private:
|
||||
UInt8 data_bytes_size;
|
||||
};
|
||||
|
||||
}
|
326
dbms/src/Compression/CompressionCodecGorilla.cpp
Normal file
326
dbms/src/Compression/CompressionCodecGorilla.cpp
Normal file
@ -0,0 +1,326 @@
|
||||
#include <Compression/CompressionCodecGorilla.h>
|
||||
#include <Compression/CompressionInfo.h>
|
||||
#include <Compression/CompressionFactory.h>
|
||||
#include <common/unaligned.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <IO/BitHelpers.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <type_traits>
|
||||
|
||||
#include <bitset>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_COMPRESS;
|
||||
extern const int CANNOT_DECOMPRESS;
|
||||
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
|
||||
extern const int ILLEGAL_CODEC_PARAMETER;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
constexpr inline UInt8 getBitLengthOfLength(UInt8 data_bytes_size)
|
||||
{
|
||||
// 1-byte value is 8 bits, and we need 4 bits to represent 8 : 1000,
|
||||
// 2-byte 16 bits => 5
|
||||
// 4-byte 32 bits => 6
|
||||
// 8-byte 64 bits => 7
|
||||
const UInt8 bit_lengths[] = {0, 4, 5, 0, 6, 0, 0, 0, 7};
|
||||
assert(data_bytes_size >= 1 && data_bytes_size < sizeof(bit_lengths) && bit_lengths[data_bytes_size] != 0);
|
||||
|
||||
return bit_lengths[data_bytes_size];
|
||||
}
|
||||
|
||||
|
||||
UInt32 getCompressedHeaderSize(UInt8 data_bytes_size)
|
||||
{
|
||||
const UInt8 items_count_size = 4;
|
||||
|
||||
return items_count_size + data_bytes_size;
|
||||
}
|
||||
|
||||
UInt32 getCompressedDataSize(UInt8 data_bytes_size, UInt32 uncompressed_size)
|
||||
{
|
||||
const UInt32 items_count = uncompressed_size / data_bytes_size;
|
||||
static const auto DATA_BIT_LENGTH = getBitLengthOfLength(data_bytes_size);
|
||||
// -1 since there must be at least 1 non-zero bit.
|
||||
static const auto LEADING_ZEROES_BIT_LENGTH = DATA_BIT_LENGTH - 1;
|
||||
|
||||
// worst case (for 32-bit value):
|
||||
// 11 + 5 bits of leading zeroes bit-size + 5 bits of data bit-size + non-zero data bits.
|
||||
const UInt32 max_item_size_bits = 2 + LEADING_ZEROES_BIT_LENGTH + DATA_BIT_LENGTH + data_bytes_size * 8;
|
||||
|
||||
// + 8 is to round up to next byte.
|
||||
return (items_count * max_item_size_bits + 8) / 8;
|
||||
}
|
||||
|
||||
struct binary_value_info
|
||||
{
|
||||
UInt8 leading_zero_bits;
|
||||
UInt8 data_bits;
|
||||
UInt8 trailing_zero_bits;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
binary_value_info getLeadingAndTrailingBits(const T & value)
|
||||
{
|
||||
constexpr UInt8 bit_size = sizeof(T) * 8;
|
||||
|
||||
const UInt8 lz = getLeadingZeroBits(value);
|
||||
const UInt8 tz = getTrailingZeroBits(value);
|
||||
const UInt8 data_size = value == 0 ? 0 : static_cast<UInt8>(bit_size - lz - tz);
|
||||
return binary_value_info{lz, data_size, tz};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
{
|
||||
static const auto DATA_BIT_LENGTH = getBitLengthOfLength(sizeof(T));
|
||||
// -1 since there must be at least 1 non-zero bit.
|
||||
static const auto LEADING_ZEROES_BIT_LENGTH = DATA_BIT_LENGTH - 1;
|
||||
|
||||
if (source_size % sizeof(T) != 0)
|
||||
throw Exception("Cannot compress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_COMPRESS);
|
||||
const char * source_end = source + source_size;
|
||||
|
||||
const UInt32 items_count = source_size / sizeof(T);
|
||||
|
||||
unalignedStore(dest, items_count);
|
||||
dest += sizeof(items_count);
|
||||
|
||||
T prev_value{};
|
||||
// That would cause first XORed value to be written in-full.
|
||||
binary_value_info prev_xored_info{0, 0, 0};
|
||||
|
||||
if (source < source_end)
|
||||
{
|
||||
prev_value = unalignedLoad<T>(source);
|
||||
unalignedStore(dest, prev_value);
|
||||
|
||||
source += sizeof(prev_value);
|
||||
dest += sizeof(prev_value);
|
||||
}
|
||||
|
||||
WriteBuffer buffer(dest, getCompressedDataSize(sizeof(T), source_size - sizeof(items_count) - sizeof(prev_value)));
|
||||
BitWriter writer(buffer);
|
||||
|
||||
while (source < source_end)
|
||||
{
|
||||
const T curr_value = unalignedLoad<T>(source);
|
||||
source += sizeof(curr_value);
|
||||
|
||||
const auto xored_data = curr_value ^ prev_value;
|
||||
const binary_value_info curr_xored_info = getLeadingAndTrailingBits(xored_data);
|
||||
|
||||
if (xored_data == 0)
|
||||
{
|
||||
writer.writeBits(1, 0);
|
||||
}
|
||||
else if (prev_xored_info.data_bits != 0
|
||||
&& prev_xored_info.leading_zero_bits <= curr_xored_info.leading_zero_bits
|
||||
&& prev_xored_info.trailing_zero_bits <= curr_xored_info.trailing_zero_bits)
|
||||
{
|
||||
writer.writeBits(2, 0b10);
|
||||
writer.writeBits(prev_xored_info.data_bits, xored_data >> prev_xored_info.trailing_zero_bits);
|
||||
}
|
||||
else
|
||||
{
|
||||
writer.writeBits(2, 0b11);
|
||||
writer.writeBits(LEADING_ZEROES_BIT_LENGTH, curr_xored_info.leading_zero_bits);
|
||||
writer.writeBits(DATA_BIT_LENGTH, curr_xored_info.data_bits);
|
||||
writer.writeBits(curr_xored_info.data_bits, xored_data >> curr_xored_info.trailing_zero_bits);
|
||||
prev_xored_info = curr_xored_info;
|
||||
}
|
||||
|
||||
prev_value = curr_value;
|
||||
}
|
||||
|
||||
writer.flush();
|
||||
|
||||
return sizeof(items_count) + sizeof(prev_value) + buffer.count();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void decompressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
{
|
||||
static const auto DATA_BIT_LENGTH = getBitLengthOfLength(sizeof(T));
|
||||
// -1 since there must be at least 1 non-zero bit.
|
||||
static const auto LEADING_ZEROES_BIT_LENGTH = DATA_BIT_LENGTH - 1;
|
||||
|
||||
const char * source_end = source + source_size;
|
||||
|
||||
const UInt32 items_count = unalignedLoad<UInt32>(source);
|
||||
source += sizeof(items_count);
|
||||
|
||||
T prev_value{};
|
||||
|
||||
if (source < source_end)
|
||||
{
|
||||
prev_value = unalignedLoad<T>(source);
|
||||
unalignedStore(dest, prev_value);
|
||||
|
||||
source += sizeof(prev_value);
|
||||
dest += sizeof(prev_value);
|
||||
}
|
||||
|
||||
ReadBufferFromMemory buffer(source, source_size - sizeof(items_count) - sizeof(prev_value));
|
||||
BitReader reader(buffer);
|
||||
|
||||
binary_value_info prev_xored_info{0, 0, 0};
|
||||
|
||||
// since data is tightly packed, up to 1 bit per value, and last byte is padded with zeroes,
|
||||
// we have to keep track of items to avoid reading more that there is.
|
||||
for (UInt32 items_read = 1; items_read < items_count && !reader.eof(); ++items_read)
|
||||
{
|
||||
T curr_value = prev_value;
|
||||
binary_value_info curr_xored_info = prev_xored_info;
|
||||
T xored_data{};
|
||||
|
||||
if (reader.readBit() == 1)
|
||||
{
|
||||
if (reader.readBit() == 1)
|
||||
{
|
||||
// 0b11 prefix
|
||||
curr_xored_info.leading_zero_bits = reader.readBits(LEADING_ZEROES_BIT_LENGTH);
|
||||
curr_xored_info.data_bits = reader.readBits(DATA_BIT_LENGTH);
|
||||
curr_xored_info.trailing_zero_bits = sizeof(T) * 8 - curr_xored_info.leading_zero_bits - curr_xored_info.data_bits;
|
||||
}
|
||||
// else: 0b10 prefix - use prev_xored_info
|
||||
|
||||
if (curr_xored_info.leading_zero_bits == 0
|
||||
&& curr_xored_info.data_bits == 0
|
||||
&& curr_xored_info.trailing_zero_bits == 0)
|
||||
{
|
||||
throw Exception("Cannot decompress gorilla-encoded data: corrupted input data.",
|
||||
ErrorCodes::CANNOT_DECOMPRESS);
|
||||
}
|
||||
|
||||
xored_data = reader.readBits(curr_xored_info.data_bits);
|
||||
xored_data <<= curr_xored_info.trailing_zero_bits;
|
||||
curr_value = prev_value ^ xored_data;
|
||||
}
|
||||
// else: 0b0 prefix - use prev_value
|
||||
|
||||
unalignedStore(dest, curr_value);
|
||||
dest += sizeof(curr_value);
|
||||
|
||||
prev_xored_info = curr_xored_info;
|
||||
prev_value = curr_value;
|
||||
}
|
||||
}
|
||||
|
||||
UInt8 getDataBytesSize(DataTypePtr column_type)
|
||||
{
|
||||
UInt8 delta_bytes_size = 1;
|
||||
if (column_type && column_type->haveMaximumSizeOfValue())
|
||||
{
|
||||
size_t max_size = column_type->getSizeOfValueInMemory();
|
||||
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
|
||||
delta_bytes_size = static_cast<UInt8>(max_size);
|
||||
}
|
||||
return delta_bytes_size;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
CompressionCodecGorilla::CompressionCodecGorilla(UInt8 data_bytes_size_)
|
||||
: data_bytes_size(data_bytes_size_)
|
||||
{
|
||||
}
|
||||
|
||||
UInt8 CompressionCodecGorilla::getMethodByte() const
|
||||
{
|
||||
return static_cast<UInt8>(CompressionMethodByte::Gorilla);
|
||||
}
|
||||
|
||||
String CompressionCodecGorilla::getCodecDesc() const
|
||||
{
|
||||
return "Gorilla";
|
||||
}
|
||||
|
||||
UInt32 CompressionCodecGorilla::getMaxCompressedDataSize(UInt32 uncompressed_size) const
|
||||
{
|
||||
const auto result = 2 // common header
|
||||
+ data_bytes_size // max bytes skipped if source is not properly aligned.
|
||||
+ getCompressedHeaderSize(data_bytes_size) // data-specific header
|
||||
+ getCompressedDataSize(data_bytes_size, uncompressed_size);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
UInt32 CompressionCodecGorilla::doCompressData(const char * source, UInt32 source_size, char * dest) const
|
||||
{
|
||||
UInt8 bytes_to_skip = source_size % data_bytes_size;
|
||||
dest[0] = data_bytes_size;
|
||||
dest[1] = bytes_to_skip;
|
||||
memcpy(&dest[2], source, bytes_to_skip);
|
||||
size_t start_pos = 2 + bytes_to_skip;
|
||||
UInt32 compressed_size = 0;
|
||||
switch (data_bytes_size)
|
||||
{
|
||||
case 1:
|
||||
compressed_size = compressDataForType<UInt8>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
|
||||
break;
|
||||
case 2:
|
||||
compressed_size = compressDataForType<UInt16>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
|
||||
break;
|
||||
case 4:
|
||||
compressed_size = compressDataForType<UInt32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
|
||||
break;
|
||||
case 8:
|
||||
compressed_size = compressDataForType<UInt64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
|
||||
break;
|
||||
}
|
||||
|
||||
return 1 + 1 + compressed_size;
|
||||
}
|
||||
|
||||
void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 /* uncompressed_size */) const
|
||||
{
|
||||
UInt8 bytes_size = source[0];
|
||||
UInt8 bytes_to_skip = source[1];
|
||||
|
||||
memcpy(dest, &source[2], bytes_to_skip);
|
||||
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
|
||||
switch (bytes_size)
|
||||
{
|
||||
case 1:
|
||||
decompressDataForType<UInt8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
||||
break;
|
||||
case 2:
|
||||
decompressDataForType<UInt16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
||||
break;
|
||||
case 4:
|
||||
decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
||||
break;
|
||||
case 8:
|
||||
decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void CompressionCodecGorilla::useInfoAboutType(DataTypePtr data_type)
|
||||
{
|
||||
data_bytes_size = getDataBytesSize(data_type);
|
||||
}
|
||||
|
||||
void registerCodecGorilla(CompressionCodecFactory & factory)
|
||||
{
|
||||
UInt8 method_code = UInt8(CompressionMethodByte::Gorilla);
|
||||
factory.registerCompressionCodecWithType("Gorilla", method_code, [&](const ASTPtr &, DataTypePtr column_type) -> CompressionCodecPtr
|
||||
{
|
||||
UInt8 delta_bytes_size = getDataBytesSize(column_type);
|
||||
return std::make_shared<CompressionCodecGorilla>(delta_bytes_size);
|
||||
});
|
||||
}
|
||||
}
|
30
dbms/src/Compression/CompressionCodecGorilla.h
Normal file
30
dbms/src/Compression/CompressionCodecGorilla.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <Compression/ICompressionCodec.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class CompressionCodecGorilla : public ICompressionCodec
|
||||
{
|
||||
public:
|
||||
CompressionCodecGorilla(UInt8 data_bytes_size_);
|
||||
|
||||
UInt8 getMethodByte() const override;
|
||||
|
||||
String getCodecDesc() const override;
|
||||
|
||||
void useInfoAboutType(DataTypePtr data_type) override;
|
||||
|
||||
protected:
|
||||
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
|
||||
|
||||
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
|
||||
|
||||
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
|
||||
|
||||
private:
|
||||
UInt8 data_bytes_size;
|
||||
};
|
||||
|
||||
}
|
@ -138,6 +138,8 @@ void registerCodecMultiple(CompressionCodecFactory & factory);
|
||||
void registerCodecLZ4HC(CompressionCodecFactory & factory);
|
||||
void registerCodecDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecT64(CompressionCodecFactory & factory);
|
||||
void registerCodecDoubleDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecGorilla(CompressionCodecFactory & factory);
|
||||
|
||||
CompressionCodecFactory::CompressionCodecFactory()
|
||||
{
|
||||
@ -149,6 +151,8 @@ CompressionCodecFactory::CompressionCodecFactory()
|
||||
registerCodecLZ4HC(*this);
|
||||
registerCodecDelta(*this);
|
||||
registerCodecT64(*this);
|
||||
registerCodecDoubleDelta(*this);
|
||||
registerCodecGorilla(*this);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -41,6 +41,8 @@ enum class CompressionMethodByte : uint8_t
|
||||
Multiple = 0x91,
|
||||
Delta = 0x92,
|
||||
T64 = 0x93,
|
||||
DoubleDelta = 0x94,
|
||||
Gorilla = 0x95,
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
UInt32 ICompressionCodec::compress(char * source, UInt32 source_size, char * dest) const
|
||||
UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char * dest) const
|
||||
{
|
||||
dest[0] = getMethodByte();
|
||||
UInt8 header_size = getHeaderSize();
|
||||
@ -41,7 +41,7 @@ UInt32 ICompressionCodec::compress(char * source, UInt32 source_size, char * des
|
||||
}
|
||||
|
||||
|
||||
UInt32 ICompressionCodec::decompress(char * source, UInt32 source_size, char * dest) const
|
||||
UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, char * dest) const
|
||||
{
|
||||
UInt8 method = source[0];
|
||||
if (method != getMethodByte())
|
||||
|
@ -35,10 +35,10 @@ public:
|
||||
virtual String getCodecDesc() const = 0;
|
||||
|
||||
/// Compressed bytes from uncompressed source to dest. Dest should preallocate memory
|
||||
virtual UInt32 compress(char * source, UInt32 source_size, char * dest) const;
|
||||
virtual UInt32 compress(const char * source, UInt32 source_size, char * dest) const;
|
||||
|
||||
/// Decompress bytes from compressed source to dest. Dest should preallocate memory
|
||||
virtual UInt32 decompress(char * source, UInt32 source_size, char * dest) const;
|
||||
virtual UInt32 decompress(const char * source, UInt32 source_size, char * dest) const;
|
||||
|
||||
/// Number of bytes, that will be used to compress uncompressed_size bytes with current codec
|
||||
virtual UInt32 getCompressedReserveSize(UInt32 uncompressed_size) const { return getHeaderSize() + getMaxCompressedDataSize(uncompressed_size); }
|
||||
|
385
dbms/src/Compression/tests/gtest_compressionCodec.cpp
Normal file
385
dbms/src/Compression/tests/gtest_compressionCodec.cpp
Normal file
@ -0,0 +1,385 @@
|
||||
#include <Compression/CompressionCodecDoubleDelta.h>
|
||||
#include <Compression/CompressionCodecGorilla.h>
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
#include <boost/format.hpp>
|
||||
|
||||
#include <cmath>
|
||||
#include <initializer_list>
|
||||
#include <iomanip>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <typeinfo>
|
||||
#include <iterator>
|
||||
#include <optional>
|
||||
#include <iostream>
|
||||
#include <bitset>
|
||||
#include <string.h>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wsign-compare"
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#pragma clang diagnostic ignored "-Wundef"
|
||||
#endif
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
template <typename T>
|
||||
std::string bin(const T & value, size_t bits = sizeof(T)*8)
|
||||
{
|
||||
static const UInt8 MAX_BITS = sizeof(T)*8;
|
||||
assert(bits <= MAX_BITS);
|
||||
|
||||
return std::bitset<sizeof(T) * 8>(static_cast<unsigned long long>(value))
|
||||
.to_string().substr(MAX_BITS - bits, bits);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const char* type_name()
|
||||
{
|
||||
return typeid(T).name();
|
||||
}
|
||||
|
||||
template <>
|
||||
const char* type_name<UInt32>()
|
||||
{
|
||||
return "uint32";
|
||||
}
|
||||
|
||||
template <>
|
||||
const char* type_name<Int32>()
|
||||
{
|
||||
return "int32";
|
||||
}
|
||||
|
||||
template <>
|
||||
const char* type_name<UInt64>()
|
||||
{
|
||||
return "uint64";
|
||||
}
|
||||
|
||||
template <>
|
||||
const char* type_name<Int64>()
|
||||
{
|
||||
return "int64";
|
||||
}
|
||||
|
||||
template <>
|
||||
const char* type_name<Float32>()
|
||||
{
|
||||
return "float";
|
||||
}
|
||||
|
||||
template <>
|
||||
const char* type_name<Float64>()
|
||||
{
|
||||
return "double";
|
||||
}
|
||||
|
||||
|
||||
template <typename T, typename ContainerLeft, typename ContainerRight>
|
||||
::testing::AssertionResult EqualByteContainersAs(const ContainerLeft & left, const ContainerRight & right)
|
||||
{
|
||||
static_assert(sizeof(typename ContainerLeft::value_type) == 1, "Expected byte-container");
|
||||
static_assert(sizeof(typename ContainerRight::value_type) == 1, "Expected byte-container");
|
||||
|
||||
::testing::AssertionResult result = ::testing::AssertionSuccess();
|
||||
|
||||
ReadBufferFromMemory left_read_buffer(left.data(), left.size());
|
||||
ReadBufferFromMemory right_read_buffer(right.data(), right.size());
|
||||
|
||||
const auto l_size = left.size() / sizeof(T);
|
||||
const auto r_size = right.size() / sizeof(T);
|
||||
const auto size = std::min(l_size, r_size);
|
||||
|
||||
if (l_size != r_size)
|
||||
{
|
||||
result = ::testing::AssertionFailure() << "size mismatch" << " expected: " << l_size << " got:" << r_size;
|
||||
}
|
||||
|
||||
const auto MAX_MISMATCHING_ITEMS = 5;
|
||||
int mismatching_items = 0;
|
||||
for (int i = 0; i < size; ++i)
|
||||
{
|
||||
T left_value{};
|
||||
left_read_buffer.readStrict(reinterpret_cast<char*>(&left_value), sizeof(left_value));
|
||||
|
||||
T right_value{};
|
||||
right_read_buffer.readStrict(reinterpret_cast<char*>(&right_value), sizeof(right_value));
|
||||
|
||||
if (left_value != right_value)
|
||||
{
|
||||
if (result)
|
||||
{
|
||||
result = ::testing::AssertionFailure();
|
||||
}
|
||||
|
||||
result << "mismatching " << sizeof(T) << "-byte item #" << i
|
||||
<< "\nexpected: " << bin(left_value)
|
||||
<< "\ngot : " << bin(right_value)
|
||||
<< std::endl;
|
||||
|
||||
if (++mismatching_items >= MAX_MISMATCHING_ITEMS)
|
||||
{
|
||||
result << "..." << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct CodecTestParam
|
||||
{
|
||||
std::vector<char> source_data;
|
||||
UInt8 data_byte_size;
|
||||
std::string case_name;
|
||||
};
|
||||
|
||||
std::ostream & operator<<(std::ostream & ostr, const CodecTestParam & param)
|
||||
{
|
||||
return ostr << "name: " << param.case_name
|
||||
<< "\nbyte size: " << static_cast<UInt32>(param.data_byte_size)
|
||||
<< "\ndata size: " << param.source_data.size();
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
CodecTestParam makeParam(Args && ... args)
|
||||
{
|
||||
std::initializer_list<T> vals{static_cast<T>(args)...};
|
||||
std::vector<char> data(sizeof(T) * std::size(vals));
|
||||
|
||||
char * write_pos = data.data();
|
||||
for (const auto & v : vals)
|
||||
{
|
||||
unalignedStore<T>(write_pos, v);
|
||||
write_pos += sizeof(v);
|
||||
}
|
||||
|
||||
return CodecTestParam{std::move(data), sizeof(T),
|
||||
(boost::format("%1% %2%") % (sizeof(T) * std::size(vals)) % " predefined values").str()};
|
||||
}
|
||||
|
||||
template <typename T, size_t Begin = 1, size_t End = 10000, typename Generator>
|
||||
CodecTestParam generateParam(Generator gen, const char* gen_name)
|
||||
{
|
||||
static_assert (End >= Begin, "End must be not less than Begin");
|
||||
|
||||
std::vector<char> data(sizeof(T) * (End - Begin));
|
||||
char * write_pos = data.data();
|
||||
|
||||
for (size_t i = Begin; i < End; ++i)
|
||||
{
|
||||
const T v = gen(static_cast<T>(i));
|
||||
unalignedStore<T>(write_pos, v);
|
||||
write_pos += sizeof(v);
|
||||
}
|
||||
|
||||
return CodecTestParam{std::move(data), sizeof(T),
|
||||
(boost::format("%1% from %2% (%3% => %4%)") % type_name<T>() % gen_name % Begin % End).str()};
|
||||
}
|
||||
|
||||
void TestTranscoding(ICompressionCodec * codec, const CodecTestParam & param)
|
||||
{
|
||||
const auto & source_data = param.source_data;
|
||||
|
||||
const UInt32 encoded_max_size = codec->getCompressedReserveSize(source_data.size());
|
||||
PODArray<char> encoded(encoded_max_size);
|
||||
|
||||
const UInt32 encoded_size = codec->compress(source_data.data(), source_data.size(), encoded.data());
|
||||
encoded.resize(encoded_size);
|
||||
|
||||
PODArray<char> decoded(source_data.size());
|
||||
const UInt32 decoded_size = codec->decompress(encoded.data(), encoded.size(), decoded.data());
|
||||
decoded.resize(decoded_size);
|
||||
|
||||
switch (param.data_byte_size)
|
||||
{
|
||||
case 1:
|
||||
ASSERT_TRUE(EqualByteContainersAs<UInt8>(source_data, decoded));
|
||||
break;
|
||||
case 2:
|
||||
ASSERT_TRUE(EqualByteContainersAs<UInt16>(source_data, decoded));
|
||||
break;
|
||||
case 4:
|
||||
ASSERT_TRUE(EqualByteContainersAs<UInt32>(source_data, decoded));
|
||||
break;
|
||||
case 8:
|
||||
ASSERT_TRUE(EqualByteContainersAs<UInt64>(source_data, decoded));
|
||||
break;
|
||||
default:
|
||||
FAIL() << "Invalid data_byte_size: " << param.data_byte_size;
|
||||
}
|
||||
}
|
||||
|
||||
class CodecTest : public ::testing::TestWithParam<CodecTestParam>
|
||||
{
|
||||
public:
|
||||
static void SetUpTestCase()
|
||||
{
|
||||
// To make random predicatble and avoid failing test "out of the blue".
|
||||
srand(0);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(CodecTest, DoubleDelta)
|
||||
{
|
||||
const auto & param = GetParam();
|
||||
auto codec = std::make_unique<CompressionCodecDoubleDelta>(param.data_byte_size);
|
||||
|
||||
TestTranscoding(codec.get(), param);
|
||||
}
|
||||
|
||||
TEST_P(CodecTest, Gorilla)
|
||||
{
|
||||
const auto & param = GetParam();
|
||||
auto codec = std::make_unique<CompressionCodecGorilla>(param.data_byte_size);
|
||||
|
||||
TestTranscoding(codec.get(), param);
|
||||
}
|
||||
|
||||
auto SameValueGenerator = [](auto value)
|
||||
{
|
||||
return [=](auto i)
|
||||
{
|
||||
return static_cast<decltype(i)>(value);
|
||||
};
|
||||
};
|
||||
|
||||
auto SequentialGenerator = [](auto stride = 1)
|
||||
{
|
||||
return [=](auto i)
|
||||
{
|
||||
using ValueType = decltype(i);
|
||||
return static_cast<ValueType>(stride * i);
|
||||
};
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MonotonicGenerator
|
||||
{
|
||||
MonotonicGenerator(T stride = 1, size_t max_step = 10)
|
||||
: prev_value{},
|
||||
stride(stride),
|
||||
max_step(max_step)
|
||||
{}
|
||||
|
||||
template <typename U>
|
||||
U operator()(U i)
|
||||
{
|
||||
if (!prev_value.has_value())
|
||||
{
|
||||
prev_value = i * stride;
|
||||
}
|
||||
|
||||
const U result = *prev_value + static_cast<T>(stride * (rand() % max_step));
|
||||
|
||||
prev_value = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
std::optional<T> prev_value;
|
||||
const T stride;
|
||||
const size_t max_step;
|
||||
};
|
||||
|
||||
auto MinMaxGenerator = [](auto i)
|
||||
{
|
||||
if (i % 2 == 0)
|
||||
{
|
||||
return std::numeric_limits<decltype(i)>::min();
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::numeric_limits<decltype(i)>::max();
|
||||
}
|
||||
};
|
||||
|
||||
auto RandomGenerator = [](auto i) {return static_cast<decltype(i)>(rand());};
|
||||
|
||||
auto RandomishGenerator = [](auto i)
|
||||
{
|
||||
return static_cast<decltype(i)>(sin(static_cast<double>(i) * i) * i);
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Basic,
|
||||
CodecTest,
|
||||
::testing::Values(
|
||||
makeParam<UInt32>(1, 2, 3, 4),
|
||||
makeParam<UInt64>(1, 2, 3, 4),
|
||||
makeParam<Float32>(1.1, 2.2, 3.3, 4.4),
|
||||
makeParam<Float64>(1.1, 2.2, 3.3, 4.4)
|
||||
),
|
||||
);
|
||||
|
||||
#define G(generator) generator, #generator
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Same,
|
||||
CodecTest,
|
||||
::testing::Values(
|
||||
generateParam<UInt32>(G(SameValueGenerator(1000))),
|
||||
generateParam<Int32>(G(SameValueGenerator(-1000))),
|
||||
generateParam<UInt64>(G(SameValueGenerator(1000))),
|
||||
generateParam<Int64>(G(SameValueGenerator(-1000))),
|
||||
generateParam<Float32>(G(SameValueGenerator(M_E))),
|
||||
generateParam<Float64>(G(SameValueGenerator(M_E)))
|
||||
),
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Sequential,
|
||||
CodecTest,
|
||||
::testing::Values(
|
||||
generateParam<UInt32>(G(SequentialGenerator(1))),
|
||||
generateParam<Int32>(G(SequentialGenerator(-1))),
|
||||
generateParam<UInt64>(G(SequentialGenerator(1))),
|
||||
generateParam<Int64>(G(SequentialGenerator(-1))),
|
||||
generateParam<Float32>(G(SequentialGenerator(M_E))),
|
||||
generateParam<Float64>(G(SequentialGenerator(M_E)))
|
||||
),
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Monotonic,
|
||||
CodecTest,
|
||||
::testing::Values(
|
||||
generateParam<UInt32>(G(MonotonicGenerator<UInt32>(1, 5))),
|
||||
generateParam<Int32>(G(MonotonicGenerator<Int32>(-1, 5))),
|
||||
generateParam<UInt64>(G(MonotonicGenerator<UInt64>(1, 5))),
|
||||
generateParam<Int64>(G(MonotonicGenerator<Int64>(-1, 5))),
|
||||
generateParam<Float32>(G(MonotonicGenerator<Float32>(M_E, 5))),
|
||||
generateParam<Float64>(G(MonotonicGenerator<Float64>(M_E, 5)))
|
||||
),
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Random,
|
||||
CodecTest,
|
||||
::testing::Values(
|
||||
generateParam<UInt32>(G(RandomGenerator)),
|
||||
generateParam<UInt64>(G(RandomGenerator))
|
||||
),
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(RandomLike,
|
||||
CodecTest,
|
||||
::testing::Values(
|
||||
generateParam<Int32>(G(RandomishGenerator)),
|
||||
generateParam<Int64>(G(RandomishGenerator)),
|
||||
generateParam<Float32>(G(RandomishGenerator)),
|
||||
generateParam<Float64>(G(RandomishGenerator))
|
||||
),
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Overflow,
|
||||
CodecTest,
|
||||
::testing::Values(
|
||||
generateParam<UInt32>(G(MinMaxGenerator)),
|
||||
generateParam<Int32>(G(MinMaxGenerator)),
|
||||
generateParam<UInt64>(G(MinMaxGenerator)),
|
||||
generateParam<Int64>(G(MinMaxGenerator))
|
||||
),
|
||||
);
|
178
dbms/src/IO/BitHelpers.h
Normal file
178
dbms/src/IO/BitHelpers.h
Normal file
@ -0,0 +1,178 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Reads data from underlying ReadBuffer bit by bit, max 64 bits at once.
|
||||
*
|
||||
* reads MSB bits first, imagine that you have a data:
|
||||
* 11110000 10101010 00100100 11111110
|
||||
*
|
||||
* Given that r is BitReader created with a ReadBuffer that reads from data above:
|
||||
* r.readBits(3) => 0b111
|
||||
* r.readBit() => 0b1
|
||||
* r.readBits(8) => 0b1010 // 4 leading zero-bits are not shown
|
||||
* r.readBit() => 0b1
|
||||
* r.readBit() => 0b0
|
||||
* r.readBits(15) => 0b10001001001111111
|
||||
* r.readBit() => 0b0
|
||||
**/
|
||||
|
||||
class BitReader
|
||||
{
|
||||
ReadBuffer & buf;
|
||||
|
||||
UInt64 bits_buffer;
|
||||
UInt8 bits_count;
|
||||
static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;
|
||||
|
||||
public:
|
||||
BitReader(ReadBuffer & buf_)
|
||||
: buf(buf_),
|
||||
bits_buffer(0),
|
||||
bits_count(0)
|
||||
{}
|
||||
|
||||
~BitReader()
|
||||
{}
|
||||
|
||||
inline UInt64 readBits(UInt8 bits)
|
||||
{
|
||||
UInt64 result = 0;
|
||||
bits = std::min(static_cast<UInt8>(sizeof(result) * 8), bits);
|
||||
|
||||
while (bits != 0)
|
||||
{
|
||||
if (bits_count == 0)
|
||||
{
|
||||
fillBuffer();
|
||||
if (bits_count == 0)
|
||||
{
|
||||
// EOF.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const auto to_read = std::min(bits, bits_count);
|
||||
|
||||
const UInt64 v = bits_buffer >> (bits_count - to_read);
|
||||
const UInt64 mask = maskLowBits<UInt64>(to_read);
|
||||
const UInt64 value = v & mask;
|
||||
result |= value;
|
||||
|
||||
// unset bits that were read
|
||||
bits_buffer &= ~(mask << (bits_count - to_read));
|
||||
bits_count -= to_read;
|
||||
bits -= to_read;
|
||||
|
||||
result <<= std::min(bits, BIT_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline UInt64 peekBits(UInt8 /*bits*/)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline UInt8 readBit()
|
||||
{
|
||||
return static_cast<UInt8>(readBits(1));
|
||||
}
|
||||
|
||||
inline bool eof() const
|
||||
{
|
||||
return bits_count == 0 && buf.eof();
|
||||
}
|
||||
|
||||
private:
|
||||
void fillBuffer()
|
||||
{
|
||||
auto read = buf.read(reinterpret_cast<char *>(&bits_buffer), BIT_BUFFER_SIZE / 8);
|
||||
bits_buffer = be64toh(bits_buffer);
|
||||
bits_buffer >>= BIT_BUFFER_SIZE - read * 8;
|
||||
|
||||
bits_count = static_cast<UInt8>(read) * 8;
|
||||
}
|
||||
};
|
||||
|
||||
class BitWriter
|
||||
{
|
||||
WriteBuffer & buf;
|
||||
|
||||
UInt64 bits_buffer;
|
||||
UInt8 bits_count;
|
||||
|
||||
static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;
|
||||
|
||||
public:
|
||||
BitWriter(WriteBuffer & buf_)
|
||||
: buf(buf_),
|
||||
bits_buffer(0),
|
||||
bits_count(0)
|
||||
{}
|
||||
|
||||
~BitWriter()
|
||||
{
|
||||
flush();
|
||||
}
|
||||
|
||||
inline void writeBits(UInt8 bits, UInt64 value)
|
||||
{
|
||||
bits = std::min(static_cast<UInt8>(sizeof(value) * 8), bits);
|
||||
|
||||
while (bits > 0)
|
||||
{
|
||||
auto v = value;
|
||||
auto to_write = bits;
|
||||
|
||||
const UInt8 capacity = BIT_BUFFER_SIZE - bits_count;
|
||||
if (capacity < bits)
|
||||
{
|
||||
v >>= bits - capacity;
|
||||
to_write = capacity;
|
||||
}
|
||||
|
||||
const UInt64 mask = maskLowBits<UInt64>(to_write);
|
||||
v &= mask;
|
||||
// assert(v <= 255);
|
||||
|
||||
bits_buffer <<= to_write;
|
||||
bits_buffer |= v;
|
||||
bits_count += to_write;
|
||||
|
||||
if (bits_count < BIT_BUFFER_SIZE)
|
||||
break;
|
||||
|
||||
doFlush();
|
||||
bits -= to_write;
|
||||
}
|
||||
}
|
||||
|
||||
inline void flush()
|
||||
{
|
||||
if (bits_count != 0)
|
||||
{
|
||||
bits_buffer <<= (BIT_BUFFER_SIZE - bits_count);
|
||||
doFlush();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void doFlush()
|
||||
{
|
||||
bits_buffer = htobe64(bits_buffer);
|
||||
buf.write(reinterpret_cast<const char *>(&bits_buffer), (bits_count + 7) / 8);
|
||||
|
||||
bits_count = 0;
|
||||
bits_buffer = 0;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
213
dbms/src/IO/tests/gtest_bit_io.cpp
Normal file
213
dbms/src/IO/tests/gtest_bit_io.cpp
Normal file
@ -0,0 +1,213 @@
|
||||
#include <string.h>
|
||||
#include <IO/BitHelpers.h>
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <IO/MemoryReadWriteBuffer.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <iomanip>
|
||||
#include <memory>
|
||||
#include <bitset>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <typeinfo>
|
||||
#include <iostream>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wsign-compare"
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#pragma clang diagnostic ignored "-Wundef"
|
||||
#endif
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
// Intentionally asymmetric both byte and word-size to detect read and write inconsistencies
|
||||
// each prime bit is set to 0.
|
||||
// v-61 v-53 v-47 v-41 v-37 v-31 v-23 v-17 v-11 v-5
|
||||
const UInt64 BIT_PATTERN = 0b11101011'11101111'10111010'11101111'10101111'10111010'11101011'10101001;
|
||||
const UInt8 PRIMES[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61};
|
||||
|
||||
template <typename T>
|
||||
std::string bin(const T & value, size_t bits = sizeof(T)*8)
|
||||
{
|
||||
static const UInt8 MAX_BITS = sizeof(T)*8;
|
||||
assert(bits <= MAX_BITS);
|
||||
|
||||
return std::bitset<sizeof(T) * 8>(static_cast<unsigned long long>(value))
|
||||
.to_string().substr(MAX_BITS - bits, bits);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T getBits(UInt8 bits, const T & value)
|
||||
{
|
||||
const T mask = ((static_cast<T>(1) << static_cast<T>(bits)) - 1);
|
||||
return value & mask;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::ostream & dumpBuffer(const T begin,
|
||||
const T end,
|
||||
std::ostream * destination,
|
||||
const char* col_sep = " ",
|
||||
const char* row_sep = "\n",
|
||||
const size_t cols_in_row = 8,
|
||||
UInt32 max_bytes = 0xFFFFFFFF)
|
||||
{
|
||||
size_t col = 0;
|
||||
for (auto p = begin; p < end && p - begin < max_bytes; ++p)
|
||||
{
|
||||
*destination << bin(*p);
|
||||
if (++col % cols_in_row == 0)
|
||||
{
|
||||
if (row_sep)
|
||||
*destination << row_sep;
|
||||
}
|
||||
else if (col_sep)
|
||||
{
|
||||
*destination << col_sep;
|
||||
}
|
||||
}
|
||||
|
||||
return *destination;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::string dumpContents(const T& container,
|
||||
const char* col_sep = " ",
|
||||
const char* row_sep = "\n",
|
||||
const size_t cols_in_row = 8)
|
||||
|
||||
{
|
||||
std::stringstream sstr;
|
||||
dumpBuffer(std::begin(container), std::end(container), &sstr, col_sep, row_sep, cols_in_row);
|
||||
|
||||
return sstr.str();
|
||||
}
|
||||
|
||||
struct TestCaseParameter
|
||||
{
|
||||
std::vector<std::pair<UInt8, UInt64>> bits_and_vals;
|
||||
std::string expected_buffer_binary;
|
||||
|
||||
explicit TestCaseParameter(std::vector<std::pair<UInt8, UInt64>> vals, std::string binary = std::string{})
|
||||
: bits_and_vals(std::move(vals)),
|
||||
expected_buffer_binary(binary)
|
||||
{}
|
||||
};
|
||||
|
||||
class BitIO : public ::testing::TestWithParam<TestCaseParameter>
|
||||
{};
|
||||
|
||||
TEST_P(BitIO, WriteAndRead)
|
||||
{
|
||||
const auto & param = GetParam();
|
||||
const auto & bits_and_vals = param.bits_and_vals;
|
||||
const auto & expected_buffer_binary = param.expected_buffer_binary;
|
||||
|
||||
UInt64 max_buffer_size = 0;
|
||||
for (const auto & bv : bits_and_vals)
|
||||
{
|
||||
max_buffer_size += bv.first;
|
||||
}
|
||||
max_buffer_size = (max_buffer_size + 7) / 8;
|
||||
SCOPED_TRACE(max_buffer_size);
|
||||
|
||||
PODArray<char> data(max_buffer_size);
|
||||
|
||||
{
|
||||
WriteBuffer write_buffer(data.data(), data.size());
|
||||
BitWriter writer(write_buffer);
|
||||
for (const auto & bv : bits_and_vals)
|
||||
{
|
||||
writer.writeBits(bv.first, bv.second);
|
||||
}
|
||||
writer.flush();
|
||||
}
|
||||
|
||||
{
|
||||
ReadBufferFromMemory read_buffer(data.data(), data.size());
|
||||
// auto memory_read_buffer = memory_write_buffer.tryGetReadBuffer();
|
||||
|
||||
if (expected_buffer_binary != std::string{})
|
||||
{
|
||||
const auto actual_buffer_binary = dumpContents(data, " ", " ");
|
||||
ASSERT_EQ(expected_buffer_binary, actual_buffer_binary);
|
||||
}
|
||||
|
||||
BitReader reader(read_buffer);
|
||||
|
||||
int item = 0;
|
||||
for (const auto & bv : bits_and_vals)
|
||||
{
|
||||
SCOPED_TRACE(::testing::Message()
|
||||
<< "item #" << item << ", width: " << static_cast<UInt32>(bv.first)
|
||||
<< ", value: " << bin(bv.second)
|
||||
<< ".\n\n\nBuffer memory:\n" << dumpContents(data));
|
||||
|
||||
//EXPECT_EQ(getBits(bv.first, bv.second), reader.peekBits(bv.first));
|
||||
EXPECT_EQ(getBits(bv.first, bv.second), reader.readBits(bv.first));
|
||||
|
||||
++item;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Simple,
|
||||
BitIO,
|
||||
::testing::Values(
|
||||
TestCaseParameter(
|
||||
{{9, 0xFFFFFFFF}, {9, 0x00}, {9, 0xFFFFFFFF}, {9, 0x00}, {9, 0xFFFFFFFF}},
|
||||
"11111111 10000000 00111111 11100000 00001111 11111000 "),
|
||||
TestCaseParameter(
|
||||
{{7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {3, 0xFFFF}},
|
||||
"01111110 11111101 11111011 11110111 11101111 11011111 10111111 01111111 11000000 "),
|
||||
TestCaseParameter({{33, 0xFF110d0b07050300}, {33, 0xAAEE29251f1d1713}}),
|
||||
TestCaseParameter({{33, BIT_PATTERN}, {33, BIT_PATTERN}}),
|
||||
TestCaseParameter({{24, 0xFFFFFFFF}},
|
||||
"11111111 11111111 11111111 ")
|
||||
),);
|
||||
|
||||
TestCaseParameter primes_case(UInt8 repeat_times, UInt64 pattern)
|
||||
{
|
||||
std::vector<std::pair<UInt8, UInt64>> test_data;
|
||||
|
||||
{
|
||||
for (UInt8 r = 0; r < repeat_times; ++r)
|
||||
{
|
||||
for (const auto p : PRIMES)
|
||||
{
|
||||
test_data.emplace_back(p, pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return TestCaseParameter(test_data);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Primes,
|
||||
BitIO,
|
||||
::testing::Values(
|
||||
primes_case(11, 0xFFFFFFFFFFFFFFFFULL),
|
||||
primes_case(11, BIT_PATTERN)
|
||||
),);
|
||||
|
||||
TEST(BitHelpers, maskLowBits)
|
||||
{
|
||||
EXPECT_EQ(0b00000111, ::maskLowBits<UInt8>(3));
|
||||
EXPECT_EQ(0b01111111, ::maskLowBits<UInt8>(7));
|
||||
EXPECT_EQ(0b0000000001111111, ::maskLowBits<UInt16>(7));
|
||||
EXPECT_EQ(0b0001111111111111, ::maskLowBits<UInt16>(13));
|
||||
EXPECT_EQ(0b00000111111111111111111111111111, ::maskLowBits<UInt32>(27));
|
||||
EXPECT_EQ(0b111111111111111111111111111111111, ::maskLowBits<UInt64>(33));
|
||||
EXPECT_EQ(0b11111111111111111111111111111111111, ::maskLowBits<UInt64>(35));
|
||||
|
||||
EXPECT_EQ(0xFF, ::maskLowBits<UInt8>(8));
|
||||
EXPECT_EQ(0xFFFF, ::maskLowBits<UInt16>(16));
|
||||
EXPECT_EQ(0xFFFFFFFF, ::maskLowBits<UInt32>(32));
|
||||
EXPECT_EQ(0xFFFFFFFFFFFFFFFF, ::maskLowBits<UInt64>(64));
|
||||
}
|
@ -14,9 +14,9 @@ Don't use Docker from your system repository.
|
||||
|
||||
* [pip](https://pypi.python.org/pypi/pip). To install: `sudo apt-get install python-pip`
|
||||
* [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest`
|
||||
* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf`
|
||||
* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf pytest-timeout`
|
||||
|
||||
(highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python-pytest python-dicttoxml python-docker python-pymysql python-pymongo python-tzlocal python-kazoo python-psycopg2 python-kafka`
|
||||
(highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python-pytest python-dicttoxml python-docker python-pymysql python-pymongo python-tzlocal python-kazoo python-psycopg2 python-kafka python-pytest-timeout`
|
||||
|
||||
If you want to run the tests under a non-privileged user, you must add this user to `docker` group: `sudo usermod -aG docker $USER` and re-login.
|
||||
(You must close all your sessions (for example, restart your computer))
|
||||
|
@ -34,7 +34,7 @@ RUN apt-get update \
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike
|
||||
RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout
|
||||
|
||||
ENV DOCKER_CHANNEL stable
|
||||
ENV DOCKER_VERSION 17.09.1-ce
|
||||
|
@ -1,3 +1,4 @@
|
||||
[pytest]
|
||||
python_files = test.py
|
||||
norecursedirs = _instances
|
||||
timeout = 600
|
||||
|
@ -260,24 +260,31 @@ def execute_task(task, cmd_options):
|
||||
|
||||
# Tests
|
||||
|
||||
@pytest.mark.skip(reason="Fails under asan")
|
||||
def test_copy1_simple(started_cluster):
|
||||
execute_task(Task1(started_cluster), [])
|
||||
|
||||
@pytest.mark.skip(reason="Fails under asan")
|
||||
def test_copy1_with_recovering(started_cluster):
|
||||
execute_task(Task1(started_cluster), ['--copy-fault-probability', str(COPYING_FAIL_PROBABILITY)])
|
||||
|
||||
@pytest.mark.skip(reason="Fails under asan")
|
||||
def test_copy_month_to_week_partition(started_cluster):
|
||||
execute_task(Task2(started_cluster), [])
|
||||
|
||||
@pytest.mark.skip(reason="Fails under asan")
|
||||
def test_copy_month_to_week_partition_with_recovering(started_cluster):
|
||||
execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.3)])
|
||||
|
||||
@pytest.mark.skip(reason="Fails under asan")
|
||||
def test_block_size(started_cluster):
|
||||
execute_task(Task_test_block_size(started_cluster), [])
|
||||
|
||||
@pytest.mark.skip(reason="Fails under asan")
|
||||
def test_no_index(started_cluster):
|
||||
execute_task(Task_no_index(started_cluster), [])
|
||||
|
||||
@pytest.mark.skip(reason="Fails under asan")
|
||||
def test_no_arg(started_cluster):
|
||||
execute_task(Task_no_arg(started_cluster), [])
|
||||
|
||||
|
@ -51,6 +51,7 @@ def test_deduplication_window_in_seconds(started_cluster):
|
||||
|
||||
|
||||
# Currently this test just reproduce incorrect behavior that sould be fixed
|
||||
@pytest.mark.skip(reason="Flapping test")
|
||||
def test_deduplication_works_in_case_of_intensive_inserts(started_cluster):
|
||||
inserters = []
|
||||
fetchers = []
|
||||
|
@ -78,7 +78,7 @@ def test_reconnect(started_cluster):
|
||||
|
||||
assert remote.query("SELECT count(*) FROM local1").strip() == '3'
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Flapping test")
|
||||
def test_inserts_batching(started_cluster):
|
||||
instance = instance_test_inserts_batching
|
||||
|
||||
|
@ -318,7 +318,7 @@ def test_kafka_materialized_view(kafka_cluster):
|
||||
DROP TABLE test.view;
|
||||
''')
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Hungs")
|
||||
def test_kafka_flush_on_big_message(kafka_cluster):
|
||||
# Create batchs of messages of size ~100Kb
|
||||
kafka_messages = 10000
|
||||
|
67
dbms/tests/performance/codec_double_delta.xml
Normal file
67
dbms/tests/performance/codec_double_delta.xml
Normal file
@ -0,0 +1,67 @@
|
||||
<test>
|
||||
<name>IPv4 Functions</name>
|
||||
|
||||
<type>once</type>
|
||||
<stop_conditions>
|
||||
<any_of>
|
||||
<average_speed_not_changing_for_ms>1000</average_speed_not_changing_for_ms>
|
||||
<total_time_ms>2000</total_time_ms>
|
||||
</any_of>
|
||||
</stop_conditions>
|
||||
|
||||
<substitutions>
|
||||
<substitution>
|
||||
<name>table_suffix</name>
|
||||
<values>
|
||||
<value>dd</value>
|
||||
<value>lz4</value>
|
||||
<value>dd_lz4</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
<create_query>CREATE TABLE IF NOT EXISTS seq_dd (n UInt64 CODEC(DoubleDelta, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS mon_dd AS seq_dd;</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS rnd_dd AS seq_dd;</create_query>
|
||||
|
||||
<create_query>CREATE TABLE IF NOT EXISTS seq_lz4 (n UInt64 CODEC(LZ4, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS mon_lz4 AS seq_lz4;</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS rnd_lz4 AS seq_lz4;</create_query>
|
||||
|
||||
<create_query>CREATE TABLE IF NOT EXISTS seq_dd_lz4 (n UInt64 CODEC(DoubleDelta, LZ4, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS mon_dd_lz4 AS seq_dd_lz4;</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS rnd_dd_lz4 AS seq_dd_lz4;</create_query>
|
||||
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT number FROM system.numbers LIMIT 100000</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT number*67+(rand()%67) FROM system.numbers LIMIT 100000</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT rand() FROM system.numbers LIMIT 100000</fill_query>
|
||||
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
|
||||
|
||||
|
||||
<query>INSERT INTO seq_{table_suffix} (n) SELECT number FROM system.numbers SETTINGS max_threads=1</query>
|
||||
<query>INSERT INTO mon_{table_suffix} (n) SELECT number*67+(rand()%67) FROM system.numbers SETTINGS max_threads=1</query>
|
||||
<query>INSERT INTO rnd_{table_suffix} (n) SELECT rand() FROM system.numbers SETTINGS max_threads=1</query>
|
||||
|
||||
|
||||
<query>SELECT count(n) FROM seq_{table_suffix} SETTINGS max_threads=1</query>
|
||||
<query>SELECT count(n) FROM mon_{table_suffix} SETTINGS max_threads=1</query>
|
||||
<query>SELECT count(n) FROM rnd_{table_suffix} SETTINGS max_threads=1</query>
|
||||
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS seq_{table_suffix}</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS mon_{table_suffix}</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS rnd_{table_suffix}</drop_query>
|
||||
|
||||
</test>
|
67
dbms/tests/performance/codec_gorilla.xml
Normal file
67
dbms/tests/performance/codec_gorilla.xml
Normal file
@ -0,0 +1,67 @@
|
||||
<test>
|
||||
<name>IPv4 Functions</name>
|
||||
|
||||
<type>once</type>
|
||||
<stop_conditions>
|
||||
<any_of>
|
||||
<average_speed_not_changing_for_ms>1000</average_speed_not_changing_for_ms>
|
||||
<total_time_ms>2000</total_time_ms>
|
||||
</any_of>
|
||||
</stop_conditions>
|
||||
|
||||
<substitutions>
|
||||
<substitution>
|
||||
<name>table_suffix</name>
|
||||
<values>
|
||||
<value>g</value>
|
||||
<value>lz4</value>
|
||||
<value>g_lz4</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
<create_query>CREATE TABLE IF NOT EXISTS seq_g (n UInt64 CODEC(Gorilla, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS mon_g AS seq_g;</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS rnd_g AS seq_g;</create_query>
|
||||
|
||||
<create_query>CREATE TABLE IF NOT EXISTS seq_lz4 (n UInt64 CODEC(LZ4, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS mon_lz4 AS seq_lz4;</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS rnd_lz4 AS seq_lz4;</create_query>
|
||||
|
||||
<create_query>CREATE TABLE IF NOT EXISTS seq_g_lz4 (n UInt64 CODEC(Gorilla, LZ4, NONE)) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS mon_g_lz4 AS seq_g_lz4;</create_query>
|
||||
<create_query>CREATE TABLE IF NOT EXISTS rnd_g_lz4 AS seq_g_lz4;</create_query>
|
||||
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT number/pi() FROM system.numbers LIMIT 100000</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT number+sin(number) FROM system.numbers LIMIT 100000</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT (rand() - 4294967295)/pi() FROM system.numbers LIMIT 100000</fill_query>
|
||||
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO seq_{table_suffix} (n) SELECT n FROM seq_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO mon_{table_suffix} (n) SELECT n FROM mon_{table_suffix}</fill_query>
|
||||
<fill_query>INSERT INTO rnd_{table_suffix} (n) SELECT n FROM rnd_{table_suffix}</fill_query>
|
||||
|
||||
|
||||
<query>INSERT INTO seq_{table_suffix} (n) SELECT number/pi() FROM system.numbers SETTINGS max_threads=1</query>
|
||||
<query>INSERT INTO mon_{table_suffix} (n) SELECT number+sin(number) FROM system.numbers SETTINGS max_threads=1</query>
|
||||
<query>INSERT INTO rnd_{table_suffix} (n) SELECT (rand() - 4294967295)/pi() FROM system.numbers SETTINGS max_threads=1</query>
|
||||
|
||||
|
||||
<query>SELECT count(n) FROM seq_{table_suffix} SETTINGS max_threads=1</query>
|
||||
<query>SELECT count(n) FROM mon_{table_suffix} SETTINGS max_threads=1</query>
|
||||
<query>SELECT count(n) FROM rnd_{table_suffix} SETTINGS max_threads=1</query>
|
||||
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS seq_{table_suffix}</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS mon_{table_suffix}</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS rnd_{table_suffix}</drop_query>
|
||||
|
||||
</test>
|
@ -0,0 +1,10 @@
|
||||
U64
|
||||
U32
|
||||
U16
|
||||
U8
|
||||
I64
|
||||
I32
|
||||
I16
|
||||
I8
|
||||
DT
|
||||
D
|
151
dbms/tests/queries/0_stateless/00950_test_double_delta_codec.sql
Normal file
151
dbms/tests/queries/0_stateless/00950_test_double_delta_codec.sql
Normal file
@ -0,0 +1,151 @@
|
||||
USE test;
|
||||
|
||||
DROP TABLE IF EXISTS codecTest;
|
||||
|
||||
CREATE TABLE codecTest (
|
||||
key UInt64,
|
||||
ref_valueU64 UInt64,
|
||||
ref_valueU32 UInt32,
|
||||
ref_valueU16 UInt16,
|
||||
ref_valueU8 UInt8,
|
||||
ref_valueI64 Int64,
|
||||
ref_valueI32 Int32,
|
||||
ref_valueI16 Int16,
|
||||
ref_valueI8 Int8,
|
||||
ref_valueDT DateTime,
|
||||
ref_valueD Date,
|
||||
valueU64 UInt64 CODEC(DoubleDelta),
|
||||
valueU32 UInt32 CODEC(DoubleDelta),
|
||||
valueU16 UInt16 CODEC(DoubleDelta),
|
||||
valueU8 UInt8 CODEC(DoubleDelta),
|
||||
valueI64 Int64 CODEC(DoubleDelta),
|
||||
valueI32 Int32 CODEC(DoubleDelta),
|
||||
valueI16 Int16 CODEC(DoubleDelta),
|
||||
valueI8 Int8 CODEC(DoubleDelta),
|
||||
valueDT DateTime CODEC(DoubleDelta),
|
||||
valueD Date CODEC(DoubleDelta)
|
||||
) Engine = MergeTree ORDER BY key;
|
||||
|
||||
|
||||
-- checking for overflow
|
||||
INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueI64, valueI64)
|
||||
VALUES (101, 18446744073709551615, 18446744073709551615, 9223372036854775807, 9223372036854775807), (202, 0, 0, -9223372036854775808, -9223372036854775808), (203, 18446744073709551615, 18446744073709551615, 9223372036854775807, 9223372036854775807);
|
||||
|
||||
-- n^3 covers all double delta storage cases, from small difference between neighbouref_values (stride) to big.
|
||||
INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_valueU16, valueU16, ref_valueU8, valueU8, ref_valueI64, valueI64, ref_valueI32, valueI32, ref_valueI16, valueI16, ref_valueI8, valueI8, ref_valueDT, valueDT, ref_valueD, valueD)
|
||||
SELECT number as n, n * n * n as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v)
|
||||
FROM system.numbers LIMIT 101, 100;
|
||||
|
||||
-- best case - constant stride
|
||||
INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_valueU16, valueU16, ref_valueU8, valueU8, ref_valueI64, valueI64, ref_valueI32, valueI32, ref_valueI16, valueI16, ref_valueI8, valueI8, ref_valueDT, valueDT, ref_valueD, valueD)
|
||||
SELECT number as n, n as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v)
|
||||
FROM system.numbers LIMIT 201, 100;
|
||||
|
||||
|
||||
-- worst case - random stride
|
||||
INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_valueU16, valueU16, ref_valueU8, valueU8, ref_valueI64, valueI64, ref_valueI32, valueI32, ref_valueI16, valueI16, ref_valueI8, valueI8, ref_valueDT, valueDT, ref_valueD, valueD)
|
||||
SELECT number as n, n + (rand64() - 9223372036854775807)/1000 as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v)
|
||||
FROM system.numbers LIMIT 301, 100;
|
||||
|
||||
|
||||
SELECT 'U64';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueU64, valueU64, ref_valueU64 - valueU64 as dU64
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dU64 != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'U32';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueU32, valueU32, ref_valueU32 - valueU32 as dU32
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dU32 != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'U16';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueU16, valueU16, ref_valueU16 - valueU16 as dU16
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dU16 != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'U8';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueU8, valueU8, ref_valueU8 - valueU8 as dU8
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dU8 != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'I64';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueI64, valueI64, ref_valueI64 - valueI64 as dI64
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dI64 != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'I32';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueI32, valueI32, ref_valueI32 - valueI32 as dI32
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dI32 != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'I16';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueI16, valueI16, ref_valueI16 - valueI16 as dI16
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dI16 != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'I8';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueI8, valueI8, ref_valueI8 - valueI8 as dI8
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dI8 != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'DT';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueDT, valueDT, ref_valueDT - valueDT as dDT
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dDT != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'D';
|
||||
SELECT
|
||||
key,
|
||||
ref_valueD, valueD, ref_valueD - valueD as dD
|
||||
FROM codecTest
|
||||
WHERE
|
||||
dD != 0
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS codecTest;
|
@ -0,0 +1,2 @@
|
||||
F64
|
||||
F32
|
63
dbms/tests/queries/0_stateless/00950_test_gorilla_codec.sql
Normal file
63
dbms/tests/queries/0_stateless/00950_test_gorilla_codec.sql
Normal file
@ -0,0 +1,63 @@
|
||||
USE test;
|
||||
|
||||
DROP TABLE IF EXISTS codecTest;
|
||||
|
||||
CREATE TABLE codecTest (
|
||||
key UInt64,
|
||||
name String,
|
||||
ref_valueF64 Float64,
|
||||
ref_valueF32 Float32,
|
||||
valueF64 Float64 CODEC(Gorilla),
|
||||
valueF32 Float32 CODEC(Gorilla)
|
||||
) Engine = MergeTree ORDER BY key;
|
||||
|
||||
-- best case - same value
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'e()', e() AS v, v, v, v FROM system.numbers LIMIT 1, 100;
|
||||
|
||||
-- good case - values that grow insignificantly
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'log2(n)', log2(n) AS v, v, v, v FROM system.numbers LIMIT 101, 100;
|
||||
|
||||
-- bad case - values differ significantly
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'n*sqrt(n)', n*sqrt(n) AS v, v, v, v FROM system.numbers LIMIT 201, 100;
|
||||
|
||||
-- worst case - almost like a random values
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'sin(n*n*n)*n', sin(n * n * n * n* n) AS v, v, v, v FROM system.numbers LIMIT 301, 100;
|
||||
|
||||
|
||||
-- These floating-point values are expected to be BINARY equal, so comparing by-value is Ok here.
|
||||
|
||||
-- referencing previous row key, value, and case name to simplify debugging.
|
||||
SELECT 'F64';
|
||||
SELECT
|
||||
c1.key, c1.name,
|
||||
c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64,
|
||||
'prev:',
|
||||
c2.key, c2.ref_valueF64
|
||||
FROM
|
||||
codecTest as c1, codecTest as c2
|
||||
WHERE
|
||||
dF64 != 0
|
||||
AND
|
||||
c2.key = c1.key - 1
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'F32';
|
||||
SELECT
|
||||
c1.key, c1.name,
|
||||
c1.ref_valueF32, c1.valueF32, c1.ref_valueF32 - c1.valueF32 AS dF32,
|
||||
'prev:',
|
||||
c2.key, c2.ref_valueF32
|
||||
FROM
|
||||
codecTest as c1, codecTest as c2
|
||||
WHERE
|
||||
dF32 != 0
|
||||
AND
|
||||
c2.key = c1.key - 1
|
||||
LIMIT 10;
|
||||
|
||||
DROP TABLE IF EXISTS codecTest;
|
2
debian/clickhouse-server.config
vendored
2
debian/clickhouse-server.config
vendored
@ -1,6 +1,6 @@
|
||||
#!/bin/sh -e
|
||||
|
||||
. /usr/share/debconf/confmodule
|
||||
test -f /usr/share/debconf/confmodule && . /usr/share/debconf/confmodule
|
||||
|
||||
db_fget clickhouse-server/default-password seen || true
|
||||
password_seen="$RET"
|
||||
|
46
debian/clickhouse-server.postinst
vendored
46
debian/clickhouse-server.postinst
vendored
@ -11,17 +11,25 @@ CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
|
||||
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
|
||||
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
|
||||
|
||||
OS=${OS=`lsb_release -is 2>/dev/null || uname -s ||:`}
|
||||
OS=${OS=`lsb_release -is 2>/dev/null ||:`}
|
||||
if [ -z "$OS" ]; then
|
||||
test -f /etc/os-release && . /etc/os-release && OS=$ID
|
||||
fi
|
||||
OS=${OS=`uname -s ||:`}
|
||||
|
||||
. /usr/share/debconf/confmodule
|
||||
test -f /usr/share/debconf/confmodule && . /usr/share/debconf/confmodule
|
||||
|
||||
test -f /etc/default/clickhouse && . /etc/default/clickhouse
|
||||
|
||||
if [ "$1" = configure ]; then
|
||||
if [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ] || [ "$OS" = "CentOS" ] || [ "$OS" = "Fedora" ]; then
|
||||
is_rh=1
|
||||
fi
|
||||
|
||||
if [ "$1" = configure ] || [ -n "$is_rh" ]; then
|
||||
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
|
||||
# if old rc.d service present - remove it
|
||||
if [ -x "/etc/init.d/clickhouse-server" ]; then
|
||||
update-rc.d clickhouse-server remove
|
||||
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
|
||||
/usr/sbin/update-rc.d clickhouse-server remove
|
||||
echo "ClickHouse init script has migrated to systemd. Please manually stop old server and restart the service: sudo killall clickhouse-server && sleep 5 && sudo service clickhouse-server restart"
|
||||
fi
|
||||
|
||||
@ -30,17 +38,17 @@ if [ "$1" = configure ]; then
|
||||
else
|
||||
# If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-server
|
||||
if [ -x "/etc/init.d/clickhouse-server" ]; then
|
||||
if [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]; then
|
||||
echo # TODO
|
||||
if [ -x "/usr/sbin/update-rc.d" ]; then
|
||||
/usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
|
||||
else
|
||||
update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
|
||||
echo # TODO [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Make sure the administrative user exists
|
||||
if ! getent passwd ${CLICKHOUSE_USER} > /dev/null; then
|
||||
if [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]; then
|
||||
if [ -n "$is_rh" ]; then
|
||||
adduser --system --no-create-home --home /nonexistent \
|
||||
--shell /bin/false ${CLICKHOUSE_USER} > /dev/null
|
||||
else
|
||||
@ -123,14 +131,16 @@ Please fix this and reinstall this package." >&2
|
||||
rm -f ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.cpp ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.so ||:
|
||||
fi
|
||||
|
||||
db_get clickhouse-server/default-password
|
||||
defaultpassword="$RET"
|
||||
if [ -n "$defaultpassword" ]; then
|
||||
echo "<yandex><users><default><password>$defaultpassword</password></default></users></yandex>" > ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
|
||||
fi
|
||||
if [ -f /usr/share/debconf/confmodule ]; then
|
||||
db_get clickhouse-server/default-password
|
||||
defaultpassword="$RET"
|
||||
if [ -n "$defaultpassword" ]; then
|
||||
echo "<yandex><users><default><password>$defaultpassword</password></default></users></yandex>" > ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
|
||||
fi
|
||||
|
||||
# everything went well, so now let's reset the password
|
||||
db_set clickhouse-server/default-password ""
|
||||
# ... done with debconf here
|
||||
db_stop
|
||||
# everything went well, so now let's reset the password
|
||||
db_set clickhouse-server/default-password ""
|
||||
# ... done with debconf here
|
||||
db_stop
|
||||
fi
|
||||
fi
|
||||
|
@ -101,6 +101,8 @@ Possible `level` range: \[3, 12\]. Default value: 9. Greater values stands for b
|
||||
Greater values stands for better compression and higher CPU usage.
|
||||
- `Delta(delta_bytes)` - compression approach when raw values are replace with difference of two neighbour values. Up to `delta_bytes` are used for storing delta value.
|
||||
Possible `delta_bytes` values: 1, 2, 4, 8. Default value for delta bytes is `sizeof(type)`, if it is equals to 1, 2, 4, 8 and equals to 1 otherwise.
|
||||
- `DoubleDelta` - stores delta of deltas in compact binary form, compressing values down to 1 bit (in the best case). Best compression rates are achieved on monotonic sequences with constant stride, e.g. time samples. Can be used against any fixed-width type. Implementation is based on [Gorilla paper](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf), and extended to support 64bit types. The drawback is 1 extra bit for 32-byte wide deltas: 5-bit prefix instead of 4-bit prefix.
|
||||
- `Gorilla` - stores (parts of) xored values in compact binary form, compressing values down to 1 bit (in the best case). Best compression rate is achieved when neighbouring values are binary equal. Basic use case - floating point data that do not change rapidly. Implementation is based on [Gorilla paper](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf), and extended to support 64bit types.
|
||||
|
||||
Syntax example:
|
||||
```
|
||||
|
86
release
86
release
@ -31,8 +31,8 @@
|
||||
|
||||
set -e
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
cd $CURDIR
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
cd $CUR_DIR
|
||||
|
||||
source "./utils/release/release_lib.sh"
|
||||
|
||||
@ -68,6 +68,12 @@ do
|
||||
# Wrong but fast pbuilder mode: create base package with all depends
|
||||
EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake ninja-build gcc-7 g++-7 libc6-dev libicu-dev libreadline-dev psmisc bash expect python python-lxml python-termcolor python-requests curl perl sudo openssl netcat-openbsd"
|
||||
shift
|
||||
elif [[ $1 == '--rpm' ]]; then
|
||||
MAKE_RPM=1
|
||||
shift
|
||||
elif [[ $1 == '--tgz' ]]; then
|
||||
MAKE_TGZ=1
|
||||
shift
|
||||
else
|
||||
echo "Unknown option $1"
|
||||
exit 2
|
||||
@ -109,40 +115,50 @@ echo -e "\nCurrent version is $VERSION_STRING"
|
||||
|
||||
gen_changelog "$VERSION_STRING" "" "$AUTHOR" ""
|
||||
|
||||
if [ -z "$USE_PBUILDER" ] ; then
|
||||
DEB_CC=${DEB_CC:=`which gcc-7 gcc-8 gcc | head -n1`}
|
||||
DEB_CXX=${DEB_CXX:=`which g++-7 g++-8 g++ | head -n1`}
|
||||
# Build (only binary packages).
|
||||
debuild --preserve-env -e PATH \
|
||||
-e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e CMAKE_FLAGS="$CMAKE_FLAGS" \
|
||||
-b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS}
|
||||
else
|
||||
export DIST=${DIST:=bionic}
|
||||
export SET_BUILDRESULT=${SET_BUILDRESULT:=$CURDIR/..}
|
||||
if [ -z "$NO_BUILD" ] ; then
|
||||
if [ -z "$USE_PBUILDER" ] ; then
|
||||
DEB_CC=${DEB_CC:=`which gcc-7 gcc-8 gcc | head -n1`}
|
||||
DEB_CXX=${DEB_CXX:=`which g++-7 g++-8 g++ | head -n1`}
|
||||
# Build (only binary packages).
|
||||
debuild --preserve-env -e PATH \
|
||||
-e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e CMAKE_FLAGS="$CMAKE_FLAGS" \
|
||||
-b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS}
|
||||
else
|
||||
export DIST=${DIST:=bionic}
|
||||
export SET_BUILDRESULT=${SET_BUILDRESULT:=$CUR_DIR/..}
|
||||
|
||||
if [[ -z `which pbuilder` ]] ; then
|
||||
sudo apt install -y pbuilder devscripts ccache fakeroot debhelper debian-archive-keyring debian-keyring lsb-release
|
||||
fi
|
||||
|
||||
. $CURDIR/debian/.pbuilderrc
|
||||
|
||||
if [[ ! -e "/usr/share/debootstrap/scripts/${DIST}" ]] ; then
|
||||
sudo ln -s gutsy /usr/share/debootstrap/scripts/${DIST}
|
||||
fi
|
||||
|
||||
if [[ -n "$FORCE_PBUILDER_CREATE" || ! -e "$BASETGZ" ]] ; then
|
||||
echo Creating base system $BASETGZ
|
||||
[ ! -e "/usr/share/debootstrap/scripts/${DIST}" ] && sudo ln -s gutsy /usr/share/debootstrap/scripts/${DIST}
|
||||
sudo --preserve-env bash -x pbuilder create --configfile $CURDIR/debian/.pbuilderrc $PBUILDER_OPT
|
||||
fi
|
||||
|
||||
if [ "$PBUILDER_AUTOUPDATE" -gt 0 ]; then
|
||||
# Update every 3 days (60*24*3 minutes)
|
||||
if [[ -n "$PBUILDER_UPDATE" ]] || test `find "$BASETGZ" -mmin +$PBUILDER_AUTOUPDATE` ; then
|
||||
echo Updating base system $BASETGZ
|
||||
sudo --preserve-env pbuilder update --configfile $CURDIR/debian/.pbuilderrc $PBUILDER_OPT
|
||||
if [[ -z `which pbuilder` ]] ; then
|
||||
sudo apt install -y pbuilder devscripts ccache fakeroot debhelper debian-archive-keyring debian-keyring lsb-release
|
||||
fi
|
||||
fi
|
||||
|
||||
pdebuild --configfile $CURDIR/debian/.pbuilderrc -- $PBUILDER_OPT
|
||||
. $CUR_DIR/debian/.pbuilderrc
|
||||
|
||||
if [[ ! -e "/usr/share/debootstrap/scripts/${DIST}" ]] ; then
|
||||
sudo ln -s gutsy /usr/share/debootstrap/scripts/${DIST}
|
||||
fi
|
||||
|
||||
if [[ -n "$FORCE_PBUILDER_CREATE" || ! -e "$BASETGZ" ]] ; then
|
||||
echo Creating base system $BASETGZ
|
||||
[ ! -e "/usr/share/debootstrap/scripts/${DIST}" ] && sudo ln -s gutsy /usr/share/debootstrap/scripts/${DIST}
|
||||
sudo --preserve-env bash -x pbuilder create --configfile $CUR_DIR/debian/.pbuilderrc $PBUILDER_OPT
|
||||
fi
|
||||
|
||||
if [ "$PBUILDER_AUTOUPDATE" -gt 0 ]; then
|
||||
# Update every 3 days (60*24*3 minutes)
|
||||
if [[ -n "$PBUILDER_UPDATE" ]] || test `find "$BASETGZ" -mmin +$PBUILDER_AUTOUPDATE` ; then
|
||||
echo Updating base system $BASETGZ
|
||||
sudo --preserve-env pbuilder update --configfile $CUR_DIR/debian/.pbuilderrc $PBUILDER_OPT
|
||||
fi
|
||||
fi
|
||||
|
||||
pdebuild --configfile $CUR_DIR/debian/.pbuilderrc -- $PBUILDER_OPT
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -n "$MAKE_RPM" ]; then
|
||||
make_rpm
|
||||
fi
|
||||
|
||||
if [ -n "$MAKE_TGZ" ]; then
|
||||
make_tgz
|
||||
fi
|
||||
|
@ -179,3 +179,92 @@ function gen_dockerfiles {
|
||||
VERSION_STRING="$1"
|
||||
ls -1 docker/*/Dockerfile | xargs sed -i -r -e 's/ARG version=.+$/ARG version='$VERSION_STRING'/'
|
||||
}
|
||||
|
||||
function make_rpm {
|
||||
get_version
|
||||
VERSION_STRING+=$VERSION_POSTFIX
|
||||
VERSION=$VERSION_STRING
|
||||
PACKAGE_DIR=../
|
||||
|
||||
function deb_unpack {
|
||||
rm -rf $PACKAGE-$VERSION
|
||||
alien --verbose --generate --to-rpm --scripts ${PACKAGE_DIR}${PACKAGE}_${VERSION}_${ARCH}.deb
|
||||
cd $PACKAGE-$VERSION
|
||||
mv ${PACKAGE}-$VERSION-2.spec ${PACKAGE}-$VERSION-2.spec.tmp
|
||||
cat ${PACKAGE}-$VERSION-2.spec.tmp \
|
||||
| grep -vF '%dir "/"' \
|
||||
| grep -vF '%dir "/usr/"' \
|
||||
| grep -vF '%dir "/usr/bin/"' \
|
||||
| grep -vF '%dir "/usr/lib/"' \
|
||||
| grep -vF '%dir "/usr/lib/debug/"' \
|
||||
| grep -vF '%dir "/usr/lib/.build-id/"' \
|
||||
| grep -vF '%dir "/usr/share/"' \
|
||||
| grep -vF '%dir "/usr/share/doc/"' \
|
||||
| grep -vF '%dir "/lib/"' \
|
||||
| grep -vF '%dir "/lib/systemd/"' \
|
||||
| grep -vF '%dir "/lib/systemd/system/"' \
|
||||
| grep -vF '%dir "/etc/"' \
|
||||
| grep -vF '%dir "/etc/security/"' \
|
||||
| grep -vF '%dir "/etc/security/limits.d/"' \
|
||||
| grep -vF '%dir "/etc/init.d/"' \
|
||||
| grep -vF '%dir "/etc/cron.d/"' \
|
||||
| grep -vF '%dir "/etc/systemd/system/"' \
|
||||
| grep -vF '%dir "/etc/systemd/"' \
|
||||
> ${PACKAGE}-$VERSION-2.spec
|
||||
}
|
||||
|
||||
function rpm_pack {
|
||||
rpmbuild --buildroot="$CUR_DIR/${PACKAGE}-$VERSION" -bb --target ${TARGET} "${PACKAGE}-$VERSION-2.spec"
|
||||
cd $CUR_DIR
|
||||
}
|
||||
|
||||
function unpack_pack {
|
||||
deb_unpack
|
||||
rpm_pack
|
||||
}
|
||||
|
||||
PACKAGE=clickhouse-server
|
||||
ARCH=all
|
||||
TARGET=noarch
|
||||
unpack_pack
|
||||
|
||||
PACKAGE=clickhouse-client
|
||||
ARCH=all
|
||||
TARGET=noarch
|
||||
unpack_pack
|
||||
|
||||
PACKAGE=clickhouse-test
|
||||
ARCH=all
|
||||
TARGET=noarch
|
||||
deb_unpack
|
||||
mv ${PACKAGE}-$VERSION-2.spec ${PACKAGE}-$VERSION-2.spec_tmp
|
||||
echo "Requires: python2" >> ${PACKAGE}-$VERSION-2.spec
|
||||
#echo "Requires: python2-termcolor" >> ${PACKAGE}-$VERSION-2.spec
|
||||
cat ${PACKAGE}-$VERSION-2.spec_tmp >> ${PACKAGE}-$VERSION-2.spec
|
||||
rpm_pack
|
||||
|
||||
PACKAGE=clickhouse-common-static
|
||||
ARCH=amd64
|
||||
TARGET=x86_64
|
||||
unpack_pack
|
||||
|
||||
PACKAGE=clickhouse-common-static-dbg
|
||||
ARCH=amd64
|
||||
TARGET=x86_64
|
||||
unpack_pack
|
||||
|
||||
mv clickhouse-*-${VERSION_STRING}-2.*.rpm ${PACKAGE_DIR}
|
||||
}
|
||||
|
||||
function make_tgz {
|
||||
get_version
|
||||
VERSION_STRING+=$VERSION_POSTFIX
|
||||
VERSION=$VERSION_STRING
|
||||
PACKAGE_DIR=../
|
||||
|
||||
for PACKAGE in clickhouse-server clickhouse-client clickhouse-test clickhouse-common-static clickhouse-common-static-dbg; do
|
||||
alien --verbose --to-tgz ${PACKAGE_DIR}${PACKAGE}_${VERSION}_*.deb
|
||||
done
|
||||
|
||||
mv clickhouse-*-${VERSION_STRING}.tgz ${PACKAGE_DIR}
|
||||
}
|
||||
|
@ -94,7 +94,7 @@
|
||||
</div>
|
||||
<div id="announcement" class="colored-block">
|
||||
<div class="page">
|
||||
Upcoming Meetups: <a class="announcement-link" href="https://events.yandex.ru/events/ClickHouse/26-June-2019/" rel="external nofollow" target="_blank">Novosibirsk</a> on June 26 and <a class="announcement-link" href="https://www.huodongxing.com/event/3483759917300" rel="external nofollow" target="_blank">Shenzhen</a> on October 20
|
||||
Upcoming Meetups: <a class="announcement-link" href="https://yandex.ru/promo/metrica/clickhouse-minsk" rel="external nofollow" target="_blank">Minsk</a> on July 11 and <a class="announcement-link" href="https://www.huodongxing.com/event/3483759917300" rel="external nofollow" target="_blank">Shenzhen</a> on October 20
|
||||
</div>
|
||||
</div>
|
||||
<div class="page">
|
||||
|
Loading…
Reference in New Issue
Block a user