mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 01:22:04 +00:00
Merge pull request #37553 from koloshmet/fpc_codec
FPC Codec for floating point data
This commit is contained in:
commit
9794098ebb
511
src/Compression/CompressionCodecFPC.cpp
Normal file
511
src/Compression/CompressionCodecFPC.cpp
Normal file
@ -0,0 +1,511 @@
|
||||
#include <Compression/ICompressionCodec.h>
|
||||
#include <Compression/CompressionInfo.h>
|
||||
#include <Compression/CompressionFactory.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <span>
|
||||
#include <bit>
|
||||
#include <concepts>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class CompressionCodecFPC : public ICompressionCodec
|
||||
{
|
||||
public:
|
||||
CompressionCodecFPC(UInt8 float_size, UInt8 compression_level);
|
||||
|
||||
uint8_t getMethodByte() const override;
|
||||
|
||||
void updateHash(SipHash & hash) const override;
|
||||
|
||||
static constexpr UInt8 MAX_COMPRESSION_LEVEL{28};
|
||||
static constexpr UInt8 DEFAULT_COMPRESSION_LEVEL{12};
|
||||
|
||||
protected:
|
||||
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
|
||||
|
||||
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
|
||||
|
||||
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
|
||||
|
||||
bool isCompression() const override { return true; }
|
||||
bool isGenericCompression() const override { return false; }
|
||||
|
||||
private:
|
||||
static constexpr UInt32 HEADER_SIZE{3};
|
||||
|
||||
UInt8 float_width; // size of uncompressed float in bytes
|
||||
UInt8 level; // compression level, 2^level * float_width is the size of predictors table in bytes
|
||||
};
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_COMPRESS;
|
||||
extern const int CANNOT_DECOMPRESS;
|
||||
extern const int ILLEGAL_CODEC_PARAMETER;
|
||||
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
uint8_t CompressionCodecFPC::getMethodByte() const
|
||||
{
|
||||
return static_cast<uint8_t>(CompressionMethodByte::FPC);
|
||||
}
|
||||
|
||||
void CompressionCodecFPC::updateHash(SipHash & hash) const
|
||||
{
|
||||
getCodecDesc()->updateTreeHash(hash);
|
||||
}
|
||||
|
||||
CompressionCodecFPC::CompressionCodecFPC(UInt8 float_size, UInt8 compression_level)
|
||||
: float_width{float_size}, level{compression_level}
|
||||
{
|
||||
setCodecDescription("FPC", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
|
||||
}
|
||||
|
||||
UInt32 CompressionCodecFPC::getMaxCompressedDataSize(UInt32 uncompressed_size) const
|
||||
{
|
||||
auto float_count = (uncompressed_size + float_width - 1) / float_width;
|
||||
if (float_count % 2 != 0)
|
||||
++float_count;
|
||||
return HEADER_SIZE + float_count * float_width + float_count / 2;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
UInt8 getFloatBytesSize(const IDataType & column_type)
|
||||
{
|
||||
if (!WhichDataType(column_type).isFloat())
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "FPC codec is not applicable for {} because the data type is not float",
|
||||
column_type.getName());
|
||||
}
|
||||
|
||||
if (auto float_size = column_type.getSizeOfValueInMemory(); float_size >= 4)
|
||||
{
|
||||
return static_cast<UInt8>(float_size);
|
||||
}
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "FPC codec is not applicable for floats of size less than 4 bytes. Given type {}",
|
||||
column_type.getName());
|
||||
}
|
||||
|
||||
std::byte encodeEndianness(std::endian endian)
|
||||
{
|
||||
switch (endian)
|
||||
{
|
||||
case std::endian::little:
|
||||
return std::byte{0};
|
||||
case std::endian::big:
|
||||
return std::byte{1};
|
||||
}
|
||||
throw Exception("Unsupported endianness", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
|
||||
std::endian decodeEndianness(std::byte endian)
|
||||
{
|
||||
switch (std::to_integer<unsigned char>(endian))
|
||||
{
|
||||
case 0:
|
||||
return std::endian::little;
|
||||
case 1:
|
||||
return std::endian::big;
|
||||
}
|
||||
throw Exception("Unsupported endianness", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerCodecFPC(CompressionCodecFactory & factory)
|
||||
{
|
||||
auto method_code = static_cast<UInt8>(CompressionMethodByte::FPC);
|
||||
auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
|
||||
{
|
||||
UInt8 float_width{0};
|
||||
if (column_type != nullptr)
|
||||
float_width = getFloatBytesSize(*column_type);
|
||||
|
||||
UInt8 level = CompressionCodecFPC::DEFAULT_COMPRESSION_LEVEL;
|
||||
if (arguments && !arguments->children.empty())
|
||||
{
|
||||
if (arguments->children.size() > 1)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE,
|
||||
"FPC codec must have 1 parameter, given {}", arguments->children.size());
|
||||
}
|
||||
|
||||
const auto * literal = arguments->children.front()->as<ASTLiteral>();
|
||||
if (!literal)
|
||||
throw Exception("FPC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER);
|
||||
|
||||
level = literal->value.safeGet<UInt8>();
|
||||
if (level == 0)
|
||||
throw Exception("FPC codec level must be at least 1", ErrorCodes::ILLEGAL_CODEC_PARAMETER);
|
||||
if (level > CompressionCodecFPC::MAX_COMPRESSION_LEVEL)
|
||||
throw Exception("FPC codec level must be at most 28", ErrorCodes::ILLEGAL_CODEC_PARAMETER);
|
||||
}
|
||||
return std::make_shared<CompressionCodecFPC>(float_width, level);
|
||||
};
|
||||
factory.registerCompressionCodecWithType("FPC", method_code, codec_builder);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <std::unsigned_integral TUint>
|
||||
requires (sizeof(TUint) >= 4)
|
||||
class DfcmPredictor
|
||||
{
|
||||
public:
|
||||
explicit DfcmPredictor(std::size_t table_size): table(table_size, 0), prev_value{0}, hash{0}
|
||||
{
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
TUint predict() const noexcept
|
||||
{
|
||||
return table[hash] + prev_value;
|
||||
}
|
||||
|
||||
void add(TUint value) noexcept
|
||||
{
|
||||
table[hash] = value - prev_value;
|
||||
recalculateHash();
|
||||
prev_value = value;
|
||||
}
|
||||
|
||||
private:
|
||||
void recalculateHash() noexcept
|
||||
{
|
||||
auto value = table[hash];
|
||||
if constexpr (sizeof(TUint) >= 8)
|
||||
{
|
||||
hash = ((hash << 2) ^ static_cast<std::size_t>(value >> 40)) & (table.size() - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
hash = ((hash << 4) ^ static_cast<std::size_t>(value >> 23)) & (table.size() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TUint> table;
|
||||
TUint prev_value;
|
||||
std::size_t hash;
|
||||
};
|
||||
|
||||
template <std::unsigned_integral TUint>
|
||||
requires (sizeof(TUint) >= 4)
|
||||
class FcmPredictor
|
||||
{
|
||||
public:
|
||||
explicit FcmPredictor(std::size_t table_size): table(table_size, 0), hash{0}
|
||||
{
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
TUint predict() const noexcept
|
||||
{
|
||||
return table[hash];
|
||||
}
|
||||
|
||||
void add(TUint value) noexcept
|
||||
{
|
||||
table[hash] = value;
|
||||
recalculateHash();
|
||||
}
|
||||
|
||||
private:
|
||||
void recalculateHash() noexcept
|
||||
{
|
||||
auto value = table[hash];
|
||||
if constexpr (sizeof(TUint) >= 8)
|
||||
{
|
||||
hash = ((hash << 6) ^ static_cast<std::size_t>(value >> 48)) & (table.size() - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
hash = ((hash << 1) ^ static_cast<std::size_t>(value >> 22)) & (table.size() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TUint> table;
|
||||
std::size_t hash;
|
||||
};
|
||||
|
||||
template <std::unsigned_integral TUint, std::endian Endian = std::endian::native>
|
||||
requires (Endian == std::endian::little || Endian == std::endian::big)
|
||||
class FPCOperation
|
||||
{
|
||||
static constexpr std::size_t CHUNK_SIZE{64};
|
||||
|
||||
static constexpr auto VALUE_SIZE = sizeof(TUint);
|
||||
static constexpr std::byte FCM_BIT{0};
|
||||
static constexpr std::byte DFCM_BIT{1u << 3};
|
||||
static constexpr auto DFCM_BIT_1 = DFCM_BIT << 4;
|
||||
static constexpr auto DFCM_BIT_2 = DFCM_BIT;
|
||||
static constexpr unsigned MAX_ZERO_BYTE_COUNT{0b111u};
|
||||
|
||||
public:
|
||||
FPCOperation(std::span<std::byte> destination, UInt8 compression_level)
|
||||
: dfcm_predictor(1u << compression_level), fcm_predictor(1u << compression_level), chunk{}, result{destination}
|
||||
{
|
||||
}
|
||||
|
||||
std::size_t encode(std::span<const std::byte> data) &&
|
||||
{
|
||||
auto initial_size = result.size();
|
||||
|
||||
std::span chunk_view(chunk);
|
||||
for (std::size_t i = 0; i < data.size(); i += chunk_view.size_bytes())
|
||||
{
|
||||
auto written_values = importChunk(data.subspan(i), chunk_view);
|
||||
encodeChunk(chunk_view.subspan(0, written_values));
|
||||
}
|
||||
|
||||
return initial_size - result.size();
|
||||
}
|
||||
|
||||
void decode(std::span<const std::byte> values, std::size_t decoded_size) &&
|
||||
{
|
||||
std::size_t read_bytes{0};
|
||||
|
||||
std::span<TUint> chunk_view(chunk);
|
||||
for (std::size_t i = 0; i < decoded_size; i += chunk_view.size_bytes())
|
||||
{
|
||||
if (i + chunk_view.size_bytes() > decoded_size)
|
||||
chunk_view = chunk_view.first(ceilBytesToEvenValues(decoded_size - i));
|
||||
read_bytes += decodeChunk(values.subspan(read_bytes), chunk_view);
|
||||
exportChunk(chunk_view);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static std::size_t ceilBytesToEvenValues(std::size_t bytes_count)
|
||||
{
|
||||
auto values_count = (bytes_count + VALUE_SIZE - 1) / VALUE_SIZE;
|
||||
return values_count % 2 == 0 ? values_count : values_count + 1;
|
||||
}
|
||||
|
||||
std::size_t importChunk(std::span<const std::byte> values, std::span<TUint> chnk)
|
||||
{
|
||||
if (auto chunk_view = std::as_writable_bytes(chnk); chunk_view.size() <= values.size())
|
||||
{
|
||||
std::memcpy(chunk_view.data(), values.data(), chunk_view.size());
|
||||
return chunk_view.size() / VALUE_SIZE;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::memset(chunk_view.data(), 0, chunk_view.size());
|
||||
std::memcpy(chunk_view.data(), values.data(), values.size());
|
||||
return ceilBytesToEvenValues(values.size());
|
||||
}
|
||||
}
|
||||
|
||||
void exportChunk(std::span<const TUint> chnk)
|
||||
{
|
||||
auto chunk_view = std::as_bytes(chnk).first(std::min(result.size(), chnk.size_bytes()));
|
||||
std::memcpy(result.data(), chunk_view.data(), chunk_view.size());
|
||||
result = result.subspan(chunk_view.size());
|
||||
}
|
||||
|
||||
void encodeChunk(std::span<const TUint> seq)
|
||||
{
|
||||
for (std::size_t i = 0; i < seq.size(); i += 2)
|
||||
{
|
||||
encodePair(seq[i], seq[i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
struct CompressedValue
|
||||
{
|
||||
TUint value;
|
||||
unsigned compressed_size;
|
||||
std::byte predictor;
|
||||
};
|
||||
|
||||
unsigned encodeCompressedZeroByteCount(int compressed)
|
||||
{
|
||||
if constexpr (VALUE_SIZE == MAX_ZERO_BYTE_COUNT + 1)
|
||||
{
|
||||
if (compressed >= 4)
|
||||
--compressed;
|
||||
}
|
||||
return std::min(static_cast<unsigned>(compressed), MAX_ZERO_BYTE_COUNT);
|
||||
}
|
||||
|
||||
unsigned decodeCompressedZeroByteCount(unsigned encoded_size)
|
||||
{
|
||||
if constexpr (VALUE_SIZE == MAX_ZERO_BYTE_COUNT + 1)
|
||||
{
|
||||
if (encoded_size > 3)
|
||||
++encoded_size;
|
||||
}
|
||||
return encoded_size;
|
||||
}
|
||||
|
||||
CompressedValue compressValue(TUint value) noexcept
|
||||
{
|
||||
static constexpr auto BITS_PER_BYTE = std::numeric_limits<unsigned char>::digits;
|
||||
|
||||
TUint compressed_dfcm = dfcm_predictor.predict() ^ value;
|
||||
TUint compressed_fcm = fcm_predictor.predict() ^ value;
|
||||
dfcm_predictor.add(value);
|
||||
fcm_predictor.add(value);
|
||||
auto zeroes_dfcm = std::countl_zero(compressed_dfcm);
|
||||
auto zeroes_fcm = std::countl_zero(compressed_fcm);
|
||||
if (zeroes_dfcm > zeroes_fcm)
|
||||
return {compressed_dfcm, encodeCompressedZeroByteCount(zeroes_dfcm / BITS_PER_BYTE), DFCM_BIT};
|
||||
return {compressed_fcm, encodeCompressedZeroByteCount(zeroes_fcm / BITS_PER_BYTE), FCM_BIT};
|
||||
}
|
||||
|
||||
void encodePair(TUint first, TUint second)
|
||||
{
|
||||
auto [value1, zero_byte_count1, predictor1] = compressValue(first);
|
||||
auto [value2, zero_byte_count2, predictor2] = compressValue(second);
|
||||
std::byte header{0x0};
|
||||
header |= (predictor1 << 4) | predictor2;
|
||||
header |= static_cast<std::byte>((zero_byte_count1 << 4) | zero_byte_count2);
|
||||
result.front() = header;
|
||||
|
||||
zero_byte_count1 = decodeCompressedZeroByteCount(zero_byte_count1);
|
||||
zero_byte_count2 = decodeCompressedZeroByteCount(zero_byte_count2);
|
||||
auto tail_size1 = VALUE_SIZE - zero_byte_count1;
|
||||
auto tail_size2 = VALUE_SIZE - zero_byte_count2;
|
||||
|
||||
std::memcpy(result.data() + 1, valueTail(value1, zero_byte_count1), tail_size1);
|
||||
std::memcpy(result.data() + 1 + tail_size1, valueTail(value2, zero_byte_count2), tail_size2);
|
||||
result = result.subspan(1 + tail_size1 + tail_size2);
|
||||
}
|
||||
|
||||
std::size_t decodeChunk(std::span<const std::byte> values, std::span<TUint> seq)
|
||||
{
|
||||
std::size_t read_bytes{0};
|
||||
for (std::size_t i = 0; i < seq.size(); i += 2)
|
||||
{
|
||||
read_bytes += decodePair(values.subspan(read_bytes), seq[i], seq[i + 1]);
|
||||
}
|
||||
return read_bytes;
|
||||
}
|
||||
|
||||
TUint decompressValue(TUint value, bool isDfcmPredictor)
|
||||
{
|
||||
TUint decompressed;
|
||||
if (isDfcmPredictor)
|
||||
{
|
||||
decompressed = dfcm_predictor.predict() ^ value;
|
||||
}
|
||||
else
|
||||
{
|
||||
decompressed = fcm_predictor.predict() ^ value;
|
||||
}
|
||||
dfcm_predictor.add(decompressed);
|
||||
fcm_predictor.add(decompressed);
|
||||
return decompressed;
|
||||
}
|
||||
|
||||
std::size_t decodePair(std::span<const std::byte> bytes, TUint& first, TUint& second)
|
||||
{
|
||||
if (bytes.empty())
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Unexpected end of encoded sequence");
|
||||
|
||||
auto zero_byte_count1 = decodeCompressedZeroByteCount(
|
||||
std::to_integer<unsigned>(bytes.front() >> 4) & MAX_ZERO_BYTE_COUNT);
|
||||
auto zero_byte_count2 = decodeCompressedZeroByteCount(
|
||||
std::to_integer<unsigned>(bytes.front()) & MAX_ZERO_BYTE_COUNT);
|
||||
|
||||
auto tail_size1 = VALUE_SIZE - zero_byte_count1;
|
||||
auto tail_size2 = VALUE_SIZE - zero_byte_count2;
|
||||
|
||||
if (bytes.size() < 1 + tail_size1 + tail_size2)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Unexpected end of encoded sequence");
|
||||
|
||||
TUint value1{0};
|
||||
TUint value2{0};
|
||||
|
||||
std::memcpy(valueTail(value1, zero_byte_count1), bytes.data() + 1, tail_size1);
|
||||
std::memcpy(valueTail(value2, zero_byte_count2), bytes.data() + 1 + tail_size1, tail_size2);
|
||||
|
||||
auto is_dfcm_predictor1 = std::to_integer<unsigned char>(bytes.front() & DFCM_BIT_1) != 0;
|
||||
auto is_dfcm_predictor2 = std::to_integer<unsigned char>(bytes.front() & DFCM_BIT_2) != 0;
|
||||
first = decompressValue(value1, is_dfcm_predictor1);
|
||||
second = decompressValue(value2, is_dfcm_predictor2);
|
||||
|
||||
return 1 + tail_size1 + tail_size2;
|
||||
}
|
||||
|
||||
static void* valueTail(TUint& value, unsigned compressed_size)
|
||||
{
|
||||
if constexpr (Endian == std::endian::little)
|
||||
{
|
||||
return &value;
|
||||
}
|
||||
else
|
||||
{
|
||||
return reinterpret_cast<std::byte*>(&value) + compressed_size;
|
||||
}
|
||||
}
|
||||
|
||||
DfcmPredictor<TUint> dfcm_predictor;
|
||||
FcmPredictor<TUint> fcm_predictor;
|
||||
std::array<TUint, CHUNK_SIZE> chunk{};
|
||||
std::span<std::byte> result{};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
UInt32 CompressionCodecFPC::doCompressData(const char * source, UInt32 source_size, char * dest) const
|
||||
{
|
||||
dest[0] = static_cast<char>(float_width);
|
||||
dest[1] = static_cast<char>(level);
|
||||
dest[2] = std::to_integer<char>(encodeEndianness(std::endian::native));
|
||||
|
||||
auto dest_size = getMaxCompressedDataSize(source_size);
|
||||
auto destination = std::as_writable_bytes(std::span(dest, dest_size).subspan(HEADER_SIZE));
|
||||
auto src = std::as_bytes(std::span(source, source_size));
|
||||
switch (float_width)
|
||||
{
|
||||
case sizeof(Float64):
|
||||
return HEADER_SIZE + FPCOperation<UInt64>(destination, level).encode(src);
|
||||
case sizeof(Float32):
|
||||
return HEADER_SIZE + FPCOperation<UInt32>(destination, level).encode(src);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
throw Exception("Cannot compress. File has incorrect float width", ErrorCodes::CANNOT_COMPRESS);
|
||||
}
|
||||
|
||||
void CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
|
||||
{
|
||||
if (source_size < HEADER_SIZE)
|
||||
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
|
||||
|
||||
auto compressed_data = std::as_bytes(std::span(source, source_size));
|
||||
auto compressed_float_width = std::to_integer<UInt8>(compressed_data[0]);
|
||||
auto compressed_level = std::to_integer<UInt8>(compressed_data[1]);
|
||||
if (compressed_level == 0 || compressed_level > MAX_COMPRESSION_LEVEL)
|
||||
throw Exception("Cannot decompress. File has incorrect level", ErrorCodes::CANNOT_DECOMPRESS);
|
||||
if (decodeEndianness(compressed_data[2]) != std::endian::native)
|
||||
throw Exception("Cannot decompress. File has incorrect endianness", ErrorCodes::CANNOT_DECOMPRESS);
|
||||
|
||||
auto destination = std::as_writable_bytes(std::span(dest, uncompressed_size));
|
||||
auto src = compressed_data.subspan(HEADER_SIZE);
|
||||
switch (compressed_float_width)
|
||||
{
|
||||
case sizeof(Float64):
|
||||
FPCOperation<UInt64>(destination, compressed_level).decode(src, uncompressed_size);
|
||||
break;
|
||||
case sizeof(Float32):
|
||||
FPCOperation<UInt32>(destination, compressed_level).decode(src, uncompressed_size);
|
||||
break;
|
||||
default:
|
||||
throw Exception("Cannot decompress. File has incorrect float width", ErrorCodes::CANNOT_DECOMPRESS);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -177,6 +177,7 @@ void registerCodecT64(CompressionCodecFactory & factory);
|
||||
void registerCodecDoubleDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecGorilla(CompressionCodecFactory & factory);
|
||||
void registerCodecEncrypted(CompressionCodecFactory & factory);
|
||||
void registerCodecFPC(CompressionCodecFactory & factory);
|
||||
|
||||
#endif
|
||||
|
||||
@ -194,6 +195,7 @@ CompressionCodecFactory::CompressionCodecFactory()
|
||||
registerCodecDoubleDelta(*this);
|
||||
registerCodecGorilla(*this);
|
||||
registerCodecEncrypted(*this);
|
||||
registerCodecFPC(*this);
|
||||
#endif
|
||||
|
||||
default_codec = get("LZ4", {});
|
||||
|
@ -44,7 +44,8 @@ enum class CompressionMethodByte : uint8_t
|
||||
DoubleDelta = 0x94,
|
||||
Gorilla = 0x95,
|
||||
AES_128_GCM_SIV = 0x96,
|
||||
AES_256_GCM_SIV = 0x97
|
||||
AES_256_GCM_SIV = 0x97,
|
||||
FPC = 0x98
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
<value>ZSTD</value>
|
||||
<value>DoubleDelta</value>
|
||||
<value>Gorilla</value>
|
||||
<value>FPC</value>
|
||||
</values>
|
||||
</substitution>
|
||||
<substitution>
|
||||
|
@ -12,6 +12,7 @@
|
||||
<value>ZSTD</value>
|
||||
<value>DoubleDelta</value>
|
||||
<value>Gorilla</value>
|
||||
<value>FPC</value>
|
||||
</values>
|
||||
</substitution>
|
||||
<substitution>
|
||||
|
4
tests/queries/0_stateless/02313_test_fpc_codec.reference
Normal file
4
tests/queries/0_stateless/02313_test_fpc_codec.reference
Normal file
@ -0,0 +1,4 @@
|
||||
F64
|
||||
F32
|
||||
F64
|
||||
F32
|
121
tests/queries/0_stateless/02313_test_fpc_codec.sql
Normal file
121
tests/queries/0_stateless/02313_test_fpc_codec.sql
Normal file
@ -0,0 +1,121 @@
|
||||
DROP TABLE IF EXISTS codecTest;
|
||||
|
||||
CREATE TABLE codecTest (
|
||||
key UInt64,
|
||||
name String,
|
||||
ref_valueF64 Float64,
|
||||
ref_valueF32 Float32,
|
||||
valueF64 Float64 CODEC(FPC),
|
||||
valueF32 Float32 CODEC(FPC)
|
||||
) Engine = MergeTree ORDER BY key;
|
||||
|
||||
-- best case - same value
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'e()', e() AS v, v, v, v FROM system.numbers LIMIT 1, 100;
|
||||
|
||||
-- good case - values that grow insignificantly
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'log2(n)', log2(n) AS v, v, v, v FROM system.numbers LIMIT 101, 100;
|
||||
|
||||
-- bad case - values differ significantly
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'n*sqrt(n)', n*sqrt(n) AS v, v, v, v FROM system.numbers LIMIT 201, 100;
|
||||
|
||||
-- worst case - almost like a random values
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'sin(n*n*n)*n', sin(n * n * n * n* n) AS v, v, v, v FROM system.numbers LIMIT 301, 100;
|
||||
|
||||
|
||||
-- These floating-point values are expected to be BINARY equal, so comparing by-value is Ok here.
|
||||
|
||||
-- referencing previous row key, value, and case name to simplify debugging.
|
||||
SELECT 'F64';
|
||||
SELECT
|
||||
c1.key, c1.name,
|
||||
c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64,
|
||||
'prev:',
|
||||
c2.key, c2.ref_valueF64
|
||||
FROM
|
||||
codecTest as c1, codecTest as c2
|
||||
WHERE
|
||||
dF64 != 0
|
||||
AND
|
||||
c2.key = c1.key - 1
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'F32';
|
||||
SELECT
|
||||
c1.key, c1.name,
|
||||
c1.ref_valueF32, c1.valueF32, c1.ref_valueF32 - c1.valueF32 AS dF32,
|
||||
'prev:',
|
||||
c2.key, c2.ref_valueF32
|
||||
FROM
|
||||
codecTest as c1, codecTest as c2
|
||||
WHERE
|
||||
dF32 != 0
|
||||
AND
|
||||
c2.key = c1.key - 1
|
||||
LIMIT 10;
|
||||
|
||||
DROP TABLE IF EXISTS codecTest;
|
||||
|
||||
CREATE TABLE codecTest (
|
||||
key UInt64,
|
||||
name String,
|
||||
ref_valueF64 Float64,
|
||||
ref_valueF32 Float32,
|
||||
valueF64 Float64 CODEC(FPC(4)),
|
||||
valueF32 Float32 CODEC(FPC(4))
|
||||
) Engine = MergeTree ORDER BY key;
|
||||
|
||||
-- best case - same value
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'e()', e() AS v, v, v, v FROM system.numbers LIMIT 1, 100;
|
||||
|
||||
-- good case - values that grow insignificantly
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'log2(n)', log2(n) AS v, v, v, v FROM system.numbers LIMIT 101, 100;
|
||||
|
||||
-- bad case - values differ significantly
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'n*sqrt(n)', n*sqrt(n) AS v, v, v, v FROM system.numbers LIMIT 201, 100;
|
||||
|
||||
-- worst case - almost like a random values
|
||||
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
|
||||
SELECT number AS n, 'sin(n*n*n)*n', sin(n * n * n * n* n) AS v, v, v, v FROM system.numbers LIMIT 301, 100;
|
||||
|
||||
|
||||
-- These floating-point values are expected to be BINARY equal, so comparing by-value is Ok here.
|
||||
|
||||
-- referencing previous row key, value, and case name to simplify debugging.
|
||||
SELECT 'F64';
|
||||
SELECT
|
||||
c1.key, c1.name,
|
||||
c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64,
|
||||
'prev:',
|
||||
c2.key, c2.ref_valueF64
|
||||
FROM
|
||||
codecTest as c1, codecTest as c2
|
||||
WHERE
|
||||
dF64 != 0
|
||||
AND
|
||||
c2.key = c1.key - 1
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
SELECT 'F32';
|
||||
SELECT
|
||||
c1.key, c1.name,
|
||||
c1.ref_valueF32, c1.valueF32, c1.ref_valueF32 - c1.valueF32 AS dF32,
|
||||
'prev:',
|
||||
c2.key, c2.ref_valueF32
|
||||
FROM
|
||||
codecTest as c1, codecTest as c2
|
||||
WHERE
|
||||
dF32 != 0
|
||||
AND
|
||||
c2.key = c1.key - 1
|
||||
LIMIT 10;
|
||||
|
||||
DROP TABLE IF EXISTS codecTest;
|
Loading…
Reference in New Issue
Block a user