Merge 55d66a0c50 into e0f8b8d351 (commit 5d378419ee)

src/Compression/CompressionCodecChimp.cpp (new file, 502 lines)
@ -0,0 +1,502 @@
#pragma clang diagnostic ignored "-Wreserved-identifier"

#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionInfo.h>
#include <Compression/CompressionFactory.h>
#include <base/unaligned.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTLiteral.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/BitHelpers.h>

#include <bitset>
#include <cstring>
#include <algorithm>
#include <type_traits>


namespace DB
{

/** Chimp column codec implementation.
 *
 * Implementation of the Chimp128 algorithm proposed in: Panagiotis Liakos, Katia Papakonstantinopoulou, Yannis Kotidis:
 * Chimp: Efficient Lossless Floating Point Compression for Time Series Databases. Proc. VLDB Endow. 15(11): 3058-3070 (2022).
 * Available at: https://dl.acm.org/doi/abs/10.14778/3551793.3551852
 */
class CompressionCodecChimp : public ICompressionCodec
{
public:
    explicit CompressionCodecChimp(UInt8 data_bytes_size_);

    uint8_t getMethodByte() const override;

    void updateHash(SipHash & hash) const override;

protected:

    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;

    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;

    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;

    bool isCompression() const override { return true; }
    bool isGenericCompression() const override { return false; }
    bool isFloatingPointTimeSeriesCodec() const override { return true; }

private:
    const UInt8 data_bytes_size;
};

namespace LeadingZero
{
static const auto BIT_LENGTH = 3;
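/// The tables below bucket a raw leading-zero count (0..64) into one of the eight
/// representable values {0, 8, 12, 16, 18, 20, 22, 24} so that it fits into
/// BIT_LENGTH = 3 bits: `round` maps a count to its bucketed value,
/// `binaryRepresentation` maps a count to its 3-bit code, and
/// `reverseBinaryRepresentation` maps a 3-bit code back to the bucketed count.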
static const short round[65] =
{
    0, 0, 0, 0, 0, 0, 0, 0,
    8, 8, 8, 8, 12, 12, 12, 12,
    16, 16, 18, 18, 20, 20, 22, 22,
    24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24
};
static const short binaryRepresentation[65] =
{
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 2, 2, 2, 2,
    3, 3, 4, 4, 5, 5, 6, 6,
    7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7
};
static const short reverseBinaryRepresentation[8] = {0, 8, 12, 16, 18, 20, 22, 24};
}

namespace ErrorCodes
{
    extern const int CANNOT_COMPRESS;
    extern const int CANNOT_DECOMPRESS;
    extern const int BAD_ARGUMENTS;
    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
    extern const int ILLEGAL_CODEC_PARAMETER;
}

namespace
{

constexpr UInt8 getBitLengthOfLength(UInt8 data_bytes_size)
{
    // A 4-byte value is 32 bits, and we need 5 bits to represent 32 values;
    // an 8-byte value is 64 bits => 6 bits.
    const UInt8 bit_lengths[] = {0, 0, 0, 0, 5, 0, 0, 0, 6};
    assert(data_bytes_size >= 1 && data_bytes_size < sizeof(bit_lengths) && bit_lengths[data_bytes_size] != 0);
    return bit_lengths[data_bytes_size];
}

UInt32 getCompressedHeaderSize(UInt8 data_bytes_size)
{
    constexpr UInt8 items_count_size = 4;
    return items_count_size + data_bytes_size;
}

UInt32 getCompressedDataSize(UInt8 data_bytes_size, UInt32 uncompressed_size)
{
    const UInt32 items_count = uncompressed_size / data_bytes_size;
    static const auto DATA_BIT_LENGTH = getBitLengthOfLength(data_bytes_size);
    static const short LOG_NO_PREVIOUS_VALUES = static_cast<short>(std::log2(data_bytes_size * 16));
    // worst case (for a 32-bit value):
    // 2 bits (flag) + 6 bits (previous values index) + 3 bits (number of leading zeroes) + 5 bits (data bit-size) + non-zero data bits.
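    // e.g. 2 + 6 + 3 + 5 + 32 = 48 bits per 32-bit item; for 64-bit items: 2 + 7 + 3 + 6 + 64 = 82 bits.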
    const UInt32 max_item_size_bits = 2 + LOG_NO_PREVIOUS_VALUES + LeadingZero::BIT_LENGTH + DATA_BIT_LENGTH + data_bytes_size * 8;
    // + 8 is to round up to the next byte.
    return (items_count * max_item_size_bits + 8) / 8;
}

struct BinaryValueInfo
{
    UInt8 leading_zero_bits;
    UInt8 data_bits;
    UInt8 trailing_zero_bits;
};

template <typename T>
BinaryValueInfo getBinaryValueInfo(const T & value)
{
    constexpr UInt8 bit_size = sizeof(T) * 8;
    const UInt8 lz = LeadingZero::round[getLeadingZeroBits(value)];
    const UInt8 tz = getTrailingZeroBits(value);
    const UInt8 data_size = value == 0 ? 0 : static_cast<UInt8>(bit_size - lz - tz);
    return {lz, data_size, tz};
}

template <typename T>
UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 dest_size)
{
    if (source_size % sizeof(T) != 0)
        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with Chimp codec, data size {} is not aligned to {}", source_size, sizeof(T));

    const char * const source_end = source + source_size;
    const char * const dest_start = dest;
    const char * const dest_end = dest + dest_size;

    const UInt32 items_count = source_size / sizeof(T);

    static const short NO_PREVIOUS_VALUES = sizeof(T) * 16;
    T stored_values[NO_PREVIOUS_VALUES];
    for (int i = 0; i < NO_PREVIOUS_VALUES; i++)
    {
        stored_values[i] = 0;
    }
    static const short LOG_NO_PREVIOUS_VALUES = static_cast<short>(std::log2(NO_PREVIOUS_VALUES));
    static const short THRESHOLD = 6 + LOG_NO_PREVIOUS_VALUES;
    static const int ARRAY_SIZE = static_cast<int>(std::pow(2, THRESHOLD + 1));
    int indices[ARRAY_SIZE];
    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        indices[i] = 0;
    }
    static const short setLsb = ARRAY_SIZE - 1;
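    // indices[] maps the low THRESHOLD + 1 bits of a value to the position of its most recent
    // occurrence among the stored previous values; setLsb is the corresponding bit mask.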

    unalignedStoreLittleEndian<UInt32>(dest, items_count);
    dest += sizeof(items_count);

    T prev_value = 0;
    // This causes the first XORed value to be written in full.
    BinaryValueInfo prev_xored_info{0, 0, 0};

    if (source < source_end)
    {
        prev_value = unalignedLoadLittleEndian<T>(source);
        unalignedStoreLittleEndian<T>(dest, prev_value);

        source += sizeof(prev_value);
        dest += sizeof(prev_value);
        stored_values[0] = prev_value;
    }

    BitWriter writer(dest, dest_end - dest);

    static const auto DATA_BIT_LENGTH = getBitLengthOfLength(sizeof(T));

    int total = 0;
    int previous_index = 0;
    int current_index = 0;

    while (source < source_end)
    {
        const T curr_value = unalignedLoadLittleEndian<T>(source);
        source += sizeof(curr_value);

        // find the best matching previous value
        T xored_data;
        BinaryValueInfo curr_xored_info;
        int match_key = static_cast<int>(curr_value & setLsb);
        int match_index = indices[match_key];
        if ((total - match_index) < NO_PREVIOUS_VALUES)
        {
            T tempXor = curr_value ^ stored_values[match_index % NO_PREVIOUS_VALUES];
            curr_xored_info = getBinaryValueInfo(tempXor);
            // if the match is good enough, use it
            if (curr_xored_info.trailing_zero_bits > THRESHOLD)
            {
                previous_index = match_index % NO_PREVIOUS_VALUES;
                xored_data = tempXor;
            }
            // otherwise use the immediately previous value
            else
            {
                previous_index = total % NO_PREVIOUS_VALUES;
                xored_data = curr_value ^ stored_values[previous_index];
                curr_xored_info = getBinaryValueInfo(xored_data);
            }
        }
        // if the match is outside of the range, use the immediately previous value
        else
        {
            previous_index = total % NO_PREVIOUS_VALUES;
            xored_data = curr_value ^ stored_values[previous_index];
            curr_xored_info = getBinaryValueInfo(xored_data);
        }

        // encode
        // 0b00 prefix
        if (xored_data == 0)
        {
            writer.writeBits(2, 0b00);
            writer.writeBits(LOG_NO_PREVIOUS_VALUES, previous_index);
            curr_xored_info.leading_zero_bits = 255; // max value so it can't be used
        }
        // 0b01 prefix
        else if (curr_xored_info.trailing_zero_bits > THRESHOLD)
        {
            writer.writeBits(2, 0b01);
            writer.writeBits(LOG_NO_PREVIOUS_VALUES, previous_index);
            writer.writeBits(LeadingZero::BIT_LENGTH, LeadingZero::binaryRepresentation[curr_xored_info.leading_zero_bits]);
            writer.writeBits(DATA_BIT_LENGTH, curr_xored_info.data_bits);
            writer.writeBits(curr_xored_info.data_bits, xored_data >> curr_xored_info.trailing_zero_bits);
            curr_xored_info.leading_zero_bits = 255; // max value so it can't be used
        }
        // 0b10 prefix
        else if (prev_xored_info.leading_zero_bits == curr_xored_info.leading_zero_bits)
        {
            writer.writeBits(2, 0b10);
            writer.writeBits(curr_xored_info.data_bits + curr_xored_info.trailing_zero_bits, xored_data);
        }
        // 0b11 prefix
        else
        {
            writer.writeBits(2, 0b11);
            writer.writeBits(LeadingZero::BIT_LENGTH, LeadingZero::binaryRepresentation[curr_xored_info.leading_zero_bits]);
            writer.writeBits(curr_xored_info.data_bits + curr_xored_info.trailing_zero_bits, xored_data);
        }

        // update stored previous values and indices
        prev_xored_info = curr_xored_info;
        prev_value = curr_value;
        current_index = (current_index + 1) % NO_PREVIOUS_VALUES;
        stored_values[current_index] = curr_value;
        total++;
        indices[match_key] = total;
    }
    writer.flush();

    return static_cast<UInt32>((dest - dest_start) + (writer.count() + 7) / 8);
}

template <typename T>
void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 dest_size)
{
    static const short NO_PREVIOUS_VALUES = sizeof(T) * 16;
    static const short LOG_NO_PREVIOUS_VALUES = static_cast<short>(std::log2(NO_PREVIOUS_VALUES));
    int current_index = 0;
    T stored_values[NO_PREVIOUS_VALUES];
    for (int i = 0; i < NO_PREVIOUS_VALUES; i++)
    {
        stored_values[i] = 0;
    }

    const char * const source_end = source + source_size;

    if (source + sizeof(UInt32) > source_end)
        return;


    const UInt32 items_count = unalignedLoadLittleEndian<UInt32>(source);
    source += sizeof(items_count);

    T prev_value = 0;

    // decoding the first item
    if (source + sizeof(T) > source_end || items_count < 1)
        return;

    if (static_cast<UInt64>(items_count) * sizeof(T) > dest_size)
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Chimp-encoded data: corrupted input data.");

    prev_value = unalignedLoadLittleEndian<T>(source);
    unalignedStoreLittleEndian<T>(dest, prev_value);

    source += sizeof(prev_value);
    dest += sizeof(prev_value);
    stored_values[0] = prev_value;

    BitReader reader(source, source_size - sizeof(items_count) - sizeof(prev_value));

    BinaryValueInfo prev_xored_info{0, 0, 0};

    static const auto DATA_BIT_LENGTH = getBitLengthOfLength(sizeof(T));

    // Since the data is tightly packed (a value may occupy only a few bits) and the last byte is padded with zeroes,
    // we have to keep track of the number of items to avoid reading more than there is.
    for (UInt32 items_read = 1; items_read < items_count && !reader.eof(); ++items_read)
    {
        T curr_value = prev_value;
        BinaryValueInfo curr_xored_info = prev_xored_info;
        T xored_data = 0;
        UInt64 match_index;
        UInt8 flag = reader.readBits(2);
        switch (flag)
        {
            // 0b11 prefix
            case 3:
                curr_xored_info.leading_zero_bits = LeadingZero::reverseBinaryRepresentation[reader.readBits(LeadingZero::BIT_LENGTH)];
                curr_xored_info.data_bits = sizeof(T) * 8 - curr_xored_info.leading_zero_bits;
                xored_data = static_cast<T>(reader.readBits(curr_xored_info.data_bits));
                curr_value = prev_value ^ xored_data;
                break;
            // 0b10 prefix
            case 2:
                curr_xored_info.leading_zero_bits = prev_xored_info.leading_zero_bits;
                curr_xored_info.data_bits = sizeof(T) * 8 - curr_xored_info.leading_zero_bits;
                xored_data = static_cast<T>(reader.readBits(curr_xored_info.data_bits));
                curr_value = prev_value ^ xored_data;
                break;
            // 0b01 prefix
            case 1:
                match_index = reader.readBits(LOG_NO_PREVIOUS_VALUES);
                prev_value = stored_values[match_index];
                curr_xored_info.leading_zero_bits = LeadingZero::reverseBinaryRepresentation[reader.readBits(LeadingZero::BIT_LENGTH)];
                curr_xored_info.data_bits = reader.readBits(DATA_BIT_LENGTH);
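                // A stored bit length of 0 is interpreted as a full-width value: sizeof(T) * 8 cannot be represented in DATA_BIT_LENGTH bits.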
                if (curr_xored_info.data_bits == 0)
                {
                    curr_xored_info.data_bits = sizeof(T) * 8;
                }
                curr_xored_info.trailing_zero_bits = sizeof(T) * 8 - curr_xored_info.leading_zero_bits - curr_xored_info.data_bits;
                xored_data = static_cast<T>(reader.readBits(curr_xored_info.data_bits));
                xored_data <<= curr_xored_info.trailing_zero_bits;
                curr_value = prev_value ^ xored_data;
                break;
            // 0b00 prefix
            case 0:
                match_index = reader.readBits(LOG_NO_PREVIOUS_VALUES);
                prev_value = stored_values[match_index];
                curr_value = prev_value;
                break;
        }
        unalignedStoreLittleEndian<T>(dest, curr_value);
        dest += sizeof(curr_value);

        current_index = (current_index + 1) % NO_PREVIOUS_VALUES;
        stored_values[current_index] = curr_value;
        prev_xored_info = curr_xored_info;
        prev_value = curr_value;
    }
}

UInt8 getDataBytesSize(const IDataType * column_type)
{
    if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Chimp is not applicable for {} because the data type is not of fixed size",
            column_type->getName());

    size_t max_size = column_type->getSizeOfValueInMemory();
    if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
        return static_cast<UInt8>(max_size);
    else
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Chimp is only applicable for data types of size 1, 2, 4, 8 bytes. Given type {}",
            column_type->getName());
}

}


CompressionCodecChimp::CompressionCodecChimp(UInt8 data_bytes_size_)
    : data_bytes_size(data_bytes_size_)
{
    setCodecDescription("Chimp");
}

uint8_t CompressionCodecChimp::getMethodByte() const
{
    return static_cast<uint8_t>(CompressionMethodByte::Chimp);
}

void CompressionCodecChimp::updateHash(SipHash & hash) const
{
    getCodecDesc()->updateTreeHash(hash, /*ignore_aliases=*/ true);
    hash.update(data_bytes_size);
}

UInt32 CompressionCodecChimp::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
    const auto result = 2 // common header
        + data_bytes_size // max bytes skipped if source is not properly aligned.
        + getCompressedHeaderSize(data_bytes_size) // data-specific header
        + getCompressedDataSize(data_bytes_size, uncompressed_size);
    return result;
}

UInt32 CompressionCodecChimp::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
    UInt8 bytes_to_skip = source_size % data_bytes_size;
    dest[0] = data_bytes_size;
    dest[1] = bytes_to_skip; /// unused (backward compatibility)
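    // Bytes at the start of the source that do not form a whole value are stored verbatim right after the two header bytes.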
    memcpy(&dest[2], source, bytes_to_skip);
    size_t start_pos = 2 + bytes_to_skip;
    UInt32 result_size = 0;

    const UInt32 compressed_size = getMaxCompressedDataSize(source_size);
    switch (data_bytes_size) // NOLINT(bugprone-switch-missing-default-case)
    {
        case 4:
            result_size = compressDataForType<UInt32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos], compressed_size);
            break;
        case 8:
            result_size = compressDataForType<UInt64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos], compressed_size);
            break;
    }
    return 2 + bytes_to_skip + result_size;
}

void CompressionCodecChimp::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
    if (source_size < 2)
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Chimp-encoded data. File has wrong header");

    UInt8 bytes_size = source[0];

    if (bytes_size == 0)
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Chimp-encoded data. File has wrong header");

    UInt8 bytes_to_skip = uncompressed_size % bytes_size;

    if (static_cast<UInt32>(2 + bytes_to_skip) > source_size)
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Chimp-encoded data. File has wrong header");

    if (bytes_to_skip >= uncompressed_size)
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Chimp-encoded data. File has wrong header");

    memcpy(dest, &source[2], bytes_to_skip);
    UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
    UInt32 uncompressed_size_left = uncompressed_size - bytes_to_skip;
    switch (bytes_size) // NOLINT(bugprone-switch-missing-default-case)
    {
        case 4:
            decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left);
            break;
        case 8:
            decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left);
            break;
        default:
            throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Chimp-encoded data. File has wrong header");
    }
}

void registerCodecChimp(CompressionCodecFactory & factory)
{
    UInt8 method_code = static_cast<UInt8>(CompressionMethodByte::Chimp);
    auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
    {
        /// Default bytes size is 1
        UInt8 data_bytes_size = 1;
        if (arguments && !arguments->children.empty())
        {
            if (arguments->children.size() > 1)
                throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "Chimp codec must have 1 parameter, given {}", arguments->children.size());

            const auto children = arguments->children;
            const auto * literal = children[0]->as<ASTLiteral>();
            if (!literal || literal->value.getType() != Field::Types::Which::UInt64)
                throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Chimp codec argument must be unsigned integer");

            size_t user_bytes_size = literal->value.safeGet<UInt64>();
            if (user_bytes_size != 4 && user_bytes_size != 8)
                throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Argument value for Chimp codec can be 4 or 8, given {}", user_bytes_size);
            data_bytes_size = static_cast<UInt8>(user_bytes_size);
        }
        else if (column_type)
        {
            data_bytes_size = getDataBytesSize(column_type);
        }

        return std::make_shared<CompressionCodecChimp>(data_bytes_size);
    };
    factory.registerCompressionCodecWithType("Chimp", method_code, codec_builder);
}

}
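
For illustration, here is a minimal standalone sketch of how a single 64-bit value is classified into one of the four Chimp prefixes. It mirrors the branches of compressDataForType above, but it is a deliberate simplification and not part of the codec: it assumes a C++20 compiler (for <bit>), compares against one given previous value instead of the ring buffer of candidates, and skips the leading-zero rounding done by the LeadingZero tables.

#include <bit>
#include <cstdint>
#include <cstdio>

// 64-bit case: THRESHOLD = 6 + log2(128) = 13 trailing zero bits.
static const int THRESHOLD = 13;

// prev_leading_zero_bits is the leading-zero count of the previously encoded XOR.
const char * chimpPrefix(uint64_t prev, uint64_t curr, int prev_leading_zero_bits)
{
    const uint64_t xored = prev ^ curr;
    if (xored == 0)
        return "0b00: same value, store only the index of the matching previous value";
    if (std::countr_zero(xored) > THRESHOLD)
        return "0b01: many trailing zeros, store index + leading zeros + length + significant bits";
    if (std::countl_zero(xored) == prev_leading_zero_bits)
        return "0b10: same leading-zero count as the previous XOR, store the XOR without a length field";
    return "0b11: store leading zeros + the XOR";
}

int main()
{
    const uint64_t a = std::bit_cast<uint64_t>(2.718281828459045);
    const uint64_t b = std::bit_cast<uint64_t>(2.7182818284590455);
    std::printf("%s\n", chimpPrefix(a, a, 0)); // identical value -> 0b00
    std::printf("%s\n", chimpPrefix(a, b, 0)); // nearby value -> one of 0b01 / 0b10 / 0b11
    return 0;
}

Which of the non-zero branches fires for the second call depends only on the trailing- and leading-zero counts of the XOR of the two bit patterns, exactly as in the encoder above.
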
@ -186,6 +186,7 @@ void registerCodecGorilla(CompressionCodecFactory & factory);
void registerCodecEncrypted(CompressionCodecFactory & factory);
void registerCodecFPC(CompressionCodecFactory & factory);
void registerCodecGCD(CompressionCodecFactory & factory);
void registerCodecChimp(CompressionCodecFactory & factory);

CompressionCodecFactory::CompressionCodecFactory()
{
@ -203,6 +204,7 @@ CompressionCodecFactory::CompressionCodecFactory()
    registerCodecGorilla(*this);
    registerCodecEncrypted(*this);
    registerCodecFPC(*this);
    registerCodecChimp(*this);
    registerCodecGCD(*this);

    default_codec = get("LZ4", {});
@ -48,6 +48,7 @@ enum class CompressionMethodByte : uint8_t
    FPC = 0x98,
    GCD = 0x9a,
    ZSTD_QPL = 0x9b,
    Chimp = 0x9c,
};

}
@ -520,12 +520,13 @@ public:

TEST_P(CodecTest, TranscodingWithDataType)
{
    /// Gorilla can only be applied to floating point columns
    /// Gorilla and Chimp can only be applied to floating point columns
    bool codec_is_gorilla = std::get<0>(GetParam()).codec_statement.find("Gorilla") != std::string::npos;
    bool codec_is_chimp = std::get<0>(GetParam()).codec_statement.find("Chimp") != std::string::npos;
    WhichDataType which(std::get<1>(GetParam()).data_type.get());
    bool data_is_float = which.isFloat();
    if (codec_is_gorilla && !data_is_float)
        GTEST_SKIP() << "Skipping Gorilla-compressed non-float column";
    if ((codec_is_gorilla || codec_is_chimp) && !data_is_float)
        GTEST_SKIP() << "Skipping Gorilla/Chimp-compressed non-float column";

    const auto codec = makeCodec(CODEC_WITH_DATA_TYPE);
    testTranscoding(*codec);
@ -808,7 +809,10 @@ const auto DefaultCodecsToTest = ::testing::Values(
    Codec("DoubleDelta, ZSTD"),
    Codec("Gorilla"),
    Codec("Gorilla, LZ4"),
    Codec("Gorilla, ZSTD")
    Codec("Gorilla, ZSTD"),
    Codec("Chimp"),
    Codec("Chimp, LZ4"),
    Codec("Chimp, ZSTD")
);

///////////////////////////////////////////////////////////////////////////////////////////////////
@ -13,6 +13,7 @@
            <value>DoubleDelta</value>
            <value>Gorilla</value>
            <value>FPC</value>
            <value>Chimp</value>
        </values>
    </substitution>
    <substitution>

@ -13,6 +13,7 @@
            <value>DoubleDelta</value>
            <value>Gorilla</value>
            <value>FPC</value>
            <value>Chimp</value>
        </values>
    </substitution>
    <substitution>
@ -0,0 +1,2 @@
F64
F32

tests/queries/0_stateless/00950_test_chimp_codec.sql (new file, 63 lines)
@ -0,0 +1,63 @@
DROP TABLE IF EXISTS codecTest;

SET cross_to_inner_join_rewrite = 1;

CREATE TABLE codecTest (
    key UInt64,
    name String,
    ref_valueF64 Float64,
    ref_valueF32 Float32,
    valueF64 Float64 CODEC(Chimp),
    valueF32 Float32 CODEC(Chimp)
) Engine = MergeTree ORDER BY key;

-- best case - same value
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
    SELECT number AS n, 'e()', e() AS v, v, v, v FROM system.numbers LIMIT 1, 100;

-- good case - values that grow insignificantly
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
    SELECT number AS n, 'log2(n)', log2(n) AS v, v, v, v FROM system.numbers LIMIT 101, 100;

-- bad case - values differ significantly
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
    SELECT number AS n, 'n*sqrt(n)', n*sqrt(n) AS v, v, v, v FROM system.numbers LIMIT 201, 100;

-- worst case - almost random values
INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32)
    SELECT number AS n, 'sin(n*n*n)*n', sin(n * n * n * n * n) AS v, v, v, v FROM system.numbers LIMIT 301, 100;


-- These floating-point values are expected to be BINARY equal, so comparing by value is OK here.

-- referencing the previous row's key, value, and case name to simplify debugging.
SELECT 'F64';
SELECT
    c1.key, c1.name,
    c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64,
    'prev:',
    c2.key, c2.ref_valueF64
FROM
    codecTest as c1, codecTest as c2
WHERE
    dF64 != 0
AND
    c2.key = c1.key - 1
LIMIT 10;


SELECT 'F32';
SELECT
    c1.key, c1.name,
    c1.ref_valueF32, c1.valueF32, c1.ref_valueF32 - c1.valueF32 AS dF32,
    'prev:',
    c2.key, c2.ref_valueF32
FROM
    codecTest as c1, codecTest as c2
WHERE
    dF32 != 0
AND
    c2.key = c1.key - 1
LIMIT 10;

DROP TABLE IF EXISTS codecTest;
@ -8,24 +8,33 @@ CREATE TABLE tbl (
    -- Nullable
    v1_gor Nullable(Float64) CODEC(Gorilla),
    v1_fpc Nullable(Float64) CODEC(FPC),
    v1_chi Nullable(Float64) CODEC(Chimp),
    -- Array
    v2_gor Array(Float64) CODEC(Gorilla),
    v2_fpc Array(Float64) CODEC(FPC),
    v2_chi Array(Float64) CODEC(Chimp),
    v3_gor Array(Array(Float64)) CODEC(Gorilla),
    v3_fpc Array(Array(Float64)) CODEC(FPC),
    v3_chi Array(Array(Float64)) CODEC(Chimp),
    v4_gor Array(Nullable(Float64)) CODEC(Gorilla),
    v4_fpc Array(Nullable(Float64)) CODEC(FPC),
    v4_chi Array(Nullable(Float64)) CODEC(Chimp),
    v5_gor Array(Tuple(Float64)) CODEC(Gorilla),
    v5_fpc Array(Tuple(Float64)) CODEC(FPC),
    v5_chi Array(Tuple(Float64)) CODEC(Chimp),
    -- Tuple
    v6_gor Tuple(Float64) CODEC(Gorilla),
    v6_fpc Tuple(Float64) CODEC(FPC),
    v6_chi Tuple(Float64) CODEC(Chimp),
    v7_gor Tuple(Tuple(Float64)) CODEC(Gorilla),
    v7_fpc Tuple(Tuple(Float64)) CODEC(FPC),
    v7_chi Tuple(Tuple(Float64)) CODEC(Chimp),
    v8_gor Tuple(Nullable(Float64)) CODEC(Gorilla),
    v8_fpc Tuple(Nullable(Float64)) CODEC(FPC),
    v8_chi Tuple(Nullable(Float64)) CODEC(Chimp),
    v9_gor Tuple(Array(Float64)) CODEC(Gorilla),
    v9_fpc Tuple(Array(Float64)) CODEC(FPC),
    v9_chi Tuple(Array(Float64)) CODEC(Chimp),
) Engine = MergeTree ORDER BY tuple();

DROP TABLE IF EXISTS tbl;
@ -7,3 +7,6 @@
1
1
1
1
1
1
@ -4,31 +4,39 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

echo "Hello, World!" > 02584_test_data
DATA_FILE=$(mktemp -q 02584_test_data_XXXXXX)
OUT_FILE=$(mktemp -q 02584_test_out_XXXXXX)

$CLICKHOUSE_COMPRESSOR --codec 'Delta' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out'
$CLICKHOUSE_COMPRESSOR --codec 'Delta(5)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Delta([1,2])' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Delta(4)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out';
echo "Hello, World!" > $DATA_FILE;

$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out'
$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta(5)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta([1,2])' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta(4)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out';
$CLICKHOUSE_COMPRESSOR --codec 'Delta' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;
$CLICKHOUSE_COMPRESSOR --codec 'Delta(5)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Delta([1,2])' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Delta(4)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;

$CLICKHOUSE_COMPRESSOR --codec 'Gorilla' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out'
$CLICKHOUSE_COMPRESSOR --codec 'Gorilla(5)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Gorilla([1,2])' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Gorilla(4)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out';
$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;
$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta(5)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta([1,2])' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta(4)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;

$CLICKHOUSE_COMPRESSOR --codec 'FPC' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out';
$CLICKHOUSE_COMPRESSOR --codec 'FPC(5)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out';
$CLICKHOUSE_COMPRESSOR --codec 'FPC(5, 1)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'FPC([1,2,3])' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'FPC(5, 4)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out';
$CLICKHOUSE_COMPRESSOR --codec 'Gorilla' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;
$CLICKHOUSE_COMPRESSOR --codec 'Gorilla(5)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Gorilla([1,2])' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Gorilla(4)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;

$CLICKHOUSE_COMPRESSOR --codec 'Chimp(1)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Chimp(5)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Chimp([1,2])' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'Chimp(4)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;

$CLICKHOUSE_COMPRESSOR --codec 'FPC' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;
$CLICKHOUSE_COMPRESSOR --codec 'FPC(5)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;
$CLICKHOUSE_COMPRESSOR --codec 'FPC(5, 1)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'FPC([1,2,3])' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER";
$CLICKHOUSE_COMPRESSOR --codec 'FPC(5, 4)' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE;


$CLICKHOUSE_COMPRESSOR --codec 'T64' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "CANNOT_COMPRESS";
$CLICKHOUSE_COMPRESSOR --codec 'T64' --codec 'LZ4' --input $DATA_FILE --output $OUT_FILE 2>&1 | grep -c "CANNOT_COMPRESS";

rm 02584_test_data 02584_test_out
rm $DATA_FILE $OUT_FILE