Edited sign

2024-12-17 11:52:27 +00:00 · 2023-08-12 11:31:37 +00:00 · 2023-08-12 11:31:37 +00:00 · 8037dd88c0
commit 8037dd88c0
parent ba8cb254a3
4 changed files with 16 additions and 123 deletions
--- a/src/Compression/CompressionCodecGCD.cpp
+++ b/src/Compression/CompressionCodecGCD.cpp
@ -17,22 +17,6 @@
 namespace DB
 {

-enum class GCDTypes : Int8
-{
-    UInt8_type = 1,
-    UInt16_type = 2,
-    UInt32_type = 4,
-    UInt64_type = 8,
-    UInt128_type = 16,
-    UInt256_type = 32,
-    Int8_type = -1,
-    Int16_type = -2,
-    Int32_type = -4,
-    Int64_type = -8,
-    Int128_type = -16,
-    Int256_type = -32,
-};
-
 template <typename T>
 T gcd_func(T a, T b)
 {
@ -52,7 +36,7 @@ T gcd_func(T a, T b)
 class CompressionCodecGCD : public ICompressionCodec
 {
 public:
-    explicit CompressionCodecGCD(Int8 gcd_bytes_size_);
+    explicit CompressionCodecGCD(UInt8 gcd_bytes_size_);

    uint8_t getMethodByte() const override;

@ -67,7 +51,7 @@ protected:
    bool isGenericCompression() const override { return false; }

 private:
-    const Int8 gcd_bytes_size;
+    const UInt8 gcd_bytes_size;
 };


@ -84,10 +68,10 @@ UInt32 CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) c
    return uncompressed_size + 2;
 }

-CompressionCodecGCD::CompressionCodecGCD(Int8 gcd_bytes_size_)
+CompressionCodecGCD::CompressionCodecGCD(UInt8 gcd_bytes_size_)
    : gcd_bytes_size(gcd_bytes_size_)
 {
-    setCodecDescription("GCD", {std::make_shared<ASTLiteral>(static_cast<Int64>(gcd_bytes_size))});
+    setCodecDescription("GCD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(gcd_bytes_size))});
 }

 uint8_t CompressionCodecGCD::getMethodByte() const
@ -166,7 +150,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,

 UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_size, char * dest) const
 {
-    UInt8 bytes_to_skip = source_size % abs(gcd_bytes_size);
+    UInt8 bytes_to_skip = source_size % gcd_bytes_size;
    dest[0] = gcd_bytes_size;
    dest[1] = bytes_to_skip; /// unused (backward compatibility)
    memcpy(&dest[2], source, bytes_to_skip);
@ -191,24 +175,6 @@ UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_si
    case 32:
        compressDataForType<UInt256>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
        break;
-    case -1:
-        compressDataForType<Int8>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
-        break;
-    case -2:
-        compressDataForType<Int16>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
-        break;
-    case -4:
-        compressDataForType<Int32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
-        break;
-    case -8:
-        compressDataForType<Int64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
-        break;
-    case -16:
-        compressDataForType<Int128>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
-        break;
-    case -32:
-        compressDataForType<Int256>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
-        break;
    }
    return 1 + 1 + source_size;
 }
@ -221,13 +187,12 @@ void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_si
    if (uncompressed_size == 0)
        return;

-    Int8 bytes_size = source[0];
+    UInt8 bytes_size = source[0];

-    if (!(bytes_size == 1 || bytes_size == 2 || bytes_size == 4 || bytes_size == 8 || bytes_size == 16 || bytes_size == 32 ||
-          bytes_size == -1 || bytes_size == -2 || bytes_size == -4 || bytes_size == -8 || bytes_size == -16 || bytes_size == -32))
+    if (!(bytes_size == 1 || bytes_size == 2 || bytes_size == 4 || bytes_size == 8 || bytes_size == 16 || bytes_size == 32))
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");

-    UInt8 bytes_to_skip = uncompressed_size % abs(bytes_size);
+    UInt8 bytes_to_skip = uncompressed_size % bytes_size;
    UInt32 output_size = uncompressed_size - bytes_to_skip;

    if (static_cast<UInt32>(2 + bytes_to_skip) > source_size)
@ -255,98 +220,24 @@ void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_si
    case 32:
        decompressDataForType<UInt256>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
        break;
-    case -1:
-        decompressDataForType<Int8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
-        break;
-    case -2:
-        decompressDataForType<Int16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
-        break;
-    case -4:
-        decompressDataForType<Int32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
-        break;
-    case -8:
-        decompressDataForType<Int64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
-        break;
-    case -16:
-        decompressDataForType<Int128>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
-        break;
-    case -32:
-        decompressDataForType<Int256>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
-        break;
    }
 }

 namespace
 {

-Int8 getGCDBytesSize(const IDataType * column_type)
+UInt8 getGCDBytesSize(const IDataType * column_type)
 {
    if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is not applicable for {} because the data type is not of fixed size",
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Delta is not applicable for {} because the data type is not of fixed size",
            column_type->getName());

    size_t max_size = column_type->getSizeOfValueInMemory();
    if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8 || max_size == 16 || max_size == 32)
-    {
-        if (column_type->getName() == "UInt8")
-        {
-            return static_cast<Int8>(GCDTypes::UInt8_type);
-        }
-        else if (column_type->getName() == "UInt16")
-        {
-            return static_cast<Int8>(GCDTypes::UInt16_type);
-        }
-        else if (column_type->getName() == "UInt32")
-        {
-            return static_cast<Int8>(GCDTypes::UInt32_type);
-        }
-        else if (column_type->getName() == "UInt64")
-        {
-            return static_cast<Int8>(GCDTypes::UInt64_type);
-        }
-        else if (column_type->getName() == "UInt128")
-        {
-            return static_cast<Int8>(GCDTypes::UInt128_type);
-        }
-        else if (column_type->getName() == "UInt256")
-        {
-            return static_cast<Int8>(GCDTypes::UInt256_type);
-        }
-        else if (column_type->getName() == "Int8")
-        {
-            return static_cast<Int8>(GCDTypes::Int8_type);
-        }
-        else if (column_type->getName() == "Int16")
-        {
-            return static_cast<Int8>(GCDTypes::Int16_type);
-        }
-        else if (column_type->getName() == "Int32")
-        {
-            return static_cast<Int8>(GCDTypes::Int32_type);
-        }
-        else if (column_type->getName() == "Int64")
-        {
-            return static_cast<Int8>(GCDTypes::Int64_type);
-        }
-        else if (column_type->getName() == "Int128")
-        {
-            return static_cast<Int8>(GCDTypes::Int128_type);
-        }
-        else if (column_type->getName() == "Int256")
-        {
-            return static_cast<Int8>(GCDTypes::Int256_type);
-        }
-        else
-        {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is only applicable for data types of size 1, 2, 4, 8, 16, 32 bytes. Given type {}",
-            column_type->getName());
-        }
-    }
+        return static_cast<UInt8>(max_size);
    else
-    {
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is only applicable for data types of size 1, 2, 4, 8, 16, 32 bytes. Given type {}",
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Delta is only applicable for data types of size 1, 2, 4, 8, 16, 32 bytes. Given type {}",
            column_type->getName());
-    }
 }

 }
@ -357,7 +248,7 @@ void registerCodecGCD(CompressionCodecFactory & factory)
    auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
    {
        /// Default bytes size is 1.
-        Int8 gcd_bytes_size = 1;
+        UInt8 gcd_bytes_size = 1;
        if (column_type)
        {
            gcd_bytes_size = getGCDBytesSize(column_type);
--- a/src/Compression/fuzzers/gcd_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/gcd_decompress_fuzzer.cpp
@ -11,7 +11,7 @@ namespace DB

 struct AuxiliaryRandomData
 {
-    Int8 gcd_size_bytes;
+    UInt8 gcd_size_bytes;
    size_t decompressed_size;
 };

--- a/tests/performance/codecs_int_insert.xml
+++ b/tests/performance/codecs_int_insert.xml
@ -13,6 +13,7 @@
                <value>Delta</value>
                <value>T64</value>
                <value>DoubleDelta</value>
+                <value>GCD</value>
            </values>
        </substitution>
        <substitution>
--- a/tests/performance/codecs_int_select.xml
+++ b/tests/performance/codecs_int_select.xml
@ -13,6 +13,7 @@
                <value>Delta</value>
                <value>T64</value>
                <value>DoubleDelta</value>
+                <value>GCD</value>
            </values>
        </substitution>
        <substitution>