Edited sign

This commit is contained in:
Александр Нам 2023-08-12 11:31:37 +00:00
parent ba8cb254a3
commit 8037dd88c0
4 changed files with 16 additions and 123 deletions

View File

@ -17,22 +17,6 @@
namespace DB
{
enum class GCDTypes : Int8
{
UInt8_type = 1,
UInt16_type = 2,
UInt32_type = 4,
UInt64_type = 8,
UInt128_type = 16,
UInt256_type = 32,
Int8_type = -1,
Int16_type = -2,
Int32_type = -4,
Int64_type = -8,
Int128_type = -16,
Int256_type = -32,
};
template <typename T>
T gcd_func(T a, T b)
{
@ -52,7 +36,7 @@ T gcd_func(T a, T b)
class CompressionCodecGCD : public ICompressionCodec
{
public:
explicit CompressionCodecGCD(Int8 gcd_bytes_size_);
explicit CompressionCodecGCD(UInt8 gcd_bytes_size_);
uint8_t getMethodByte() const override;
@ -67,7 +51,7 @@ protected:
bool isGenericCompression() const override { return false; }
private:
const Int8 gcd_bytes_size;
const UInt8 gcd_bytes_size;
};
@ -84,10 +68,10 @@ UInt32 CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) c
return uncompressed_size + 2;
}
CompressionCodecGCD::CompressionCodecGCD(Int8 gcd_bytes_size_)
CompressionCodecGCD::CompressionCodecGCD(UInt8 gcd_bytes_size_)
: gcd_bytes_size(gcd_bytes_size_)
{
setCodecDescription("GCD", {std::make_shared<ASTLiteral>(static_cast<Int64>(gcd_bytes_size))});
setCodecDescription("GCD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(gcd_bytes_size))});
}
uint8_t CompressionCodecGCD::getMethodByte() const
@ -166,7 +150,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
UInt8 bytes_to_skip = source_size % abs(gcd_bytes_size);
UInt8 bytes_to_skip = source_size % gcd_bytes_size;
dest[0] = gcd_bytes_size;
dest[1] = bytes_to_skip; /// unused (backward compatibility)
memcpy(&dest[2], source, bytes_to_skip);
@ -191,24 +175,6 @@ UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_si
case 32:
compressDataForType<UInt256>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case -1:
compressDataForType<Int8>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case -2:
compressDataForType<Int16>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case -4:
compressDataForType<Int32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case -8:
compressDataForType<Int64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case -16:
compressDataForType<Int128>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case -32:
compressDataForType<Int256>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
}
return 1 + 1 + source_size;
}
@ -221,13 +187,12 @@ void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_si
if (uncompressed_size == 0)
return;
Int8 bytes_size = source[0];
UInt8 bytes_size = source[0];
if (!(bytes_size == 1 || bytes_size == 2 || bytes_size == 4 || bytes_size == 8 || bytes_size == 16 || bytes_size == 32 ||
bytes_size == -1 || bytes_size == -2 || bytes_size == -4 || bytes_size == -8 || bytes_size == -16 || bytes_size == -32))
if (!(bytes_size == 1 || bytes_size == 2 || bytes_size == 4 || bytes_size == 8 || bytes_size == 16 || bytes_size == 32))
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
UInt8 bytes_to_skip = uncompressed_size % abs(bytes_size);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
UInt32 output_size = uncompressed_size - bytes_to_skip;
if (static_cast<UInt32>(2 + bytes_to_skip) > source_size)
@ -255,98 +220,24 @@ void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_si
case 32:
decompressDataForType<UInt256>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case -1:
decompressDataForType<Int8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case -2:
decompressDataForType<Int16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case -4:
decompressDataForType<Int32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case -8:
decompressDataForType<Int64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case -16:
decompressDataForType<Int128>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case -32:
decompressDataForType<Int256>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
}
}
namespace
{
Int8 getGCDBytesSize(const IDataType * column_type)
UInt8 getGCDBytesSize(const IDataType * column_type)
{
if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is not applicable for {} because the data type is not of fixed size",
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Delta is not applicable for {} because the data type is not of fixed size",
column_type->getName());
size_t max_size = column_type->getSizeOfValueInMemory();
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8 || max_size == 16 || max_size == 32)
{
if (column_type->getName() == "UInt8")
{
return static_cast<Int8>(GCDTypes::UInt8_type);
}
else if (column_type->getName() == "UInt16")
{
return static_cast<Int8>(GCDTypes::UInt16_type);
}
else if (column_type->getName() == "UInt32")
{
return static_cast<Int8>(GCDTypes::UInt32_type);
}
else if (column_type->getName() == "UInt64")
{
return static_cast<Int8>(GCDTypes::UInt64_type);
}
else if (column_type->getName() == "UInt128")
{
return static_cast<Int8>(GCDTypes::UInt128_type);
}
else if (column_type->getName() == "UInt256")
{
return static_cast<Int8>(GCDTypes::UInt256_type);
}
else if (column_type->getName() == "Int8")
{
return static_cast<Int8>(GCDTypes::Int8_type);
}
else if (column_type->getName() == "Int16")
{
return static_cast<Int8>(GCDTypes::Int16_type);
}
else if (column_type->getName() == "Int32")
{
return static_cast<Int8>(GCDTypes::Int32_type);
}
else if (column_type->getName() == "Int64")
{
return static_cast<Int8>(GCDTypes::Int64_type);
}
else if (column_type->getName() == "Int128")
{
return static_cast<Int8>(GCDTypes::Int128_type);
}
else if (column_type->getName() == "Int256")
{
return static_cast<Int8>(GCDTypes::Int256_type);
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is only applicable for data types of size 1, 2, 4, 8, 16, 32 bytes. Given type {}",
column_type->getName());
}
}
return static_cast<UInt8>(max_size);
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is only applicable for data types of size 1, 2, 4, 8, 16, 32 bytes. Given type {}",
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Delta is only applicable for data types of size 1, 2, 4, 8, 16, 32 bytes. Given type {}",
column_type->getName());
}
}
}
@ -357,7 +248,7 @@ void registerCodecGCD(CompressionCodecFactory & factory)
auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
/// Default bytes size is 1.
Int8 gcd_bytes_size = 1;
UInt8 gcd_bytes_size = 1;
if (column_type)
{
gcd_bytes_size = getGCDBytesSize(column_type);

View File

@ -11,7 +11,7 @@ namespace DB
struct AuxiliaryRandomData
{
Int8 gcd_size_bytes;
UInt8 gcd_size_bytes;
size_t decompressed_size;
};

View File

@ -13,6 +13,7 @@
<value>Delta</value>
<value>T64</value>
<value>DoubleDelta</value>
<value>GCD</value>
</values>
</substitution>
<substitution>

View File

@ -13,6 +13,7 @@
<value>Delta</value>
<value>T64</value>
<value>DoubleDelta</value>
<value>GCD</value>
</values>
</substitution>
<substitution>