This commit is contained in:
Nikita Mikhaylov 2021-08-03 12:24:16 +00:00
parent 6951e8147d
commit 2aea1c8d4a
6 changed files with 50 additions and 16 deletions

View File

@ -132,6 +132,10 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_size = source[0];
if (bytes_size == 0)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
if (UInt32(2 + bytes_to_skip) > source_size)

View File

@ -502,6 +502,10 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_size = source[0];
if (bytes_size == 0)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
if (UInt32(2 + bytes_to_skip) > source_size)

View File

@ -410,6 +410,10 @@ void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 sourc
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_size = source[0];
if (bytes_size == 0)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
if (UInt32(2 + bytes_to_skip) > source_size)

View File

@ -62,6 +62,7 @@ private:
/// Error codes referenced in this file. They are only declared here (`extern`);
/// their definitions are expected to live in another translation unit.
namespace ErrorCodes
{
extern const int CANNOT_COMPRESS;
/// Passed to the Exception raised by CompressionCodecLZ4::doDecompressData()
/// when LZ4::decompress() returns false.
extern const int CANNOT_DECOMPRESS;
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int ILLEGAL_CODEC_PARAMETER;
}
@ -93,7 +94,10 @@ UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_si
/// Decompresses an LZ4 block of `source_size` bytes at `source` into `dest`.
///
/// @param source            Compressed input.
/// @param source_size       Number of readable bytes at `source`.
/// @param dest              Output buffer (NOTE(review): presumably sized for at least
///                          `uncompressed_size` bytes by the caller — confirm).
/// @param uncompressed_size Expected size of the decompressed data.
/// @throws Exception with ErrorCodes::CANNOT_DECOMPRESS when LZ4::decompress() reports failure.
void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
    /// Decompress exactly once and check the result: LZ4::decompress() returns a bool,
    /// and ignoring it would let malformed input pass silently.
    const bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
    if (!success)
        throw Exception("Cannot decompress", ErrorCodes::CANNOT_DECOMPRESS);
}
void registerCodecLZ4(CompressionCodecFactory & factory)

View File

@ -412,13 +412,16 @@ template <> void inline copyOverlap<32, false>(UInt8 * op, const UInt8 *& match,
/// See also https://stackoverflow.com/a/30669632
template <size_t copy_amount, bool use_shuffle>
void NO_INLINE decompressImpl(
bool NO_INLINE decompressImpl(
const char * const source,
char * const dest,
size_t source_size,
size_t dest_size)
{
const UInt8 * ip = reinterpret_cast<const UInt8 *>(source);
UInt8 * op = reinterpret_cast<UInt8 *>(dest);
const UInt8 * const input_end = ip + source_size;
UInt8 * const output_begin = op;
UInt8 * const output_end = op + dest_size;
/// Unrolling with clang causes a >10% performance degradation.
@ -461,13 +464,19 @@ void NO_INLINE decompressImpl(
/// output: xyzHello, w
/// ^-op (we will overwrite excessive bytes on next iteration)
wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
{
auto * target = std::min(copy_end, output_end);
wildCopy<copy_amount>(op, ip, target); /// Here we can write up to copy_amount - 1 bytes after buffer.
if (target == output_end)
return true;
}
ip += length;
op = copy_end;
if (copy_end >= output_end)
return;
if (unlikely(ip > input_end))
return false;
/// Get match offset.
@ -475,6 +484,9 @@ void NO_INLINE decompressImpl(
ip += 2;
const UInt8 * match = op - offset;
if (unlikely(match < output_begin))
return false;
/// Get match length.
length = token & 0x0F;
@ -515,7 +527,10 @@ void NO_INLINE decompressImpl(
copy<copy_amount>(op, match); /// copy_amount + copy_amount - 1 - 4 * 2 bytes after buffer.
if (length > copy_amount * 2)
wildCopy<copy_amount>(op + copy_amount, match + copy_amount, copy_end);
{
auto * target = std::min(copy_end, output_end);
wildCopy<copy_amount>(op + copy_amount, match + copy_amount, target);
}
op = copy_end;
}
@ -524,7 +539,7 @@ void NO_INLINE decompressImpl(
}
void decompress(
bool decompress(
const char * const source,
char * const dest,
size_t source_size,
@ -532,7 +547,7 @@ void decompress(
PerformanceStatistics & statistics [[maybe_unused]])
{
if (source_size == 0 || dest_size == 0)
return;
return true;
/// Don't run timer if the block is too small.
if (dest_size >= 32768)
@ -542,24 +557,27 @@ void decompress(
/// Run the selected method and measure time.
Stopwatch watch;
bool success = true;
if (best_variant == 0)
decompressImpl<16, true>(source, dest, dest_size);
success = decompressImpl<16, true>(source, dest, source_size, dest_size);
if (best_variant == 1)
decompressImpl<16, false>(source, dest, dest_size);
success = decompressImpl<16, false>(source, dest, source_size, dest_size);
if (best_variant == 2)
decompressImpl<8, true>(source, dest, dest_size);
success = decompressImpl<8, true>(source, dest, source_size, dest_size);
if (best_variant == 3)
decompressImpl<32, false>(source, dest, dest_size);
success = decompressImpl<32, false>(source, dest, source_size, dest_size);
watch.stop();
/// Update performance statistics.
statistics.data[best_variant].update(watch.elapsedSeconds(), dest_size);
return success;
}
else
{
decompressImpl<8, false>(source, dest, dest_size);
return decompressImpl<8, false>(source, dest, source_size, dest_size);
}
}

View File

@ -122,14 +122,14 @@ struct PerformanceStatistics
return choose_method;
}
/// Default constructor: members keep whatever in-class initializers the struct declares
/// (NOTE(review): the member declarations are outside this view — confirm).
PerformanceStatistics() = default;

/// Initializes `choose_method` from the argument.
/// `explicit` so that an integer is never implicitly converted into a PerformanceStatistics.
explicit PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {}
};
/** This method dispatches to one of several implementations depending on performance statistics.
*/
void decompress(
bool decompress(
const char * const source,
char * const dest,
size_t source_size,