From 2aea1c8d4a5be320365472052d8a48bf69fd9fe9 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 3 Aug 2021 12:24:16 +0000 Subject: [PATCH] done --- src/Compression/CompressionCodecDelta.cpp | 4 ++ .../CompressionCodecDoubleDelta.cpp | 4 ++ src/Compression/CompressionCodecGorilla.cpp | 4 ++ src/Compression/CompressionCodecLZ4.cpp | 6 ++- src/Compression/LZ4_decompress_faster.cpp | 42 +++++++++++++------ src/Compression/LZ4_decompress_faster.h | 6 +-- 6 files changed, 50 insertions(+), 16 deletions(-) diff --git a/src/Compression/CompressionCodecDelta.cpp b/src/Compression/CompressionCodecDelta.cpp index 447abe9e840..e281609ff43 100644 --- a/src/Compression/CompressionCodecDelta.cpp +++ b/src/Compression/CompressionCodecDelta.cpp @@ -132,6 +132,10 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_ throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); UInt8 bytes_size = source[0]; + + if (bytes_size == 0) + throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); + UInt8 bytes_to_skip = uncompressed_size % bytes_size; if (UInt32(2 + bytes_to_skip) > source_size) diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 79ced55594a..c416582eb6b 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -502,6 +502,10 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); UInt8 bytes_size = source[0]; + + if (bytes_size == 0) + throw Exception("Cannot decompress. 
File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); + UInt8 bytes_to_skip = uncompressed_size % bytes_size; if (UInt32(2 + bytes_to_skip) > source_size) diff --git a/src/Compression/CompressionCodecGorilla.cpp b/src/Compression/CompressionCodecGorilla.cpp index 7fcb2183503..1276ac911f1 100644 --- a/src/Compression/CompressionCodecGorilla.cpp +++ b/src/Compression/CompressionCodecGorilla.cpp @@ -410,6 +410,10 @@ void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 sourc throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); UInt8 bytes_size = source[0]; + + if (bytes_size == 0) + throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); + UInt8 bytes_to_skip = uncompressed_size % bytes_size; if (UInt32(2 + bytes_to_skip) > source_size) diff --git a/src/Compression/CompressionCodecLZ4.cpp b/src/Compression/CompressionCodecLZ4.cpp index 8cb81e460b1..379eea7e45c 100644 --- a/src/Compression/CompressionCodecLZ4.cpp +++ b/src/Compression/CompressionCodecLZ4.cpp @@ -62,6 +62,7 @@ private: namespace ErrorCodes { extern const int CANNOT_COMPRESS; + extern const int CANNOT_DECOMPRESS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int ILLEGAL_CODEC_PARAMETER; } @@ -93,7 +94,10 @@ UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_si void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { - LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat); + bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat); + + if (!success) + throw Exception("Cannot decompress", ErrorCodes::CANNOT_DECOMPRESS); } void registerCodecLZ4(CompressionCodecFactory & factory) diff --git a/src/Compression/LZ4_decompress_faster.cpp b/src/Compression/LZ4_decompress_faster.cpp index dc293941310..6972457f11b 100644 ---
a/src/Compression/LZ4_decompress_faster.cpp +++ b/src/Compression/LZ4_decompress_faster.cpp @@ -412,13 +412,16 @@ template <> void inline copyOverlap<32, false>(UInt8 * op, const UInt8 *& match, /// See also https://stackoverflow.com/a/30669632 template -void NO_INLINE decompressImpl( +bool NO_INLINE decompressImpl( const char * const source, char * const dest, + size_t source_size, size_t dest_size) { const UInt8 * ip = reinterpret_cast(source); UInt8 * op = reinterpret_cast(dest); + const UInt8 * const input_end = ip + source_size; + UInt8 * const output_begin = op; UInt8 * const output_end = op + dest_size; /// Unrolling with clang is doing >10% performance degrade. @@ -461,13 +464,19 @@ void NO_INLINE decompressImpl( /// output: xyzHello, w /// ^-op (we will overwrite excessive bytes on next iteration) - wildCopy(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer. + { + auto * target = std::min(copy_end, output_end); + wildCopy(op, ip, target); /// Here we can write up to copy_amount - 1 bytes after buffer. + + if (target == output_end) + return true; + } ip += length; op = copy_end; - if (copy_end >= output_end) - return; + if (unlikely(ip > input_end)) + return false; /// Get match offset. @@ -475,6 +484,9 @@ void NO_INLINE decompressImpl( ip += 2; const UInt8 * match = op - offset; + if (unlikely(match < output_begin)) + return false; + /// Get match length. length = token & 0x0F; @@ -515,7 +527,10 @@ void NO_INLINE decompressImpl( copy(op, match); /// copy_amount + copy_amount - 1 - 4 * 2 bytes after buffer. 
if (length > copy_amount * 2) - wildCopy(op + copy_amount, match + copy_amount, copy_end); + { + auto * target = std::min(copy_end, output_end); + wildCopy(op + copy_amount, match + copy_amount, target); + } op = copy_end; } @@ -524,7 +539,7 @@ void NO_INLINE decompressImpl( } -void decompress( +bool decompress( const char * const source, char * const dest, size_t source_size, @@ -532,7 +547,7 @@ void decompress( PerformanceStatistics & statistics [[maybe_unused]]) { if (source_size == 0 || dest_size == 0) - return; + return true; /// Don't run timer if the block is too small. if (dest_size >= 32768) @@ -542,24 +557,27 @@ void decompress( /// Run the selected method and measure time. Stopwatch watch; + bool success = true; if (best_variant == 0) - decompressImpl<16, true>(source, dest, dest_size); + success = decompressImpl<16, true>(source, dest, source_size, dest_size); if (best_variant == 1) - decompressImpl<16, false>(source, dest, dest_size); + success = decompressImpl<16, false>(source, dest, source_size, dest_size); if (best_variant == 2) - decompressImpl<8, true>(source, dest, dest_size); + success = decompressImpl<8, true>(source, dest, source_size, dest_size); if (best_variant == 3) - decompressImpl<32, false>(source, dest, dest_size); + success = decompressImpl<32, false>(source, dest, source_size, dest_size); watch.stop(); /// Update performance statistics. 
statistics.data[best_variant].update(watch.elapsedSeconds(), dest_size); + + return success; } else { - decompressImpl<8, false>(source, dest, dest_size); + return decompressImpl<8, false>(source, dest, source_size, dest_size); } } diff --git a/src/Compression/LZ4_decompress_faster.h b/src/Compression/LZ4_decompress_faster.h index 30a0d7acb22..c1b54cf20c9 100644 --- a/src/Compression/LZ4_decompress_faster.h +++ b/src/Compression/LZ4_decompress_faster.h @@ -122,14 +122,14 @@ struct PerformanceStatistics return choose_method; } - PerformanceStatistics() {} - PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {} + PerformanceStatistics() = default; + explicit PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {} }; /** This method dispatch to one of different implementations depending on performance statistics. */ -void decompress( +bool decompress( const char * const source, char * const dest, size_t source_size,