From e63da5969f6526fae57cb2b786ece304c1143a29 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 19 Aug 2021 12:52:24 +0000 Subject: [PATCH] First blood --- src/Common/examples/YAML_fuzzer.cpp | 39 +++++++++++++++ src/Compression/CMakeLists.txt | 11 ----- src/Compression/CompressionCodecDelta.cpp | 6 +++ .../CompressionCodecDoubleDelta.cpp | 1 + src/Compression/CompressionCodecLZ4.cpp | 6 +++ src/Compression/ICompressionCodec.h | 4 ++ src/Compression/fuzzers/CMakeLists.txt | 10 +++- .../fuzzers/delta_decompress_fuzzer.cpp | 47 ++++++++++++++++++ .../fuzzers/lz4_decompress_fuzzer.cpp | 48 +++++++++++++++++++ 9 files changed, 159 insertions(+), 13 deletions(-) create mode 100644 src/Common/examples/YAML_fuzzer.cpp create mode 100644 src/Compression/fuzzers/delta_decompress_fuzzer.cpp create mode 100644 src/Compression/fuzzers/lz4_decompress_fuzzer.cpp diff --git a/src/Common/examples/YAML_fuzzer.cpp b/src/Common/examples/YAML_fuzzer.cpp new file mode 100644 index 00000000000..63908245a66 --- /dev/null +++ b/src/Common/examples/YAML_fuzzer.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) /* libFuzzer entry point: writes the input bytes to a temporary file and runs DB::YAMLParserImpl::parse on the path of that file. */ +{ + /// How to test: + /// build ClickHouse with YAML_fuzzer.cpp + /// ./YAML_fuzzer YAML_CORPUS + /// where YAML_CORPUS is a directory with different YAML configs for libfuzzer + char file_name[L_tmpnam]; + if (!std::tmpnam(file_name)) /* NOTE(review): std::tmpnam only produces a name; the file is first opened by the std::ofstream further down, so the name can be taken by someone else in between - confirm this is acceptable. */ + { + std::cerr << "Cannot create temp file!\n"; + return 1; + } + std::string input = std::string(reinterpret_cast(data), size); + + { + std::ofstream temp_file(file_name); + temp_file << input; + } /* Leaving this scope destroys temp_file, so the file is closed before the parser is given its path. */ + + try + { + DB::YAMLParserImpl::parse(std::string(file_name)); /* The parse result is discarded; only a thrown exception is observed. */ + } + catch (...) 
+ { + std::cerr << "YAML_fuzzer failed: " << DB::getCurrentExceptionMessage(__PRETTY_FUNCTION__) << std::endl; + return 1; /* NOTE(review): libFuzzer reserves non-zero return values - confirm that 1 is intended here. */ + } + return 0; /* NOTE(review): nothing in this function removes the temporary file, so it stays on disk after each call. */ +} diff --git a/src/Compression/CMakeLists.txt b/src/Compression/CMakeLists.txt index 34369d8dbc8..563a77bc168 100644 --- a/src/Compression/CMakeLists.txt +++ b/src/Compression/CMakeLists.txt @@ -1,14 +1,3 @@ -if (ENABLE_FUZZING) - include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") - add_headers_and_sources(fuzz_compression .) - - # Remove this file, because it has dependencies on DataTypes - list(REMOVE_ITEM ${fuzz_compression_sources} CompressionFactoryAdditions.cpp) - - add_library(fuzz_compression ${fuzz_compression_headers} ${fuzz_compression_sources}) - target_link_libraries(fuzz_compression PUBLIC clickhouse_parsers clickhouse_common_io common lz4) -endif() - if (ENABLE_EXAMPLES) add_subdirectory(examples) endif() diff --git a/src/Compression/CompressionCodecDelta.cpp b/src/Compression/CompressionCodecDelta.cpp index e281609ff43..d3f941efdd1 100644 --- a/src/Compression/CompressionCodecDelta.cpp +++ b/src/Compression/CompressionCodecDelta.cpp @@ -209,4 +209,10 @@ void registerCodecDelta(CompressionCodecFactory & factory) return std::make_shared(delta_bytes_size); }); } + +CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size) /* Builds a codec directly from delta_bytes_size; delta_decompress_fuzzer.cpp in this patch forward-declares and calls this function. */ +{ + return std::make_shared(delta_bytes_size); +} + } diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index c416582eb6b..935efad5806 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -543,4 +543,5 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory) return std::make_shared(data_bytes_size); }); } + } diff --git a/src/Compression/CompressionCodecLZ4.cpp b/src/Compression/CompressionCodecLZ4.cpp index 396f6fad2c3..12f138dc95a 100644 --- a/src/Compression/CompressionCodecLZ4.cpp +++ b/src/Compression/CompressionCodecLZ4.cpp @@ 
-147,4 +147,10 @@ CompressionCodecLZ4HC::CompressionCodecLZ4HC(int level_) setCodecDescription("LZ4HC", {std::make_shared(static_cast(level))}); } + +CompressionCodecPtr getCompressionCodecLZ4(int level) /* Builds a codec directly from level; lz4_decompress_fuzzer.cpp in this patch forward-declares and calls this function. */ +{ + return std::make_shared(level); +} + } diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index c49c16d8bad..dfeff3e77c8 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -18,6 +18,8 @@ using Codecs = std::vector; class IDataType; +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size); /* Declares the fuzzer entry point ahead of the friend declaration added further down in this header. */ + /** * Represents interface for compression codecs like LZ4, ZSTD, etc. */ @@ -85,6 +87,8 @@ public: protected: + friend int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size); /* Gives the fuzzer entry point access to non-public members; presumably needed for the doDecompressData calls made by the fuzzers in this patch - TODO confirm. */ + /// Return size of compressed data without header virtual UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const { return uncompressed_size; } diff --git a/src/Compression/fuzzers/CMakeLists.txt b/src/Compression/fuzzers/CMakeLists.txt index 73b95d09fa3..3b8e056e000 100644 --- a/src/Compression/fuzzers/CMakeLists.txt +++ b/src/Compression/fuzzers/CMakeLists.txt @@ -1,8 +1,14 @@ -add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp) - # Our code has strong cohesion and target associated with `Compression` also depends on `DataTypes`. # But we can exclude some files which have dependencies in case of # fuzzer related build (we are interested in fuzzing only particular part of our code). # So, some symbols will be declared, but not defined. Unfortunately, this trick doesn't work with UBSan. 
# If you want really small size of the resulted binary, just link with fuzz_compression and clickhouse_common_io + +add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp) target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) + +add_executable (lz4_decompress_fuzzer lz4_decompress_fuzzer.cpp) +target_link_libraries (lz4_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) + +add_executable (delta_decompress_fuzzer delta_decompress_fuzzer.cpp) +target_link_libraries (delta_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) diff --git a/src/Compression/fuzzers/delta_decompress_fuzzer.cpp b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp new file mode 100644 index 00000000000..861d503712a --- /dev/null +++ b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp @@ -0,0 +1,47 @@ +#include +#include + +#include +#include + +namespace DB +{ + CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); +} + +struct AuxiliaryRandomData /* Fixed-size header taken from the front of every fuzz input: the codec parameter and the requested output size. */ +{ + UInt8 delta_size_bytes; + size_t decompressed_size; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) /* libFuzzer entry point: splits the input into an AuxiliaryRandomData header and a payload, then passes the payload to doDecompressData of the codec. */ +try +{ + if (size < sizeof(AuxiliaryRandomData)) + return 0; /* Inputs too short to hold the header are ignored. */ + + auto * p = reinterpret_cast(data); /* NOTE(review): the header is read through a cast of the raw input pointer; no alignment check is visible here. */ + auto codec = DB::getCompressionCodecDelta(p->delta_size_bytes); + + size_t output_buffer_size = p->decompressed_size % 65536; /* Keeps the output size in the range 0..65535. */ + size -= sizeof(AuxiliaryRandomData); + data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t); /* From here on, size and data describe only the payload that follows the header. */ + + std::string input = std::string(reinterpret_cast(data), size); + fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size); /* NOTE(review): this prints the whole payload to stderr on every call. */ + + if (output_buffer_size < size) + return 0; /* Payloads larger than the requested output size are skipped. */ + + DB::Memory<> memory; + memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); /* Requested output size plus the value returned by getAdditionalSizeAtTheEndOfBuffer. */ + + codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + + return 0; +} +catch (...) 
+{ + return 1; /* NOTE(review): libFuzzer reserves non-zero return values - confirm that 1 is intended here. */ +} diff --git a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp new file mode 100644 index 00000000000..12cec0e20d5 --- /dev/null +++ b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp @@ -0,0 +1,48 @@ +#include +#include + +#include +#include + +namespace DB +{ + CompressionCodecPtr getCompressionCodecLZ4(int level); +} + +struct AuxiliaryRandomData /* Fixed-size header taken from the front of every fuzz input: the codec level and the requested output size. */ +{ + size_t level; + size_t decompressed_size; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) /* libFuzzer entry point: splits the input into an AuxiliaryRandomData header and a payload, then passes the payload to doDecompressData of the codec. */ +try +{ + + if (size < sizeof(AuxiliaryRandomData)) + return 0; /* Inputs too short to hold the header are ignored. */ + + auto * p = reinterpret_cast(data); /* NOTE(review): the header is read through a cast of the raw input pointer; no alignment check is visible here. */ + auto codec = DB::getCompressionCodecLZ4(p->level); /* NOTE(review): p->level is a size_t while getCompressionCodecLZ4 is declared above with an int parameter, so the value is converted implicitly. */ + + size_t output_buffer_size = p->decompressed_size % 65536; /* Keeps the output size in the range 0..65535. */ + size -= sizeof(AuxiliaryRandomData); + data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t); /* From here on, size and data describe only the payload that follows the header. */ + + std::string input = std::string(reinterpret_cast(data), size); + fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size); /* NOTE(review): this prints the whole payload to stderr on every call. */ + + if (output_buffer_size < size) + return 0; /* Payloads larger than the requested output size are skipped. */ + + DB::Memory<> memory; + memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); /* Requested output size plus the value returned by getAdditionalSizeAtTheEndOfBuffer. */ + + codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + + return 0; +} +catch (...) +{ + return 1; /* NOTE(review): libFuzzer reserves non-zero return values - confirm that 1 is intended here. */ +}