fix fuzzers, cmake refactor, add target fuzzers

This commit is contained in:
Yakov Olkhovskiy 2023-09-01 14:20:50 +00:00
parent 8ba08ed9e7
commit 361b21b416
21 changed files with 470 additions and 427 deletions

View File

@ -581,19 +581,23 @@ endfunction()
if (FUZZER)
# Bundle fuzzers target
add_custom_target(fuzzers)
# Instrument all targets having pattern *_fuzzer with fuzzer and link with libfuzzer
# Instrument all targets with fuzzer coverage and link the *_fuzzer targets with libFuzzer
get_all_targets(all_targets)
foreach(target ${all_targets})
if (target MATCHES ".+_fuzzer")
target_link_libraries(${target} PRIVATE ch_contrib::fuzzer)
target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
message(STATUS "${target} instrumented with fuzzer")
# Skip this fuzzer because of linker errors (the size of the binary is too big)
if (target EQUAL "execute_query_fuzzer")
continue()
if (NOT(target STREQUAL "_fuzzer" OR target STREQUAL "_fuzzer_no_main"))
get_target_property(target_type ${target} TYPE)
if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY"))
target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
endif()
# clickhouse fuzzer isn't working correctly
# initial PR https://github.com/ClickHouse/ClickHouse/pull/27526
#if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")
if (target MATCHES ".+_fuzzer")
message(STATUS "${target} instrumented with fuzzer")
target_link_libraries(${target} PUBLIC ch_contrib::fuzzer)
# Add to fuzzers bundle
add_dependencies(fuzzers ${target})
endif()
# Add to bundle
add_dependencies(fuzzers ${target})
endif()
endforeach()
endif()

View File

@ -948,48 +948,66 @@ int mainEntryClickHouseLocal(int argc, char ** argv)
#if defined(FUZZING_MODE)
// linked from programs/main.cpp
bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv);
std::optional<DB::LocalServer> fuzz_app;
extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
{
int & argc = *pargc;
char ** argv = *pargv;
std::vector<char *> argv(*pargv, *pargv + (*pargc + 1));
if (!isClickhouseApp("local", argv))
{
std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode, only clickhouse client is available." << "\033[0m" << std::endl;
exit(1);
}
/// As a user you can add flags to clickhouse binary in fuzzing mode as follows
/// clickhouse <set of clickhouse-local specific flag> -- <set of libfuzzer flags>
/// clickhouse local <set of clickhouse-local specific flag> -- <set of libfuzzer flags>
/// Calculate the position of delimiter "--" that separates arguments
/// of clickhouse-local and libfuzzer
int pos_delim = argc;
for (int i = 0; i < argc; ++i)
{
if (strcmp(argv[i], "--") == 0)
char **p = &(*pargv)[1];
auto it = argv.begin() + 1;
for (; *it; ++it)
if (strcmp(*it, "--") == 0)
{
pos_delim = i;
++it;
break;
}
}
while (*it)
if (strncmp(*it, "--", 2) != 0)
{
*(p++) = *it;
it = argv.erase(it);
}
else
++it;
*pargc = static_cast<int>(p - &(*pargv)[0]);
*p = nullptr;
/// Initialize clickhouse-local app
fuzz_app.emplace();
fuzz_app->init(pos_delim, argv);
fuzz_app->init(static_cast<int>(argv.size() - 1), argv.data());
/// We will leave clickhouse-local specific arguments as is, because libfuzzer will ignore
/// all keys starting with --
return 0;
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
auto input = String(reinterpret_cast<const char *>(data), size);
DB::FunctionGetFuzzerData::update(input);
fuzz_app->run();
try
{
auto input = String(reinterpret_cast<const char *>(data), size);
DB::FunctionGetFuzzerData::update(input);
fuzz_app->run();
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}
#endif

View File

@ -165,26 +165,6 @@ int printHelp(int, char **)
std::cerr << "clickhouse " << application.first << " [args] " << std::endl;
return -1;
}
/// Decide whether the binary was invoked as the ClickHouse application `app_suffix`.
///
/// Returns true when argv[1] equals `app_suffix` or `--<app_suffix>` (that argument is then
/// erased from `argv`), or when argv[0] is `clickhouse-<app_suffix>` or ends with
/// `/clickhouse-<app_suffix>` (in that case `argv` is left untouched). Returns false otherwise.
///
/// `argv` may carry a trailing nullptr terminator; null entries are never compared as strings.
bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
{
    /// Use app if the first arg 'app' is passed (the arg should be quietly removed)
    if (argv.size() >= 2)
    {
        auto first_arg = argv.begin() + 1;

        /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok.
        /// A null entry in this position is the argv terminator rather than a real argument;
        /// comparing it with std::string would be undefined behaviour, so it is skipped.
        if (*first_arg && (*first_arg == "--" + app_suffix || *first_arg == app_suffix))
        {
            argv.erase(first_arg);
            return true;
        }
    }

    /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app
    if (argv.empty() || !argv[0])
        return false;

    std::string app_name = "clickhouse-" + app_suffix;
    return app_name == argv[0] || endsWith(argv[0], "/" + app_name);
}
#endif
@ -407,6 +387,25 @@ void checkHarmfulEnvironmentVariables(char ** argv)
}
/// Decide whether the binary was invoked as the ClickHouse application `app_suffix`.
///
/// Returns true when argv[1] equals `app_suffix` or `--<app_suffix>` (that argument is then
/// erased from `argv`), or when argv[0] is `clickhouse-<app_suffix>` or ends with
/// `/clickhouse-<app_suffix>` (in that case `argv` is left untouched). Returns false otherwise.
///
/// `argv` may carry a trailing nullptr terminator; null entries are never compared as strings.
bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
{
    /// Use app if the first arg 'app' is passed (the arg should be quietly removed)
    if (argv.size() >= 2)
    {
        auto first_arg = argv.begin() + 1;

        /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok.
        /// A null entry in this position is the argv terminator rather than a real argument;
        /// comparing it with std::string would be undefined behaviour, so it is skipped.
        if (*first_arg && (*first_arg == "--" + app_suffix || *first_arg == app_suffix))
        {
            argv.erase(first_arg);
            return true;
        }
    }

    /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app
    if (argv.empty() || !argv[0])
        return false;

    std::string app_name = "clickhouse-" + app_suffix;
    return app_name == argv[0] || endsWith(argv[0], "/" + app_name);
}
/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure.
/// We don't use it. But it can be used by some libraries for implementation of "plugins".

View File

@ -6,7 +6,6 @@ set (TCP_PROTOCOL_FUZZER_LINK
clickhouse_aggregate_functions
clickhouse_functions
clickhouse_table_functions
ch_contrib::fuzzer
)
if (TARGET ch_contrib::jemalloc)

View File

@ -66,14 +66,14 @@ int LLVMFuzzerInitialize(int * argc, char ***argv)
extern "C"
int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
if (main_app.wait_for(0s) == std::future_status::ready)
exit(-1);
if (size == 0)
return -1;
try
{
if (main_app.wait_for(0s) == std::future_status::ready)
return -1;
if (size == 0)
return -1;
Poco::Net::SocketAddress address(host, port);
Poco::Net::StreamSocket socket;
@ -111,7 +111,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
}
}
}
catch (const Poco::Exception &)
catch (...)
{
}

View File

@ -17,68 +17,69 @@
#include <base/scope_guard.h>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
using namespace DB;
static SharedContextHolder shared_context;
static ContextMutablePtr context;
auto initialize = [&]() mutable
try
{
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
using namespace DB;
MainThreadStatus::getInstance();
static SharedContextHolder shared_context;
static ContextMutablePtr context;
registerAggregateFunctions();
return true;
};
auto initialize = [&]() mutable
{
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
static bool initialized = initialize();
(void) initialized;
MainThreadStatus::getInstance();
total_memory_tracker.resetCounters();
total_memory_tracker.setHardLimit(1_GiB);
CurrentThread::get().memory_tracker.resetCounters();
CurrentThread::get().memory_tracker.setHardLimit(1_GiB);
registerAggregateFunctions();
return true;
};
/// The input format is as follows:
/// - the aggregate function name on the first line, possibly with parameters, then data types of the arguments,
/// example: quantile(0.5), Float64
/// - the serialized aggregation state for the rest of the input.
static bool initialized = initialize();
(void) initialized;
/// Compile the code as follows:
/// mkdir build_asan_fuzz
/// cd build_asan_fuzz
/// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 ..
///
/// The corpus is located here:
/// https://github.com/ClickHouse/fuzz-corpus/tree/main/aggregate_function_state_deserialization
///
/// The fuzzer can be run as follows:
/// ../../../build_asan_fuzz/src/DataTypes/fuzzers/aggregate_function_state_deserialization corpus -jobs=64 -rss_limit_mb=8192
total_memory_tracker.resetCounters();
total_memory_tracker.setHardLimit(1_GiB);
CurrentThread::get().memory_tracker.resetCounters();
CurrentThread::get().memory_tracker.setHardLimit(1_GiB);
DB::ReadBufferFromMemory in(data, size);
/// The input format is as follows:
/// - the aggregate function name on the first line, possibly with parameters, then data types of the arguments,
/// example: quantile(0.5), Float64
/// - the serialized aggregation state for the rest of the input.
String args;
readStringUntilNewlineInto(args, in);
assertChar('\n', in);
/// Compile the code as follows:
/// mkdir build_asan_fuzz
/// cd build_asan_fuzz
/// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 ..
///
/// The corpus is located here:
/// https://github.com/ClickHouse/fuzz-corpus/tree/main/aggregate_function_state_deserialization
///
/// The fuzzer can be run as follows:
/// ../../../build_asan_fuzz/src/DataTypes/fuzzers/aggregate_function_state_deserialization corpus -jobs=64 -rss_limit_mb=8192
DataTypePtr type = DataTypeFactory::instance().get(fmt::format("AggregateFunction({})", args));
AggregateFunctionPtr func = assert_cast<const DataTypeAggregateFunction &>(*type).getFunction();
DB::ReadBufferFromMemory in(data, size);
Arena arena;
char * place = arena.alignedAlloc(func->sizeOfData(), func->alignOfData());
func->create(place);
SCOPE_EXIT(func->destroy(place));
func->deserialize(place, in, {}, &arena);
String args;
readStringUntilNewlineInto(args, in);
assertChar('\n', in);
DataTypePtr type = DataTypeFactory::instance().get(fmt::format("AggregateFunction({})", args));
AggregateFunctionPtr func = assert_cast<const DataTypeAggregateFunction &>(*type).getFunction();
Arena arena;
char * place = arena.alignedAlloc(func->sizeOfData(), func->alignOfData());
func->create(place);
SCOPE_EXIT(func->destroy(place));
func->deserialize(place, in, {}, &arena);
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -4,17 +4,18 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
DB::ReadBufferFromMemory from(data, size);
DB::CompressedReadBuffer in{from};
try
{
DB::ReadBufferFromMemory from(data, size);
DB::CompressedReadBuffer in{from};
while (!in.eof())
in.next();
while (!in.eof())
in.next();
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -15,29 +15,30 @@ struct AuxiliaryRandomData
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
if (size < sizeof(AuxiliaryRandomData))
return 0;
try
{
if (size < sizeof(AuxiliaryRandomData))
return 0;
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecDelta(p->delta_size_bytes);
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecDelta(p->delta_size_bytes);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
// std::string input = std::string(reinterpret_cast<const char*>(data), size);
// fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
// std::string input = std::string(reinterpret_cast<const char*>(data), size);
// fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -15,29 +15,30 @@ struct AuxiliaryRandomData
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
if (size < sizeof(AuxiliaryRandomData))
return 0;
try
{
if (size < sizeof(AuxiliaryRandomData))
return 0;
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecDoubleDelta(p->data_bytes_size);
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecDoubleDelta(p->data_bytes_size);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
// std::string input = std::string(reinterpret_cast<const char*>(data), size);
// fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
// std::string input = std::string(reinterpret_cast<const char*>(data), size);
// fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -271,33 +271,35 @@ void XMLGenerator::generate()
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
XMLGenerator generator(data, size);
try
{
XMLGenerator generator(data, size);
generator.generate();
if (generator.hasError())
return 0;
generator.generate();
if (generator.hasError())
return 0;
auto config = generator.getResult();
auto codec_128 = getCompressionCodecEncrypted(DB::AES_128_GCM_SIV);
auto codec_256 = getCompressionCodecEncrypted(DB::AES_256_GCM_SIV);
DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "");
auto config = generator.getResult();
auto codec_128 = getCompressionCodecEncrypted(DB::AES_128_GCM_SIV);
auto codec_256 = getCompressionCodecEncrypted(DB::AES_256_GCM_SIV);
DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "");
size_t data_size = size - generator.keySize();
size_t data_size = size - generator.keySize();
std::string input = std::string(reinterpret_cast<const char*>(data), data_size);
fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, data_size, input.size() - 31);
std::string input = std::string(reinterpret_cast<const char*>(data), data_size);
fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, data_size, input.size() - 31);
DB::Memory<> memory;
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
codec_128->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size()) - 31);
DB::Memory<> memory;
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
codec_128->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size()) - 31);
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
codec_256->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size()) - 31);
}
catch (...)
{
}
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
codec_256->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size()) - 31);
return 0;
}
catch (...)
{
return 1;
}

View File

@ -16,31 +16,31 @@ struct AuxiliaryRandomData
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
try
{
if (size < sizeof(AuxiliaryRandomData) + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER)
return 0;
if (size < sizeof(AuxiliaryRandomData) + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER)
return 0;
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecLZ4(static_cast<int>(p->level));
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecLZ4(static_cast<int>(p->level));
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
size -= LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER;
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
size -= LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER;
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
// std::string input = std::string(reinterpret_cast<const char*>(data), size);
// fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
// std::string input = std::string(reinterpret_cast<const char*>(data), size);
// fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
DB::Memory<> memory;
memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
DB::Memory<> memory;
memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -3,15 +3,16 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
DB::ReadBufferFromMemory in(data, size);
DB::NamesAndTypesList res;
res.readText(in);
try
{
DB::ReadBufferFromMemory in(data, size);
DB::NamesAndTypesList res;
res.readText(in);
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -14,69 +14,70 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
using namespace DB;
static SharedContextHolder shared_context;
static ContextMutablePtr context;
auto initialize = [&]() mutable
try
{
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
using namespace DB;
MainThreadStatus::getInstance();
static SharedContextHolder shared_context;
static ContextMutablePtr context;
registerAggregateFunctions();
return true;
};
auto initialize = [&]() mutable
{
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
static bool initialized = initialize();
(void) initialized;
MainThreadStatus::getInstance();
total_memory_tracker.resetCounters();
total_memory_tracker.setHardLimit(1_GiB);
CurrentThread::get().memory_tracker.resetCounters();
CurrentThread::get().memory_tracker.setHardLimit(1_GiB);
registerAggregateFunctions();
return true;
};
/// The input format is as follows:
/// - data type name on the first line,
/// - the data for the rest of the input.
static bool initialized = initialize();
(void) initialized;
/// Compile the code as follows:
/// mkdir build_asan_fuzz
/// cd build_asan_fuzz
/// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 ..
///
/// The corpus is located here:
/// https://github.com/ClickHouse/fuzz-corpus/tree/main/data_type_deserialization
///
/// The fuzzer can be run as follows:
/// ../../../build_asan_fuzz/src/DataTypes/fuzzers/data_type_deserialization_fuzzer corpus -jobs=64 -rss_limit_mb=8192
total_memory_tracker.resetCounters();
total_memory_tracker.setHardLimit(1_GiB);
CurrentThread::get().memory_tracker.resetCounters();
CurrentThread::get().memory_tracker.setHardLimit(1_GiB);
/// clickhouse-local --query "SELECT toJSONString(*) FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.data_type_families)" > dictionary
/// The input format is as follows:
/// - data type name on the first line,
/// - the data for the rest of the input.
DB::ReadBufferFromMemory in(data, size);
/// Compile the code as follows:
/// mkdir build_asan_fuzz
/// cd build_asan_fuzz
/// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 ..
///
/// The corpus is located here:
/// https://github.com/ClickHouse/fuzz-corpus/tree/main/data_type_deserialization
///
/// The fuzzer can be run as follows:
/// ../../../build_asan_fuzz/src/DataTypes/fuzzers/data_type_deserialization_fuzzer corpus -jobs=64 -rss_limit_mb=8192
String data_type;
readStringUntilNewlineInto(data_type, in);
assertChar('\n', in);
/// clickhouse-local --query "SELECT toJSONString(*) FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.data_type_families)" > dictionary
DataTypePtr type = DataTypeFactory::instance().get(data_type);
DB::ReadBufferFromMemory in(data, size);
FormatSettings settings;
settings.max_binary_string_size = 100;
settings.max_binary_array_size = 100;
String data_type;
readStringUntilNewlineInto(data_type, in);
assertChar('\n', in);
Field field;
type->getDefaultSerialization()->deserializeBinary(field, in, settings);
DataTypePtr type = DataTypeFactory::instance().get(data_type);
FormatSettings settings;
settings.max_binary_string_size = 100;
settings.max_binary_array_size = 100;
Field field;
type->getDefaultSerialization()->deserializeBinary(field, in, settings);
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -22,112 +22,113 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
using namespace DB;
static SharedContextHolder shared_context;
static ContextMutablePtr context;
auto initialize = [&]() mutable
try
{
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
using namespace DB;
MainThreadStatus::getInstance();
static SharedContextHolder shared_context;
static ContextMutablePtr context;
registerAggregateFunctions();
registerFormats();
auto initialize = [&]() mutable
{
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
return true;
};
MainThreadStatus::getInstance();
static bool initialized = initialize();
(void) initialized;
registerAggregateFunctions();
registerFormats();
total_memory_tracker.resetCounters();
total_memory_tracker.setHardLimit(1_GiB);
CurrentThread::get().memory_tracker.resetCounters();
CurrentThread::get().memory_tracker.setHardLimit(1_GiB);
return true;
};
/// The input format is as follows:
/// - format name on the first line,
/// - table structure on the second line,
/// - the data for the rest of the input.
static bool initialized = initialize();
(void) initialized;
/** The corpus was generated as follows:
total_memory_tracker.resetCounters();
total_memory_tracker.setHardLimit(1_GiB);
CurrentThread::get().memory_tracker.resetCounters();
CurrentThread::get().memory_tracker.setHardLimit(1_GiB);
i=0; find ../../../../tests/queries -name '*.sql' |
xargs -I{} bash -c "tr '\n' ' ' <{}; echo" |
rg -o -i 'CREATE TABLE\s+\w+\s+\(.+?\) ENGINE' |
sed -r -e 's/CREATE TABLE\s+\w+\s+\((.+?)\) ENGINE/\1/i' | sort | uniq |
while read line; do
i=$((i+1));
clickhouse-local --query "SELECT name FROM system.formats ORDER BY rand() LIMIT 1" >> $i;
echo "$line" >> $i;
echo $RANDOM >> $i;
echo $i;
/// The input format is as follows:
/// - format name on the first line,
/// - table structure on the second line,
/// - the data for the rest of the input.
/** The corpus was generated as follows:
i=0; find ../../../../tests/queries -name '*.sql' |
xargs -I{} bash -c "tr '\n' ' ' <{}; echo" |
rg -o -i 'CREATE TABLE\s+\w+\s+\(.+?\) ENGINE' |
sed -r -e 's/CREATE TABLE\s+\w+\s+\((.+?)\) ENGINE/\1/i' | sort | uniq |
while read line; do
i=$((i+1));
clickhouse-local --query "SELECT name FROM system.formats ORDER BY rand() LIMIT 1" >> $i;
echo "$line" >> $i;
echo $RANDOM >> $i;
echo $i;
done
*/
/** And:
for format in $(clickhouse-client --query "SELECT name FROM system.formats WHERE is_output"); do
echo $format;
echo $format >> $format;
echo "WatchID Int64, JavaEnable Int16, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID Int32, ClientIP Int32, RegionID Int32, UserID Int64, CounterClass Int16, OS Int16, UserAgent Int16, URL String, Referer String, IsRefresh Int16, RefererCategoryID Int16, RefererRegionID Int32, URLCategoryID Int16, URLRegionID Int32, ResolutionWidth Int16, ResolutionHeight Int16, ResolutionDepth Int16, FlashMajor Int16, FlashMinor Int16, FlashMinor2 String, NetMajor Int16, NetMinor Int16, UserAgentMajor Int16, UserAgentMinor String, CookieEnable Int16, JavascriptEnable Int16, IsMobile Int16, MobilePhone Int16, MobilePhoneModel String, Params String, IPNetworkID Int32, TraficSourceID Int16, SearchEngineID Int16, SearchPhrase String, AdvEngineID Int16, IsArtifical Int16, WindowClientWidth Int16, WindowClientHeight Int16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 Int16, SilverlightVersion2 Int16, SilverlightVersion3 Int32, SilverlightVersion4 Int16, PageCharset String, CodeVersion Int32, IsLink Int16, IsDownload Int16, IsNotBounce Int16, FUniqID Int64, OriginalURL String, HID Int32, IsOldCounter Int16, IsEvent Int16, IsParameter Int16, DontCountHits Int16, WithHash Int16, HitColor String, LocalEventTime DateTime, Age Int16, Sex Int16, Income Int16, Interests Int16, Robotness Int16, RemoteIP Int32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage String, BrowserCountry String, SocialNetwork String, SocialAction String, HTTPError Int16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, SocialSourceNetworkID Int16, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency String, ParamCurrencyID Int16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, 
HasGCLID Int16, RefererHash Int64, URLHash Int64, CLID Int32" >> $format;
clickhouse-client --query "SELECT * FROM hits LIMIT 10 FORMAT $format" >> $format || rm $format;
done
*/
/** And:
*/
for format in $(clickhouse-client --query "SELECT name FROM system.formats WHERE is_output"); do
echo $format;
echo $format >> $format;
echo "WatchID Int64, JavaEnable Int16, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID Int32, ClientIP Int32, RegionID Int32, UserID Int64, CounterClass Int16, OS Int16, UserAgent Int16, URL String, Referer String, IsRefresh Int16, RefererCategoryID Int16, RefererRegionID Int32, URLCategoryID Int16, URLRegionID Int32, ResolutionWidth Int16, ResolutionHeight Int16, ResolutionDepth Int16, FlashMajor Int16, FlashMinor Int16, FlashMinor2 String, NetMajor Int16, NetMinor Int16, UserAgentMajor Int16, UserAgentMinor String, CookieEnable Int16, JavascriptEnable Int16, IsMobile Int16, MobilePhone Int16, MobilePhoneModel String, Params String, IPNetworkID Int32, TraficSourceID Int16, SearchEngineID Int16, SearchPhrase String, AdvEngineID Int16, IsArtifical Int16, WindowClientWidth Int16, WindowClientHeight Int16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 Int16, SilverlightVersion2 Int16, SilverlightVersion3 Int32, SilverlightVersion4 Int16, PageCharset String, CodeVersion Int32, IsLink Int16, IsDownload Int16, IsNotBounce Int16, FUniqID Int64, OriginalURL String, HID Int32, IsOldCounter Int16, IsEvent Int16, IsParameter Int16, DontCountHits Int16, WithHash Int16, HitColor String, LocalEventTime DateTime, Age Int16, Sex Int16, Income Int16, Interests Int16, Robotness Int16, RemoteIP Int32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage String, BrowserCountry String, SocialNetwork String, SocialAction String, HTTPError Int16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, SocialSourceNetworkID Int16, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency String, ParamCurrencyID Int16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, 
HasGCLID Int16, RefererHash Int64, URLHash Int64, CLID Int32" >> $format;
clickhouse-client --query "SELECT * FROM hits LIMIT 10 FORMAT $format" >> $format || rm $format;
done
/// Compile the code as follows:
/// mkdir build_asan_fuzz
/// cd build_asan_fuzz
/// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 ..
///
/// The corpus is located here:
/// https://github.com/ClickHouse/fuzz-corpus/tree/main/format_fuzzer
///
/// The fuzzer can be run as follows:
/// ../../../build_asan_fuzz/src/Formats/fuzzers/format_fuzzer corpus -jobs=64 -rss_limit_mb=8192
*/
DB::ReadBufferFromMemory in(data, size);
/// Compile the code as follows:
/// mkdir build_asan_fuzz
/// cd build_asan_fuzz
/// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 ..
///
/// The corpus is located here:
/// https://github.com/ClickHouse/fuzz-corpus/tree/main/format_fuzzer
///
/// The fuzzer can be run as follows:
/// ../../../build_asan_fuzz/src/Formats/fuzzers/format_fuzzer corpus -jobs=64 -rss_limit_mb=8192
String format;
readStringUntilNewlineInto(format, in);
assertChar('\n', in);
DB::ReadBufferFromMemory in(data, size);
String structure;
readStringUntilNewlineInto(structure, in);
assertChar('\n', in);
String format;
readStringUntilNewlineInto(format, in);
assertChar('\n', in);
ColumnsDescription description = parseColumnsListFromString(structure, context);
auto columns_info = description.getOrdinary();
String structure;
readStringUntilNewlineInto(structure, in);
assertChar('\n', in);
Block header;
for (const auto & info : columns_info)
{
ColumnWithTypeAndName column;
column.name = info.name;
column.type = info.type;
column.column = column.type->createColumn();
header.insert(std::move(column));
}
ColumnsDescription description = parseColumnsListFromString(structure, context);
auto columns_info = description.getOrdinary();
InputFormatPtr input_format = context->getInputFormat(format, in, header, 13 /* small block size */);
Block header;
for (const auto & info : columns_info)
{
ColumnWithTypeAndName column;
column.name = info.name;
column.type = info.type;
column.column = column.type->createColumn();
header.insert(std::move(column));
QueryPipeline pipeline(Pipe(std::move(input_format)));
PullingPipelineExecutor executor(pipeline);
Block res;
while (executor.pull(res))
;
}
catch (...)
{
}
InputFormatPtr input_format = context->getInputFormat(format, in, header, 13 /* small block size */);
QueryPipeline pipeline(Pipe(std::move(input_format)));
PullingPipelineExecutor executor(pipeline);
Block res;
while (executor.pull(res))
;
return 0;
}
catch (...)
{
return 1;
}

View File

@ -13,43 +13,44 @@
using namespace DB;
/// libFuzzer entry point: treats the raw input bytes as a query string, runs it through
/// DB::executeQuery with QueryProcessingStage::Complete and pulls from the resulting pipeline.
/// A global context is created and functions, aggregate functions, table functions, storages,
/// dictionaries, disks and formats are registered by the `initialize` lambda.
/// Exceptions are caught by the `catch (...)` handlers below.
/// NOTE(review): this listing appears to interleave the removed and added lines of the diff hunk
/// without +/- markers, so most statements occur twice and both a `return 0;` and a `return 1;`
/// tail are present — read it as two overlapping versions, not as one compilable function.
/// NOTE(review): libFuzzer documents that the fuzz target should return 0 — confirm which tail is current.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
std::string input = std::string(reinterpret_cast<const char*>(data), size);
static SharedContextHolder shared_context;
static ContextMutablePtr context;
auto initialize = [&]() mutable
try
{
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
std::string input = std::string(reinterpret_cast<const char*>(data), size);
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerStorages();
registerDictionaries();
registerDisks(/* global_skip_access_check= */ true);
registerFormats();
static SharedContextHolder shared_context;
static ContextMutablePtr context;
return true;
};
auto initialize = [&]() mutable
{
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
/// Function-local static: `initialize()` is evaluated only the first time control reaches this line.
static bool initialized = initialize();
(void) initialized;
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerStorages();
registerDictionaries();
registerDisks(/* global_skip_access_check= */ true);
registerFormats();
auto io = DB::executeQuery(input, context, true, QueryProcessingStage::Complete);
return true;
};
PullingPipelineExecutor executor(io.pipeline);
Block res;
/// Keep pulling while `res` still tests false and `pull` reports success.
while (!res && executor.pull(res));
static bool initialized = initialize();
(void) initialized;
auto io = DB::executeQuery(input, context, true, QueryProcessingStage::Complete);
PullingPipelineExecutor executor(io.pipeline);
Block res;
while (!res && executor.pull(res));
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -44,5 +44,7 @@ set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "
# contrib/libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h:143:44: error: no newline at end of file [-Werror,-Wnewline-eof]
target_compile_options (codegen_select_fuzzer PRIVATE -Wno-newline-eof)
# Link the protoc target against the libFuzzer contrib library explicitly.
# NOTE(review): presumably required because the `if (FUZZER)` target loop compiles targets with
# -fsanitize=fuzzer-no-link but only links ch_contrib::fuzzer into targets matching ".+_fuzzer" — confirm.
# NOTE(review): this is the keyword-less signature; CMake rejects mixing it with PRIVATE/PUBLIC/INTERFACE
# calls on the same target, so check how protoc is linked where it is defined before adding a keyword here.
target_link_libraries(protoc ch_contrib::fuzzer)
# Add the current binary directory as a SYSTEM include directory, placed BEFORE the existing ones.
target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
# The fuzzer itself links against ch_contrib::protobuf_mutator, ch_contrib::protoc and dbms.
target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms)

View File

@ -8,27 +8,28 @@
/// libFuzzer entry point: parses the input bytes with DB::ParserCreateQuery, checks the resulting
/// AST against a depth limit (1000) and an element-count limit (50000), formats the AST into a
/// string buffer and writes it to std::cerr. Exceptions are caught by the `catch (...)` handlers below.
/// NOTE(review): this listing appears to interleave the removed and added lines of the diff hunk
/// without +/- markers, so each statement occurs twice and both a `return 0;` and a `return 1;`
/// tail are present — it is not one compilable function as shown.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
std::string input = std::string(reinterpret_cast<const char*>(data), size);
try
{
std::string input = std::string(reinterpret_cast<const char*>(data), size);
DB::ParserCreateQuery parser;
/// NOTE(review): the trailing literal 1000 is presumably the maximum parser depth — confirm against parseQuery's signature.
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000);
DB::ParserCreateQuery parser;
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000);
const UInt64 max_ast_depth = 1000;
ast->checkDepth(max_ast_depth);
const UInt64 max_ast_depth = 1000;
ast->checkDepth(max_ast_depth);
const UInt64 max_ast_elements = 50000;
ast->checkSize(max_ast_elements);
const UInt64 max_ast_elements = 50000;
ast->checkSize(max_ast_elements);
DB::WriteBufferFromOwnString wb;
DB::formatAST(*ast, wb);
DB::WriteBufferFromOwnString wb;
DB::formatAST(*ast, wb);
std::cerr << wb.str() << std::endl;
std::cerr << wb.str() << std::endl;
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -8,21 +8,27 @@
/// libFuzzer entry point: reads the whole input into a string and feeds it to DB::Lexer,
/// requesting tokens until an end token or an error token is seen.
/// NOTE(review): this listing appears to interleave the removed and added lines of the diff hunk
/// without +/- markers (two copies of the read/lex statements, and both `return 1;` and `return 0;`
/// on the error-token branch); the function's closing brace lies outside the shown hunk.
/// NOTE(review): libFuzzer documents that the fuzz target should return 0 — confirm which error-branch value is current.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
DB::String query;
DB::ReadBufferFromMemory in(data, size);
readStringUntilEOF(query, in);
DB::Lexer lexer(query.data(), query.data() + query.size());
while (true)
try
{
DB::Token token = lexer.nextToken();
DB::String query;
DB::ReadBufferFromMemory in(data, size);
readStringUntilEOF(query, in);
if (token.isEnd())
break;
DB::Lexer lexer(query.data(), query.data() + query.size());
if (token.isError())
return 1;
while (true)
{
DB::Token token = lexer.nextToken();
/// End token: stop lexing.
if (token.isEnd())
break;
/// Error token: leave the fuzz target early.
if (token.isError())
return 0;
}
}
catch (...)
{
}
return 0;

View File

@ -7,29 +7,30 @@
/// libFuzzer entry point: parses the input bytes with DB::ParserQueryWithOutput (parser depth limit
/// 1000), checks the resulting AST against a depth limit (1000) and an element-count limit (50000),
/// formats the AST into a string buffer and writes it to std::cerr.
/// Exceptions are caught by the `catch (...)` handlers below.
/// NOTE(review): this listing appears to interleave the removed and added lines of the diff hunk
/// without +/- markers, so each statement occurs twice and both a `return 0;` and a `return 1;`
/// tail are present — it is not one compilable function as shown.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
std::string input = std::string(reinterpret_cast<const char*>(data), size);
try
{
std::string input = std::string(reinterpret_cast<const char*>(data), size);
/// The parser is constructed with a pointer to the end of the input buffer.
DB::ParserQueryWithOutput parser(input.data() + input.size());
DB::ParserQueryWithOutput parser(input.data() + input.size());
const UInt64 max_parser_depth = 1000;
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, max_parser_depth);
const UInt64 max_parser_depth = 1000;
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, max_parser_depth);
const UInt64 max_ast_depth = 1000;
ast->checkDepth(max_ast_depth);
const UInt64 max_ast_depth = 1000;
ast->checkDepth(max_ast_depth);
const UInt64 max_ast_elements = 50000;
ast->checkSize(max_ast_elements);
const UInt64 max_ast_elements = 50000;
ast->checkSize(max_ast_elements);
DB::WriteBufferFromOwnString wb;
DB::formatAST(*ast, wb);
DB::WriteBufferFromOwnString wb;
DB::formatAST(*ast, wb);
std::cerr << wb.str() << std::endl;
std::cerr << wb.str() << std::endl;
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -2,14 +2,16 @@
/// libFuzzer entry point: passes the input bytes as a string to ColumnsDescription::parse and
/// writes the parsed description's toString() output to std::cerr.
/// Exceptions are caught by the `catch (...)` handlers below.
/// NOTE(review): this listing appears to interleave the removed and added lines of the diff hunk
/// without +/- markers, so the body occurs twice and both a `return 0;` and a `return 1;` tail are
/// present — it is not one compilable function as shown.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
using namespace DB;
ColumnsDescription columns = ColumnsDescription::parse(std::string(reinterpret_cast<const char *>(data), size));
std::cerr << columns.toString() << "\n";
try
{
using namespace DB;
ColumnsDescription columns = ColumnsDescription::parse(std::string(reinterpret_cast<const char *>(data), size));
std::cerr << columns.toString() << "\n";
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}

View File

@ -5,19 +5,20 @@
/// libFuzzer entry point: reads a DB::MergeTreeDataPartChecksums object from the input bytes and,
/// when `read` reports success, writes it to standard output (STDOUT_FILENO).
/// Exceptions are caught by the `catch (...)` handlers below.
/// NOTE(review): this listing appears to interleave the removed and added lines of the diff hunk
/// without +/- markers, so the body occurs twice — once returning 1 and once returning 0 when
/// `read` fails — and both `return 0;` and `return 1;` tails are present.
/// NOTE(review): libFuzzer documents that the fuzz target should return 0 — confirm which variant is current.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
DB::ReadBufferFromMemory in(data, size);
DB::MergeTreeDataPartChecksums res;
DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO);
try
{
DB::ReadBufferFromMemory in(data, size);
DB::MergeTreeDataPartChecksums res;
DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO);
/// `read` returned false: leave early without writing anything.
if (!res.read(in))
return 1;
res.write(out);
if (!res.read(in))
return 0;
res.write(out);
}
catch (...)
{
}
return 0;
}
catch (...)
{
return 1;
}