Merge pull request #27526 from mathalex/get_fuzz_data

Added `getFuzzerData` function
This commit is contained in:
Nikita Mikhaylov 2021-12-07 15:05:16 +03:00 committed by GitHub
commit 514120adfe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 186 additions and 7 deletions

View File

@ -149,6 +149,10 @@ if (ENABLE_FUZZING)
set (ENABLE_JEMALLOC 0)
set (ENABLE_CHECK_HEAVY_BUILDS 1)
set (GLIBC_COMPATIBILITY OFF)
# For codegen_select_fuzzer
set (ENABLE_PROTOBUF 1)
set (USE_INTERNAL_PROTOBUF_LIBRARY 1)
endif()
# Global libraries

View File

@ -31,5 +31,6 @@ do
mv "$FUZZER_PATH" /output/fuzzers
done
# Pack everything under /output/fuzzers into a single gzip-compressed archive,
# then remove the unpacked directory so only the archive is left in /output.
tar -zcvf /output/fuzzers.tar.gz /output/fuzzers
rm -rf /output/fuzzers

View File

@ -473,3 +473,7 @@ if (ENABLE_TESTS AND USE_GTEST)
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})
add_dependencies(clickhouse-bundle clickhouse-tests)
endif()
# When fuzzing is enabled, define the FUZZING_MODE macro for compilation so that
# sources can select fuzzing-only code with #if defined(FUZZING_MODE).
if (ENABLE_FUZZING)
add_compile_definitions(FUZZING_MODE=1)
endif ()

View File

@ -17,3 +17,9 @@ clickhouse_program_add(local)
if(NOT CLICKHOUSE_ONE_SHARED)
target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib)
endif()
# Fuzzing build of clickhouse-local: define the FUZZING_MODE macro, turn WITH_COVERAGE on
# and link clickhouse-local-lib against the fuzzing engine named by LIB_FUZZING_ENGINE.
if (ENABLE_FUZZING)
add_compile_definitions(FUZZING_MODE=1)
set (WITH_COVERAGE ON)
target_link_libraries(clickhouse-local-lib PRIVATE ${LIB_FUZZING_ENGINE})
endif ()

View File

@ -41,6 +41,10 @@
#include <base/argsToConfig.h>
#include <filesystem>
#if defined(FUZZING_MODE)
#include <Functions/getFuzzerData.h>
#endif
namespace fs = std::filesystem;
@ -407,10 +411,25 @@ try
std::cout << std::fixed << std::setprecision(3);
std::cerr << std::fixed << std::setprecision(3);
#if defined(FUZZING_MODE)
static bool first_time = true;
if (first_time)
{
if (queries_files.empty() && !config().has("query"))
{
std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode." << "\033[0m" << std::endl;
std::cerr << "\033[31m" << "You have to provide a query with --query or --queries-file option." << "\033[0m" << std::endl;
std::cerr << "\033[31m" << "The query have to use function getFuzzerData() inside." << "\033[0m" << std::endl;
exit(1);
}
is_interactive = false;
#else
is_interactive = stdin_is_a_tty
&& (config().hasOption("interactive")
|| (!config().has("query") && !config().has("table-structure") && queries_files.empty()));
#endif
if (!is_interactive)
{
/// We will terminate process on error
@ -439,6 +458,11 @@ try
connect();
#ifdef FUZZING_MODE
first_time = false;
}
#endif
if (is_interactive && !delayed_interactive)
{
runInteractive();
@ -451,7 +475,9 @@ try
runInteractive();
}
#ifndef FUZZING_MODE
cleanup();
#endif
return Application::EXIT_OK;
}
catch (const DB::Exception & e)
@ -653,7 +679,7 @@ void LocalServer::processConfig()
}
static std::string getHelpHeader()
[[ maybe_unused ]] static std::string getHelpHeader()
{
return
"usage: clickhouse-local [initial table definition] [--query <query>]\n"
@ -669,7 +695,7 @@ static std::string getHelpHeader()
}
static std::string getHelpFooter()
[[ maybe_unused ]] static std::string getHelpFooter()
{
return
"Example printing memory used by each Unix user:\n"
@ -680,11 +706,23 @@ static std::string getHelpFooter()
}
void LocalServer::printHelpMessage(const OptionsDescription & options_description)
/// Prints usage information to stdout.
/// In a fuzzing build (FUZZING_MODE defined) a fixed libFuzzer-oriented usage text is printed
/// and `options_description` is not used, hence the [[maybe_unused]] attribute.
/// Otherwise the regular help header, the main option description and the help footer are printed.
void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & options_description)
{
#if defined(FUZZING_MODE)
    std::cout <<
        "usage: clickhouse <clickhouse-local arguments> -- <libfuzzer arguments>\n"
        "Note: It is important not to use only one letter keys with single dash\n"
        "for clickhouse-local arguments. It may work incorrectly.\n"
        "ClickHouse is built with coverage guided fuzzer (libfuzzer) inside it.\n"
        "You have to provide a query which contains getFuzzerData function.\n"
        "This will take the data from fuzzing engine, pass it to getFuzzerData function and execute a query.\n"
        "Each time the data will be different, and it will last until some segfault or sanitizer assertion is found. \n";
#else
    std::cout << getHelpHeader() << "\n";
    std::cout << options_description.main_description.value() << "\n";
    std::cout << getHelpFooter() << "\n";
#endif
}
@ -781,3 +819,51 @@ int mainEntryClickHouseLocal(int argc, char ** argv)
return code ? code : 1;
}
}
#if defined(FUZZING_MODE)
std::optional<DB::LocalServer> fuzz_app;
/// libFuzzer start-up hook: constructs the global clickhouse-local application and initializes it
/// with the command-line arguments that precede the "--" separator. Always returns 0.
extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
{
    const int arg_count = *pargc;
    char ** args = *pargv;

    /// In fuzzing mode the binary is invoked as
    ///     clickhouse <clickhouse-local flags> -- <libfuzzer flags>
    /// Find the index of the first "--"; when there is no separator every argument
    /// is treated as a clickhouse-local argument.
    int split_at = arg_count;
    for (int idx = 0; idx < arg_count && split_at == arg_count; ++idx)
        if (strcmp(args[idx], "--") == 0)
            split_at = idx;

    /// Build the application in place and hand it only the clickhouse-local part.
    fuzz_app.emplace();
    fuzz_app->init(split_at, args);

    /// The clickhouse-local specific arguments are left in *pargv untouched, because libfuzzer
    /// will ignore all keys starting with --
    return 0;
}
/// libFuzzer per-input entry point.
/// Copies the `size` bytes at `data` into a String, publishes it through
/// DB::FunctionGetFuzzerData::update() and runs the application held in `fuzz_app`.
/// Always returns 0: libFuzzer accepts only 0 and -1 from a fuzz target and reserves
/// every other value.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
    auto input = String(reinterpret_cast<const char *>(data), size);
    DB::FunctionGetFuzzerData::update(input);
    fuzz_app->run();
    return 0;
}
catch (...)
{
    /// An exception escaping the run is not reported to the fuzzing engine as a failure;
    /// a non-zero value such as 1 must not be returned here because it is reserved by libFuzzer.
    return 0;
}
#endif

View File

@ -88,6 +88,7 @@ namespace
using MainFunc = int (*)(int, char**);
#if !defined(FUZZING_MODE)
/// Add an item here to register new application
std::pair<const char *, MainFunc> clickhouse_applications[] =
@ -141,7 +142,6 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
{"hash-binary", mainEntryClickHouseHashBinary},
};
int printHelp(int, char **)
{
std::cerr << "Use one of the following commands:" << std::endl;
@ -150,7 +150,6 @@ int printHelp(int, char **)
return -1;
}
bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
{
/// Use app if the first arg 'app' is passed (the arg should be quietly removed)
@ -170,6 +169,7 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
std::string app_name = "clickhouse-" + app_suffix;
return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name));
}
#endif
enum class InstructionFail
@ -342,9 +342,13 @@ struct Checker
///
/// extern bool inside_main;
/// class C { C() { assert(inside_main); } };
/// In a regular build the flag starts as false and main() sets it to true on entry.
/// In a fuzzing build main() is compiled out (it is guarded by !defined(FUZZING_MODE)),
/// so the flag is initialized to true directly.
#ifndef FUZZING_MODE
bool inside_main = false;
#else
bool inside_main = true;
#endif
#if !defined(FUZZING_MODE)
int main(int argc_, char ** argv_)
{
inside_main = true;
@ -375,3 +379,4 @@ int main(int argc_, char ** argv_)
return main_func(static_cast<int>(argv.size()), argv.data());
}
#endif

View File

@ -123,3 +123,7 @@ set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-s
# target_link_libraries(clickhouse_functions PRIVATE ${S2_LIBRARY})
target_include_directories(clickhouse_functions SYSTEM PUBLIC ${S2_GEOMETRY_INCLUDE_DIR})
# When fuzzing is enabled, define the FUZZING_MODE macro for compilation so that
# fuzzing-only code guarded by #ifdef FUZZING_MODE is built.
if (ENABLE_FUZZING)
add_compile_definitions(FUZZING_MODE=1)
endif ()

View File

@ -0,0 +1,11 @@
#include <Functions/getFuzzerData.h>
namespace DB
{
/// Registers FunctionGetFuzzerData in the given function factory.
void registerFunctionGetFuzzerData(FunctionFactory & factory)
{
factory.registerFunction<FunctionGetFuzzerData>();
}
}

View File

@ -0,0 +1,50 @@
#pragma once
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeString.h>
#include <Core/Field.h>
namespace DB
{
/// Function `getFuzzerData`: takes no arguments and returns, as a constant String column,
/// the payload most recently stored through update().
class FunctionGetFuzzerData : public IFunction
{
public:
    static constexpr auto name = "getFuzzerData";

    /// Creates a new instance of the function.
    static FunctionPtr create()
    {
        return std::make_shared<FunctionGetFuzzerData>();
    }

    /// Overload taking a context; the context is not used.
    static FunctionPtr create(ContextPtr)
    {
        return create();
    }

    /// Replaces the stored payload with a copy of `fuzz_data_`.
    static void update(const String & fuzz_data_)
    {
        fuzz_data = fuzz_data_;
    }

    String getName() const override
    {
        return name;
    }

    size_t getNumberOfArguments() const override
    {
        return 0;
    }

    /// The result type is always String, whatever the arguments are.
    DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
    {
        return std::make_shared<DataTypeString>();
    }

    /// Reported as non-deterministic: the value depends on the payload set via update().
    bool isDeterministic() const override
    {
        return false;
    }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
    {
        return false;
    }

    /// Produces a constant column of `input_rows_count` rows holding the current payload.
    ColumnPtr executeImpl(
        const ColumnsWithTypeAndName &,
        const DataTypePtr &,
        size_t input_rows_count) const override
    {
        return DataTypeString().createColumnConst(input_rows_count, fuzz_data);
    }

private:
    /// Payload shared by every instance of the function; written only by update().
    inline static String fuzz_data;
};
}

View File

@ -85,6 +85,10 @@ void registerFunctionGetOSKernelVersion(FunctionFactory &);
void registerFunctionConvertCharset(FunctionFactory &);
#endif
#ifdef FUZZING_MODE
void registerFunctionGetFuzzerData(FunctionFactory & factory);
#endif
void registerFunctionsMiscellaneous(FunctionFactory & factory)
{
registerFunctionCurrentDatabase(factory);
@ -166,6 +170,10 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
#if USE_ICU
registerFunctionConvertCharset(factory);
#endif
#ifdef FUZZING_MODE
registerFunctionGetFuzzerData(factory);
#endif
}
}