Replace getFuzzerData with query text fuzzer in clickhouse-local

This commit is contained in:
Alexey Milovidov 2024-03-18 02:16:52 +01:00
parent 5000e3e10b
commit 1a61da1bae
8 changed files with 66 additions and 133 deletions

View File

@ -102,6 +102,8 @@ if (ENABLE_FUZZING)
# For codegen_select_fuzzer
set (ENABLE_PROTOBUF 1)
add_compile_definitions(FUZZING_MODE=1)
endif()
# Global libraries
@ -574,7 +576,7 @@ if (FUZZER)
if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY"))
target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
endif()
if (target_type STREQUAL "EXECUTABLE" AND (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse"))
if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer")
message(STATUS "${target} instrumented with fuzzer")
target_link_libraries(${target} PUBLIC ch_contrib::fuzzer)
# Add to fuzzers bundle
@ -583,6 +585,12 @@ if (FUZZER)
get_target_property(target_bin_dir ${target} BINARY_DIR)
add_custom_command(TARGET fuzzers POST_BUILD COMMAND mv "${target_bin_dir}/${target_bin_name}" "${CMAKE_CURRENT_BINARY_DIR}/programs/" VERBATIM)
endif()
# The main `clickhouse` binary is handled separately from the `*_fuzzer` executables:
# it is linked against ch_contrib::fuzzer_no_main instead of ch_contrib::fuzzer.
# NOTE(review): presumably because this binary keeps its own main() and starts the
# fuzzing driver itself - confirm against the definition of the contrib target.
if (target STREQUAL "clickhouse")
message(STATUS "${target} instrumented with fuzzer")
target_link_libraries(${target} PUBLIC ch_contrib::fuzzer_no_main)
# Add to fuzzers bundle
# (add_dependencies only orders the build so that `fuzzers` is built after this target;
# unlike the `*_fuzzer` branch, no POST_BUILD step moving the binary is registered here.)
add_dependencies(fuzzers ${target})
endif()
endif()
endforeach()
add_custom_command(TARGET fuzzers POST_BUILD COMMAND SRC=${CMAKE_SOURCE_DIR} BIN=${CMAKE_BINARY_DIR} OUT=${CMAKE_BINARY_DIR}/programs ${CMAKE_SOURCE_DIR}/tests/fuzz/build.sh VERBATIM)

View File

@ -268,10 +268,6 @@ if (ENABLE_TESTS)
add_dependencies(clickhouse-bundle clickhouse-tests)
endif()
if (ENABLE_FUZZING)
add_compile_definitions(FUZZING_MODE=1)
endif ()
if (TARGET ch_contrib::protobuf)
get_property(google_proto_files TARGET ch_contrib::protobuf PROPERTY google_proto_files)
foreach (proto_file IN LISTS google_proto_files)

View File

@ -25,9 +25,3 @@ endif()
# Always use internal readpassphrase
target_link_libraries(clickhouse-local-lib PRIVATE readpassphrase)
if (ENABLE_FUZZING)
add_compile_definitions(FUZZING_MODE=1)
set (WITH_COVERAGE ON)
target_link_libraries(clickhouse-local-lib PRIVATE ${LIB_FUZZING_ENGINE})
endif ()

View File

@ -4,7 +4,6 @@
#include <Common/logger_useful.h>
#include <Common/formatReadable.h>
#include <base/getMemoryAmount.h>
#include <base/errnoToString.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/String.h>
#include <Poco/Logger.h>
@ -22,8 +21,6 @@
#include <Interpreters/loadMetadata.h>
#include <Interpreters/registerInterpreters.h>
#include <base/getFQDNOrHostName.h>
#include <Common/scope_guard_safe.h>
#include <Interpreters/Session.h>
#include <Access/AccessControl.h>
#include <Common/PoolId.h>
#include <Common/Exception.h>
@ -34,7 +31,6 @@
#include <Common/quoteString.h>
#include <Common/randomSeed.h>
#include <Common/ThreadPool.h>
#include <Loggers/Loggers.h>
#include <Loggers/OwnFormattingChannel.h>
#include <Loggers/OwnPatternFormatter.h>
#include <IO/ReadBufferFromFile.h>
@ -42,7 +38,6 @@
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/UseSSL.h>
#include <IO/SharedThreadPools.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTInsertQuery.h>
#include <Common/ErrorHandlers.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsStorage.h>
@ -61,10 +56,6 @@
#include "config.h"
#if defined(FUZZING_MODE)
#include <Functions/getFuzzerData.h>
#endif
#if USE_AZURE_BLOB_STORAGE
# include <azure/storage/common/internal/xml_wrapper.hpp>
#endif
@ -474,25 +465,10 @@ try
}
}
#if defined(FUZZING_MODE)
static bool first_time = true;
if (first_time)
{
if (queries_files.empty() && queries.empty())
{
std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode." << "\033[0m" << std::endl;
std::cerr << "\033[31m" << "You have to provide a query with --query or --queries-file option." << "\033[0m" << std::endl;
std::cerr << "\033[31m" << "The query have to use function getFuzzerData() inside." << "\033[0m" << std::endl;
exit(1);
}
is_interactive = false;
#else
is_interactive = stdin_is_a_tty
&& (config().hasOption("interactive")
|| (queries.empty() && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
#endif
if (!is_interactive)
{
/// We will terminate process on error
@ -538,15 +514,13 @@ try
connect();
#ifdef FUZZING_MODE
first_time = false;
}
#endif
String initial_query = getInitialCreateTableQuery();
if (!initial_query.empty())
processQueryText(initial_query);
#if defined(FUZZING_MODE)
runLibFuzzer();
#else
if (is_interactive && !delayed_interactive)
{
runInteractive();
@ -558,10 +532,8 @@ try
if (delayed_interactive)
runInteractive();
}
#ifndef FUZZING_MODE
cleanup();
#endif
return Application::EXIT_OK;
}
catch (const DB::Exception & e)
@ -829,23 +801,11 @@ void LocalServer::processConfig()
void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & options_description)
{
#if defined(FUZZING_MODE)
std::cout <<
"usage: clickhouse <clickhouse-local arguments> -- <libfuzzer arguments>\n"
"Note: It is important not to use only one letter keys with single dash for \n"
"for clickhouse-local arguments. It may work incorrectly.\n"
"ClickHouse is build with coverage guided fuzzer (libfuzzer) inside it.\n"
"You have to provide a query which contains getFuzzerData function.\n"
"This will take the data from fuzzing engine, pass it to getFuzzerData function and execute a query.\n"
"Each time the data will be different, and it will last until some segfault or sanitizer assertion is found. \n";
#else
std::cout << getHelpHeader() << "\n";
std::cout << options_description.main_description.value() << "\n";
std::cout << getHelpFooter() << "\n";
std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n";
std::cout << "\nSee also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/\n";
#endif
}
@ -921,6 +881,7 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
std::string_view arg = argv[arg_num];
/// Parameter arg after underline.
if (arg.starts_with("--param_"))
{
@ -952,9 +913,27 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
arg = argv[arg_num];
addMultiquery(arg, common_arguments);
}
else if (arg == "--")
{
/// A bare "--" separates clickhouse-local arguments from libFuzzer arguments.
/// Note: in non-fuzzing builds this branch is empty, i.e. "--" is consumed here
/// and is not forwarded to common_arguments.
#if defined(FUZZING_MODE)
/// The "--" element itself is reused as argv[0] of the fuzzer driver, so the fuzzer
/// argument vector is argv[arg_num] .. argv[argc - 1], which is exactly
/// (argc - arg_num) entries. Counting one more would make the driver read the
/// terminating null pointer argv[argc] as if it were a real argument.
fuzzer_argc = argc - arg_num;
fuzzer_argv = argv + arg_num;
break;
#endif
}
else
{
common_arguments.emplace_back(arg);
}
}
#if defined(FUZZING_MODE)
if (!fuzzer_argc)
{
fuzzer_argc = 1;
fuzzer_argv = argv;
}
#endif
}
}
@ -988,67 +967,3 @@ int mainEntryClickHouseLocal(int argc, char ** argv)
return code ? code : 1;
}
}
#if defined(FUZZING_MODE)
// linked from programs/main.cpp
bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv);
std::optional<DB::LocalServer> fuzz_app;
extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
{
std::vector<char *> argv(*pargv, *pargv + (*pargc + 1));
/// As a user you can add flags to clickhouse binary in fuzzing mode as follows
/// clickhouse local <set of clickhouse-local specific flag> -- <set of libfuzzer flags>
char **p = &(*pargv)[1];
auto it = argv.begin() + 1;
for (; *it; ++it)
{
if (strcmp(*it, "--") == 0)
{
++it;
break;
}
}
while (*it)
{
if (strncmp(*it, "--", 2) != 0)
{
*(p++) = *it;
it = argv.erase(it);
}
else
++it;
}
*pargc = static_cast<int>(p - &(*pargv)[0]);
*p = nullptr;
/// Initialize clickhouse-local app
fuzz_app.emplace();
fuzz_app->init(static_cast<int>(argv.size() - 1), argv.data());
return 0;
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
try
{
auto input = String(reinterpret_cast<const char *>(data), size);
DB::FunctionGetFuzzerData::update(input);
fuzz_app->run();
}
catch (...)
{
}
return 0;
}
#endif

View File

@ -67,7 +67,6 @@ namespace
using MainFunc = int (*)(int, char**);
#if !defined(FUZZING_MODE)
/// Add an item here to register new application
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
{
@ -111,7 +110,6 @@ int printHelp(int, char **)
std::cerr << "clickhouse " << application.first << " [args] " << std::endl;
return -1;
}
#endif
/// Add an item here to register a new short name
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
@ -284,7 +282,7 @@ struct Checker
;
#if !defined(FUZZING_MODE) && !defined(USE_MUSL)
#if !defined(USE_MUSL)
/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
void checkHarmfulEnvironmentVariables(char ** argv)
{
@ -446,13 +444,8 @@ extern "C"
///
/// extern bool inside_main;
/// class C { C() { assert(inside_main); } };
#ifndef FUZZING_MODE
bool inside_main = false;
#else
bool inside_main = true;
#endif
#if !defined(FUZZING_MODE)
int main(int argc_, char ** argv_)
{
inside_main = true;
@ -510,4 +503,3 @@ int main(int argc_, char ** argv_)
return exit_code;
}
#endif

View File

@ -2631,6 +2631,31 @@ void ClientBase::runNonInteractive()
}
#if defined(FUZZING_MODE)
/// Entry point of the fuzzing driver; only declared here, the definition comes from the linked fuzzer library.
/// It receives pointers to the argument count/vector and a plain function pointer that is called for every input.
extern "C" int LLVMFuzzerRunDriver(int * argc, char *** argv, int (*callback)(const uint8_t * data, size_t size));
/// The callback below must be convertible to a plain function pointer, so it cannot capture `this`.
/// The instance is published through this file-level pointer instead; it is assigned in runLibFuzzer().
ClientBase * app;
/// Runs the fuzzing driver with fuzzer_argc / fuzzer_argv, executing every generated input as query text.
void ClientBase::runLibFuzzer()
{
app = this;
LLVMFuzzerRunDriver(&fuzzer_argc, &fuzzer_argv, [](const uint8_t * data, size_t size)
{
try
{
/// The raw input bytes are taken verbatim as the text of the query.
String query(reinterpret_cast<const char *>(data), size);
app->processQueryText(query);
}
catch (...)
{
/// Any exception is swallowed and reported to the driver as -1.
/// NOTE(review): libFuzzer documents -1 as "do not add this input to the corpus" - confirm
/// this is the intended treatment for queries that merely fail with an exception.
return -1;
}
return 0;
});
}
#endif
void ClientBase::clearTerminal()
{
/// Clear from cursor until end of screen.

View File

@ -78,6 +78,13 @@ protected:
void runInteractive();
void runNonInteractive();
#if defined(FUZZING_MODE)
int fuzzer_argc = 0;
char ** fuzzer_argv = nullptr;
void runLibFuzzer();
#endif
virtual bool processWithFuzzing(const String &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Query processing with fuzzing is not implemented");

View File

@ -142,10 +142,6 @@ list (APPEND OBJECT_LIBS $<TARGET_OBJECTS:clickhouse_functions_extractkeyvaluepa
# Signed integer overflow on user-provided data inside boost::geometry - ignore.
set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow)
if (ENABLE_FUZZING)
add_compile_definitions(FUZZING_MODE=1)
endif ()
if (USE_GPERF)
# Only for regenerating
add_custom_target(generate-html-char-ref-gperf ./HTMLCharacterReference.sh