Merge branch 'master' into update-fuzzer-dict

This commit is contained in:
Pablo Marcos 2024-08-13 15:01:52 +00:00
commit 25d1a782bc
22 changed files with 81 additions and 116 deletions

View File

@ -59,6 +59,9 @@ At a minimum, the following information should be added (but add more as needed)
- [ ] <!---ci_exclude_tsan|msan|ubsan|coverage--> Exclude: All with TSAN, MSAN, UBSAN, Coverage
- [ ] <!---ci_exclude_aarch64|release|debug--> Exclude: All with aarch64, release, debug
---
- [ ] <!---ci_include_fuzzer--> Run only fuzzers related jobs (libFuzzer fuzzers, AST fuzzers, etc.)
- [ ] <!---ci_exclude_ast--> Exclude: AST fuzzers
---
- [ ] <!---do_not_test--> Do not test
- [ ] <!---woolen_wolfdog--> Woolen Wolfdog
- [ ] <!---upload_all--> Upload binaries for special builds

View File

@ -1,4 +1,4 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")

View File

@ -57,8 +57,8 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat
if (WITH_COVERAGE)
message (STATUS "Enabled instrumentation for code coverage")
set(COVERAGE_FLAGS "SHELL:-fprofile-instr-generate -fcoverage-mapping")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
set (COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
endif()
option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF)

@ -1 +1 @@
Subproject commit 1f95f8083066f5b38fd2db172e7e7f9aa7c49d2d
Subproject commit b922c8ab9004ef9944982e4f165e2747b13223fa

View File

@ -108,7 +108,8 @@ if [ -n "$MAKE_DEB" ]; then
bash -x /build/packages/build
fi
mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
mv ./programs/clickhouse* /output ||:
mv ./programs/*_fuzzer /output ||:
[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
[ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output
[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output

View File

@ -1,4 +1,4 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")

View File

@ -10,6 +10,7 @@
#include <Poco/Net/SocketAddress.h>
#include <Poco/Net/StreamSocket.h>
#include <Daemon/BaseDaemon.h>
#include <Interpreters/Context.h>
@ -25,6 +26,12 @@ static int64_t port = 9000;
using namespace std::chrono_literals;
void on_exit()
{
BaseDaemon::terminate();
main_app.wait();
}
extern "C"
int LLVMFuzzerInitialize(int * argc, char ***argv)
{
@ -60,6 +67,8 @@ int LLVMFuzzerInitialize(int * argc, char ***argv)
exit(-1);
}
atexit(on_exit);
return 0;
}

View File

@ -1,2 +1,2 @@
clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS})
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions)

View File

@ -1,4 +1,4 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
if (USE_INCLUDE_WHAT_YOU_USE)
set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})

View File

@ -2751,7 +2751,7 @@ void ClientBase::runLibFuzzer()
for (auto & arg : fuzzer_args_holder)
fuzzer_args.emplace_back(arg.data());
int fuzzer_argc = fuzzer_args.size();
int fuzzer_argc = static_cast<int>(fuzzer_args.size());
char ** fuzzer_argv = fuzzer_args.data();
LLVMFuzzerRunDriver(&fuzzer_argc, &fuzzer_argv, [](const uint8_t * data, size_t size)

View File

@ -1,2 +1,2 @@
clickhouse_add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp)
target_link_libraries (names_and_types_fuzzer PRIVATE dbms clickhouse_functions)
target_link_libraries (names_and_types_fuzzer PRIVATE clickhouse_functions)

View File

@ -1,2 +1,2 @@
clickhouse_add_executable(data_type_deserialization_fuzzer data_type_deserialization_fuzzer.cpp ${SRCS})
target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)
target_link_libraries(data_type_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions)

View File

@ -1,2 +1,2 @@
clickhouse_add_executable(format_fuzzer format_fuzzer.cpp ${SRCS})
target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)
target_link_libraries(format_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions)

View File

@ -3,7 +3,6 @@
#include <IO/ReadBufferFromMemory.h>
#include <IO/ReadHelpers.h>
#include <Formats/FormatFactory.h>
#include <Formats/registerFormats.h>
#include <QueryPipeline/Pipe.h>

View File

@ -39,7 +39,7 @@ set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS})
set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier")
set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier -Wno-extra-semi-stmt -Wno-used-but-marked-unused")
# contrib/libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h:143:44: error: no newline at end of file [-Werror,-Wnewline-eof]
target_compile_options (codegen_select_fuzzer PRIVATE -Wno-newline-eof)

View File

@ -12,6 +12,7 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnsCommon.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
@ -203,25 +204,15 @@ template <typename NumberType, typename NumberVectorBatch, typename ConvertFunc>
void ORCBlockOutputFormat::writeNumbers(
orc::ColumnVectorBatch & orc_column,
const IColumn & column,
const PaddedPODArray<UInt8> * null_bytemap,
const PaddedPODArray<UInt8> * /*null_bytemap*/,
ConvertFunc convert)
{
NumberVectorBatch & number_orc_column = dynamic_cast<NumberVectorBatch &>(orc_column);
const auto & number_column = assert_cast<const ColumnVector<NumberType> &>(column);
number_orc_column.resize(number_column.size());
number_orc_column.data.resize(number_column.size());
for (size_t i = 0; i != number_column.size(); ++i)
{
if (null_bytemap && (*null_bytemap)[i])
{
number_orc_column.notNull[i] = 0;
continue;
}
number_orc_column.notNull[i] = 1;
number_orc_column.data[i] = convert(number_column.getElement(i));
}
number_orc_column.numElements = number_column.size();
}
template <typename Decimal, typename DecimalVectorBatch, typename ConvertFunc>
@ -229,7 +220,7 @@ void ORCBlockOutputFormat::writeDecimals(
orc::ColumnVectorBatch & orc_column,
const IColumn & column,
DataTypePtr & type,
const PaddedPODArray<UInt8> * null_bytemap,
const PaddedPODArray<UInt8> * /*null_bytemap*/,
ConvertFunc convert)
{
DecimalVectorBatch & decimal_orc_column = dynamic_cast<DecimalVectorBatch &>(orc_column);
@ -238,71 +229,49 @@ void ORCBlockOutputFormat::writeDecimals(
decimal_orc_column.precision = decimal_type->getPrecision();
decimal_orc_column.scale = decimal_type->getScale();
decimal_orc_column.resize(decimal_column.size());
for (size_t i = 0; i != decimal_column.size(); ++i)
{
if (null_bytemap && (*null_bytemap)[i])
{
decimal_orc_column.notNull[i] = 0;
continue;
}
decimal_orc_column.notNull[i] = 1;
decimal_orc_column.values.resize(decimal_column.size());
for (size_t i = 0; i != decimal_column.size(); ++i)
decimal_orc_column.values[i] = convert(decimal_column.getElement(i).value);
}
decimal_orc_column.numElements = decimal_column.size();
}
template <typename ColumnType>
void ORCBlockOutputFormat::writeStrings(
orc::ColumnVectorBatch & orc_column,
const IColumn & column,
const PaddedPODArray<UInt8> * null_bytemap)
const PaddedPODArray<UInt8> * /*null_bytemap*/)
{
orc::StringVectorBatch & string_orc_column = dynamic_cast<orc::StringVectorBatch &>(orc_column);
const auto & string_column = assert_cast<const ColumnType &>(column);
string_orc_column.resize(string_column.size());
string_orc_column.data.resize(string_column.size());
string_orc_column.length.resize(string_column.size());
for (size_t i = 0; i != string_column.size(); ++i)
{
if (null_bytemap && (*null_bytemap)[i])
{
string_orc_column.notNull[i] = 0;
continue;
}
string_orc_column.notNull[i] = 1;
const std::string_view & string = string_column.getDataAt(i).toView();
string_orc_column.data[i] = const_cast<char *>(string.data());
string_orc_column.length[i] = string.size();
}
string_orc_column.numElements = string_column.size();
}
template <typename ColumnType, typename GetSecondsFunc, typename GetNanosecondsFunc>
void ORCBlockOutputFormat::writeDateTimes(
orc::ColumnVectorBatch & orc_column,
const IColumn & column,
const PaddedPODArray<UInt8> * null_bytemap,
const PaddedPODArray<UInt8> * /*null_bytemap*/,
GetSecondsFunc get_seconds,
GetNanosecondsFunc get_nanoseconds)
{
orc::TimestampVectorBatch & timestamp_orc_column = dynamic_cast<orc::TimestampVectorBatch &>(orc_column);
const auto & timestamp_column = assert_cast<const ColumnType &>(column);
timestamp_orc_column.resize(timestamp_column.size());
timestamp_orc_column.data.resize(timestamp_column.size());
timestamp_orc_column.nanoseconds.resize(timestamp_column.size());
for (size_t i = 0; i != timestamp_column.size(); ++i)
{
if (null_bytemap && (*null_bytemap)[i])
{
timestamp_orc_column.notNull[i] = 0;
continue;
}
timestamp_orc_column.notNull[i] = 1;
timestamp_orc_column.data[i] = static_cast<int64_t>(get_seconds(timestamp_column.getElement(i)));
timestamp_orc_column.nanoseconds[i] = static_cast<int64_t>(get_nanoseconds(timestamp_column.getElement(i)));
}
timestamp_orc_column.numElements = timestamp_column.size();
}
void ORCBlockOutputFormat::writeColumn(
@ -311,9 +280,27 @@ void ORCBlockOutputFormat::writeColumn(
DataTypePtr & type,
const PaddedPODArray<UInt8> * null_bytemap)
{
orc_column.notNull.resize(column.size());
size_t rows = column.size();
orc_column.resize(rows);
orc_column.numElements = rows;
/// Calculate orc_column.hasNulls
if (null_bytemap)
orc_column.hasNulls = true;
orc_column.hasNulls = !memoryIsZero(null_bytemap->data(), 0, null_bytemap->size());
else
orc_column.hasNulls = false;
/// Fill orc_column.notNull
if (orc_column.hasNulls)
{
for (size_t i = 0; i < rows; ++i)
orc_column.notNull[i] = !(*null_bytemap)[i];
}
else
{
for (size_t i = 0; i < rows; ++i)
orc_column.notNull[i] = 1;
}
/// ORC doesn't have unsigned types, so cast everything to signed and sign-extend to Int64 to
/// make the ORC library calculate min and max correctly.
@ -471,6 +458,7 @@ void ORCBlockOutputFormat::writeColumn(
}
case TypeIndex::Nullable:
{
chassert(!null_bytemap);
const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
const PaddedPODArray<UInt8> & new_null_bytemap = assert_cast<const ColumnVector<UInt8> &>(*nullable_column.getNullMapColumnPtr()).getData();
auto nested_type = removeNullable(type);
@ -485,19 +473,15 @@ void ORCBlockOutputFormat::writeColumn(
const ColumnArray::Offsets & offsets = list_column.getOffsets();
size_t column_size = list_column.size();
list_orc_column.resize(column_size);
list_orc_column.offsets.resize(column_size + 1);
/// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i].
list_orc_column.offsets[0] = 0;
for (size_t i = 0; i != column_size; ++i)
{
list_orc_column.offsets[i + 1] = offsets[i];
list_orc_column.notNull[i] = 1;
}
orc::ColumnVectorBatch & nested_orc_column = *list_orc_column.elements;
writeColumn(nested_orc_column, list_column.getData(), nested_type, null_bytemap);
list_orc_column.numElements = column_size;
writeColumn(nested_orc_column, list_column.getData(), nested_type, nullptr);
break;
}
case TypeIndex::Tuple:
@ -505,10 +489,8 @@ void ORCBlockOutputFormat::writeColumn(
orc::StructVectorBatch & struct_orc_column = dynamic_cast<orc::StructVectorBatch &>(orc_column);
const auto & tuple_column = assert_cast<const ColumnTuple &>(column);
auto nested_types = assert_cast<const DataTypeTuple *>(type.get())->getElements();
for (size_t i = 0; i != tuple_column.size(); ++i)
struct_orc_column.notNull[i] = 1;
for (size_t i = 0; i != tuple_column.tupleSize(); ++i)
writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], null_bytemap);
writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], nullptr);
break;
}
case TypeIndex::Map:
@ -520,25 +502,21 @@ void ORCBlockOutputFormat::writeColumn(
size_t column_size = list_column.size();
map_orc_column.resize(list_column.size());
map_orc_column.offsets.resize(column_size + 1);
/// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i].
map_orc_column.offsets[0] = 0;
for (size_t i = 0; i != column_size; ++i)
{
map_orc_column.offsets[i + 1] = offsets[i];
map_orc_column.notNull[i] = 1;
}
const auto nested_columns = assert_cast<const ColumnTuple *>(list_column.getDataPtr().get())->getColumns();
orc::ColumnVectorBatch & keys_orc_column = *map_orc_column.keys;
auto key_type = map_type.getKeyType();
writeColumn(keys_orc_column, *nested_columns[0], key_type, null_bytemap);
writeColumn(keys_orc_column, *nested_columns[0], key_type, nullptr);
orc::ColumnVectorBatch & values_orc_column = *map_orc_column.elements;
auto value_type = map_type.getValueType();
writeColumn(values_orc_column, *nested_columns[1], value_type, null_bytemap);
map_orc_column.numElements = column_size;
writeColumn(values_orc_column, *nested_columns[1], value_type, nullptr);
break;
}
default:
@ -546,27 +524,6 @@ void ORCBlockOutputFormat::writeColumn(
}
}
size_t ORCBlockOutputFormat::getColumnSize(const IColumn & column, DataTypePtr & type)
{
if (type->getTypeId() == TypeIndex::Array)
{
auto nested_type = assert_cast<const DataTypeArray &>(*type).getNestedType();
const IColumn & nested_column = assert_cast<const ColumnArray &>(column).getData();
return std::max(column.size(), getColumnSize(nested_column, nested_type));
}
return column.size();
}
size_t ORCBlockOutputFormat::getMaxColumnSize(Chunk & chunk)
{
size_t columns_num = chunk.getNumColumns();
size_t max_column_size = 0;
for (size_t i = 0; i != columns_num; ++i)
max_column_size = std::max(max_column_size, getColumnSize(*chunk.getColumns()[i], data_types[i]));
return max_column_size;
}
void ORCBlockOutputFormat::consume(Chunk chunk)
{
if (!writer)
@ -575,10 +532,7 @@ void ORCBlockOutputFormat::consume(Chunk chunk)
size_t columns_num = chunk.getNumColumns();
size_t rows_num = chunk.getNumRows();
/// getMaxColumnSize is needed to write arrays.
/// The size of the batch must be no less than total amount of array elements
/// and no less than the number of rows (ORC writes a null bit for every row).
std::unique_ptr<orc::ColumnVectorBatch> batch = writer->createRowBatch(getMaxColumnSize(chunk));
std::unique_ptr<orc::ColumnVectorBatch> batch = writer->createRowBatch(chunk.getNumRows());
orc::StructVectorBatch & root = dynamic_cast<orc::StructVectorBatch &>(*batch);
auto columns = chunk.detachColumns();

View File

@ -69,11 +69,6 @@ private:
void writeColumn(orc::ColumnVectorBatch & orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray<UInt8> * null_bytemap);
/// These two functions are needed to know maximum nested size of arrays to
/// create an ORC Batch with the appropriate size
size_t getColumnSize(const IColumn & column, DataTypePtr & type);
size_t getMaxColumnSize(Chunk & chunk);
void prepareWriter();
const FormatSettings format_settings;

View File

@ -4,4 +4,4 @@ clickhouse_add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.c
target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms clickhouse_functions)
clickhouse_add_executable (columns_description_fuzzer columns_description_fuzzer.cpp)
target_link_libraries (columns_description_fuzzer PRIVATE dbms clickhouse_functions)
target_link_libraries (columns_description_fuzzer PRIVATE clickhouse_functions)

View File

@ -1,4 +1,5 @@
#include <Storages/ColumnsDescription.h>
#include <iostream>
#include <iostream>

View File

@ -75,7 +75,7 @@ def get_run_command(
f"--volume={result_path}:/test_output "
"--security-opt seccomp=unconfined " # required to issue io_uring sys-calls
f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image} "
"python3 ./utils/runner.py"
"python3 /usr/share/clickhouse-test/fuzz/runner.py"
)

View File

@ -11,7 +11,7 @@ FUZZER_ARGS = os.getenv("FUZZER_ARGS", "")
def run_fuzzer(fuzzer: str):
logging.info(f"Running fuzzer {fuzzer}...")
logging.info("Running fuzzer %s...", fuzzer)
corpus_dir = f"{fuzzer}.in"
with Path(corpus_dir) as path:
@ -29,28 +29,27 @@ def run_fuzzer(fuzzer: str):
if parser.has_section("asan"):
os.environ["ASAN_OPTIONS"] = (
f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}"
f"{os.environ['ASAN_OPTIONS']}:{':'.join(f'{key}={value}' for key, value in parser['asan'].items())}"
)
if parser.has_section("msan"):
os.environ["MSAN_OPTIONS"] = (
f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}"
f"{os.environ['MSAN_OPTIONS']}:{':'.join(f'{key}={value}' for key, value in parser['msan'].items())}"
)
if parser.has_section("ubsan"):
os.environ["UBSAN_OPTIONS"] = (
f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}"
f"{os.environ['UBSAN_OPTIONS']}:{':'.join(f'{key}={value}' for key, value in parser['ubsan'].items())}"
)
if parser.has_section("libfuzzer"):
custom_libfuzzer_options = " ".join(
"-%s=%s" % (key, value)
for key, value in parser["libfuzzer"].items()
f"-{key}={value}" for key, value in parser["libfuzzer"].items()
)
if parser.has_section("fuzzer_arguments"):
fuzzer_arguments = " ".join(
("%s" % key) if value == "" else ("%s=%s" % (key, value))
(f"{key}") if value == "" else (f"{key}={value}")
for key, value in parser["fuzzer_arguments"].items()
)
@ -65,7 +64,7 @@ def run_fuzzer(fuzzer: str):
cmd_line += " < /dev/null"
logging.info(f"...will execute: {cmd_line}")
logging.info("...will execute: %s", cmd_line)
subprocess.check_call(cmd_line, shell=True)

View File

@ -0,0 +1,4 @@
[fuzzer_arguments]
--log-file=tcp_protocol_fuzzer.log
--=
--logging.terminal=0