mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Merge pull request #27895 from nikitamikhaylov/temporary_remove_yaml_fuzzer
Build fuzzers with clang-tidy
This commit is contained in:
commit
cfa571cac4
@ -173,6 +173,9 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
|
|||||||
cmake_flags.append('-DUSE_GTEST=1')
|
cmake_flags.append('-DUSE_GTEST=1')
|
||||||
cmake_flags.append('-DENABLE_TESTS=1')
|
cmake_flags.append('-DENABLE_TESTS=1')
|
||||||
cmake_flags.append('-DENABLE_EXAMPLES=1')
|
cmake_flags.append('-DENABLE_EXAMPLES=1')
|
||||||
|
cmake_flags.append('-DENABLE_FUZZING=1')
|
||||||
|
# For fuzzing needs
|
||||||
|
cmake_flags.append('-DUSE_YAML_CPP=1')
|
||||||
# Don't stop on first error to find more clang-tidy errors in one run.
|
# Don't stop on first error to find more clang-tidy errors in one run.
|
||||||
result.append('NINJA_FLAGS=-k0')
|
result.append('NINJA_FLAGS=-k0')
|
||||||
|
|
||||||
|
@ -80,8 +80,3 @@ target_link_libraries (average PRIVATE clickhouse_common_io)
|
|||||||
|
|
||||||
add_executable (shell_command_inout shell_command_inout.cpp)
|
add_executable (shell_command_inout shell_command_inout.cpp)
|
||||||
target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io)
|
target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io)
|
||||||
|
|
||||||
if (ENABLE_FUZZING)
|
|
||||||
add_executable(YAML_fuzzer YAML_fuzzer.cpp ${SRCS})
|
|
||||||
target_link_libraries(YAML_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})
|
|
||||||
endif ()
|
|
||||||
|
@ -1,39 +0,0 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
#include <string>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <time.h>
|
|
||||||
#include <filesystem>
|
|
||||||
|
|
||||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
|
|
||||||
{
|
|
||||||
/// How to test:
|
|
||||||
/// build ClickHouse with YAML_fuzzer.cpp
|
|
||||||
/// ./YAML_fuzzer YAML_CORPUS
|
|
||||||
/// where YAML_CORPUS is a directory with different YAML configs for libfuzzer
|
|
||||||
char file_name[L_tmpnam];
|
|
||||||
if (!std::tmpnam(file_name))
|
|
||||||
{
|
|
||||||
std::cerr << "Cannot create temp file!\n";
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
std::string input = std::string(reinterpret_cast<const char*>(data), size);
|
|
||||||
DB::YAMLParser parser;
|
|
||||||
|
|
||||||
{
|
|
||||||
std::ofstream temp_file(file_name);
|
|
||||||
temp_file << input;
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
DB::YAMLParser::parse(std::string(file_name));
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
std::cerr << "YAML_fuzzer failed: " << getCurrentExceptionMessage() << std::endl;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
@ -1,3 +1,18 @@
|
|||||||
if(ENABLE_EXAMPLES)
|
if (ENABLE_FUZZING)
|
||||||
|
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
|
||||||
|
add_headers_and_sources(fuzz_compression .)
|
||||||
|
|
||||||
|
# Remove this file, because it has dependencies on DataTypes
|
||||||
|
# list(REMOVE_ITEM) takes the NAME of the list variable, not its expanded value;
# passing ${fuzz_compression_sources} would leave the real list unmodified.
list(REMOVE_ITEM fuzz_compression_sources CompressionFactoryAdditions.cpp)
|
||||||
|
|
||||||
|
add_library(fuzz_compression ${fuzz_compression_headers} ${fuzz_compression_sources})
|
||||||
|
target_link_libraries(fuzz_compression PUBLIC clickhouse_parsers clickhouse_common_io common lz4)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (ENABLE_EXAMPLES)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (ENABLE_FUZZING)
|
||||||
|
add_subdirectory(fuzzers)
|
||||||
|
endif()
|
||||||
|
@ -22,13 +22,10 @@ namespace ErrorCodes
|
|||||||
{
|
{
|
||||||
extern const int LOGICAL_ERROR;
|
extern const int LOGICAL_ERROR;
|
||||||
extern const int UNKNOWN_CODEC;
|
extern const int UNKNOWN_CODEC;
|
||||||
extern const int BAD_ARGUMENTS;
|
|
||||||
extern const int UNEXPECTED_AST_STRUCTURE;
|
extern const int UNEXPECTED_AST_STRUCTURE;
|
||||||
extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
|
extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr auto DEFAULT_CODEC_NAME = "Default";
|
|
||||||
|
|
||||||
CompressionCodecPtr CompressionCodecFactory::getDefaultCodec() const
|
CompressionCodecPtr CompressionCodecFactory::getDefaultCodec() const
|
||||||
{
|
{
|
||||||
return default_codec;
|
return default_codec;
|
||||||
@ -49,184 +46,6 @@ CompressionCodecPtr CompressionCodecFactory::get(const String & family_name, std
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CompressionCodecFactory::validateCodec(
|
|
||||||
const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs) const
|
|
||||||
{
|
|
||||||
if (family_name.empty())
|
|
||||||
throw Exception("Compression codec name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
|
|
||||||
if (level)
|
|
||||||
{
|
|
||||||
auto literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(*level));
|
|
||||||
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)),
|
|
||||||
{}, sanity_check, allow_experimental_codecs);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name));
|
|
||||||
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier),
|
|
||||||
{}, sanity_check, allow_experimental_codecs);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
|
|
||||||
const ASTPtr & ast, const IDataType * column_type, bool sanity_check, bool allow_experimental_codecs) const
|
|
||||||
{
|
|
||||||
if (const auto * func = ast->as<ASTFunction>())
|
|
||||||
{
|
|
||||||
ASTPtr codecs_descriptions = std::make_shared<ASTExpressionList>();
|
|
||||||
|
|
||||||
bool is_compression = false;
|
|
||||||
bool has_none = false;
|
|
||||||
std::optional<size_t> generic_compression_codec_pos;
|
|
||||||
std::set<size_t> post_processing_codecs;
|
|
||||||
|
|
||||||
bool can_substitute_codec_arguments = true;
|
|
||||||
for (size_t i = 0, size = func->arguments->children.size(); i < size; ++i)
|
|
||||||
{
|
|
||||||
const auto & inner_codec_ast = func->arguments->children[i];
|
|
||||||
String codec_family_name;
|
|
||||||
ASTPtr codec_arguments;
|
|
||||||
if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
|
|
||||||
{
|
|
||||||
codec_family_name = family_name->name();
|
|
||||||
codec_arguments = {};
|
|
||||||
}
|
|
||||||
else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>())
|
|
||||||
{
|
|
||||||
codec_family_name = ast_func->name;
|
|
||||||
codec_arguments = ast_func->arguments;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
|
|
||||||
|
|
||||||
/// Default codec replaced with current default codec which may depend on different
|
|
||||||
/// settings (and properties of data) in runtime.
|
|
||||||
CompressionCodecPtr result_codec;
|
|
||||||
if (codec_family_name == DEFAULT_CODEC_NAME)
|
|
||||||
{
|
|
||||||
if (codec_arguments != nullptr)
|
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
||||||
"{} codec cannot have any arguments, it's just an alias for codec specified in config.xml", DEFAULT_CODEC_NAME);
|
|
||||||
|
|
||||||
result_codec = default_codec;
|
|
||||||
codecs_descriptions->children.emplace_back(std::make_shared<ASTIdentifier>(DEFAULT_CODEC_NAME));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (column_type)
|
|
||||||
{
|
|
||||||
CompressionCodecPtr prev_codec;
|
|
||||||
IDataType::StreamCallbackWithType callback = [&](
|
|
||||||
const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type)
|
|
||||||
{
|
|
||||||
if (ISerialization::isSpecialCompressionAllowed(substream_path))
|
|
||||||
{
|
|
||||||
result_codec = getImpl(codec_family_name, codec_arguments, &substream_type);
|
|
||||||
|
|
||||||
/// Case for column Tuple, which compressed with codec which depends on data type, like Delta.
|
|
||||||
/// We cannot substitute parameters for such codecs.
|
|
||||||
if (prev_codec && prev_codec->getHash() != result_codec->getHash())
|
|
||||||
can_substitute_codec_arguments = false;
|
|
||||||
prev_codec = result_codec;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
ISerialization::SubstreamPath stream_path;
|
|
||||||
column_type->enumerateStreams(column_type->getDefaultSerialization(), callback, stream_path);
|
|
||||||
|
|
||||||
if (!result_codec)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result_codec = getImpl(codec_family_name, codec_arguments, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!allow_experimental_codecs && result_codec->isExperimental())
|
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
||||||
"Codec {} is experimental and not meant to be used in production."
|
|
||||||
" You can enable it with the 'allow_experimental_codecs' setting.",
|
|
||||||
codec_family_name);
|
|
||||||
|
|
||||||
codecs_descriptions->children.emplace_back(result_codec->getCodecDesc());
|
|
||||||
}
|
|
||||||
|
|
||||||
is_compression |= result_codec->isCompression();
|
|
||||||
has_none |= result_codec->isNone();
|
|
||||||
|
|
||||||
if (!generic_compression_codec_pos && result_codec->isGenericCompression())
|
|
||||||
generic_compression_codec_pos = i;
|
|
||||||
|
|
||||||
if (result_codec->isPostProcessing())
|
|
||||||
post_processing_codecs.insert(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
String codec_description = queryToString(codecs_descriptions);
|
|
||||||
|
|
||||||
if (sanity_check)
|
|
||||||
{
|
|
||||||
if (codecs_descriptions->children.size() > 1 && has_none)
|
|
||||||
throw Exception(
|
|
||||||
"It does not make sense to have codec NONE along with other compression codecs: " + codec_description
|
|
||||||
+ ". (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
|
|
||||||
ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
|
|
||||||
/// Allow to explicitly specify single NONE codec if user don't want any compression.
|
|
||||||
/// But applying other transformations solely without compression (e.g. Delta) does not make sense.
|
|
||||||
/// It's okay to apply post-processing codecs solely without anything else.
|
|
||||||
if (!is_compression && !has_none && post_processing_codecs.size() != codecs_descriptions->children.size())
|
|
||||||
throw Exception(
|
|
||||||
"Compression codec " + codec_description
|
|
||||||
+ " does not compress anything."
|
|
||||||
" You may want to add generic compression algorithm after other transformations, like: "
|
|
||||||
+ codec_description
|
|
||||||
+ ", LZ4."
|
|
||||||
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
|
|
||||||
ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
|
|
||||||
/// It does not make sense to apply any non-post-processing codecs
|
|
||||||
/// after post-processing one.
|
|
||||||
if (!post_processing_codecs.empty() &&
|
|
||||||
*post_processing_codecs.begin() != codecs_descriptions->children.size() - post_processing_codecs.size())
|
|
||||||
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
|
|
||||||
" because it does not make sense to apply any non-post-processing codecs after"
|
|
||||||
" post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs'"
|
|
||||||
" to skip this check).", ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
|
|
||||||
/// It does not make sense to apply any transformations after generic compression algorithm
|
|
||||||
/// So, generic compression can be only one and only at the end.
|
|
||||||
if (generic_compression_codec_pos &&
|
|
||||||
*generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - post_processing_codecs.size())
|
|
||||||
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
|
|
||||||
" because it does not make sense to apply any transformations after generic compression algorithm."
|
|
||||||
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/// For columns with nested types like Tuple(UInt32, UInt64) we
|
|
||||||
/// obviously cannot substitute parameters for codecs which depend on
|
|
||||||
/// data type, because for the first column Delta(4) is suitable and
|
|
||||||
/// Delta(8) for the second. So we should leave codec description as is
|
|
||||||
/// and deduce them in get method for each subtype separately. For all
|
|
||||||
/// other types it's better to substitute parameters, for better
|
|
||||||
/// readability and backward compatibility.
|
|
||||||
if (can_substitute_codec_arguments)
|
|
||||||
{
|
|
||||||
std::shared_ptr<ASTFunction> result = std::make_shared<ASTFunction>();
|
|
||||||
result->name = "CODEC";
|
|
||||||
result->arguments = codecs_descriptions;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return ast;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
throw Exception("Unknown codec family: " + queryToString(ast), ErrorCodes::UNKNOWN_CODEC);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
CompressionCodecPtr CompressionCodecFactory::get(
|
CompressionCodecPtr CompressionCodecFactory::get(
|
||||||
const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const
|
const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const
|
||||||
|
@ -14,6 +14,8 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
static constexpr auto DEFAULT_CODEC_NAME = "Default";
|
||||||
|
|
||||||
class ICompressionCodec;
|
class ICompressionCodec;
|
||||||
|
|
||||||
using CompressionCodecPtr = std::shared_ptr<ICompressionCodec>;
|
using CompressionCodecPtr = std::shared_ptr<ICompressionCodec>;
|
||||||
|
214
src/Compression/CompressionFactoryAdditions.cpp
Normal file
214
src/Compression/CompressionFactoryAdditions.cpp
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
/**
|
||||||
|
* This file contains a part of CompressionCodecFactory methods definitions and
|
||||||
|
* is needed only because they have dependencies on DataTypes.
|
||||||
|
* They are not useful for fuzzers, so we leave them in other translation unit.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <Compression/CompressionFactory.h>
|
||||||
|
|
||||||
|
#include <Parsers/ASTFunction.h>
|
||||||
|
#include <Parsers/ASTLiteral.h>
|
||||||
|
#include <Parsers/ASTIdentifier.h>
|
||||||
|
#include <Parsers/parseQuery.h>
|
||||||
|
#include <Parsers/queryToString.h>
|
||||||
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
|
#include <DataTypes/NestedUtils.h>
|
||||||
|
#include <DataTypes/DataTypeArray.h>
|
||||||
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
|
#include <DataTypes/DataTypeNested.h>
|
||||||
|
#include <Common/Exception.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int UNEXPECTED_AST_STRUCTURE;
|
||||||
|
extern const int UNKNOWN_CODEC;
|
||||||
|
extern const int BAD_ARGUMENTS;
|
||||||
|
extern const int LOGICAL_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void CompressionCodecFactory::validateCodec(
|
||||||
|
const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs) const
|
||||||
|
{
|
||||||
|
if (family_name.empty())
|
||||||
|
throw Exception("Compression codec name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
|
||||||
|
if (level)
|
||||||
|
{
|
||||||
|
auto literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(*level));
|
||||||
|
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)),
|
||||||
|
{}, sanity_check, allow_experimental_codecs);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name));
|
||||||
|
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier),
|
||||||
|
{}, sanity_check, allow_experimental_codecs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
|
||||||
|
const ASTPtr & ast, const IDataType * column_type, bool sanity_check, bool allow_experimental_codecs) const
|
||||||
|
{
|
||||||
|
if (const auto * func = ast->as<ASTFunction>())
|
||||||
|
{
|
||||||
|
ASTPtr codecs_descriptions = std::make_shared<ASTExpressionList>();
|
||||||
|
|
||||||
|
bool is_compression = false;
|
||||||
|
bool has_none = false;
|
||||||
|
std::optional<size_t> generic_compression_codec_pos;
|
||||||
|
std::set<size_t> post_processing_codecs;
|
||||||
|
|
||||||
|
bool can_substitute_codec_arguments = true;
|
||||||
|
for (size_t i = 0, size = func->arguments->children.size(); i < size; ++i)
|
||||||
|
{
|
||||||
|
const auto & inner_codec_ast = func->arguments->children[i];
|
||||||
|
String codec_family_name;
|
||||||
|
ASTPtr codec_arguments;
|
||||||
|
if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
|
||||||
|
{
|
||||||
|
codec_family_name = family_name->name();
|
||||||
|
codec_arguments = {};
|
||||||
|
}
|
||||||
|
else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>())
|
||||||
|
{
|
||||||
|
codec_family_name = ast_func->name;
|
||||||
|
codec_arguments = ast_func->arguments;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
|
||||||
|
|
||||||
|
/// Default codec replaced with current default codec which may depend on different
|
||||||
|
/// settings (and properties of data) in runtime.
|
||||||
|
CompressionCodecPtr result_codec;
|
||||||
|
if (codec_family_name == DEFAULT_CODEC_NAME)
|
||||||
|
{
|
||||||
|
if (codec_arguments != nullptr)
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||||
|
"{} codec cannot have any arguments, it's just an alias for codec specified in config.xml", DEFAULT_CODEC_NAME);
|
||||||
|
|
||||||
|
result_codec = default_codec;
|
||||||
|
codecs_descriptions->children.emplace_back(std::make_shared<ASTIdentifier>(DEFAULT_CODEC_NAME));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (column_type)
|
||||||
|
{
|
||||||
|
CompressionCodecPtr prev_codec;
|
||||||
|
IDataType::StreamCallbackWithType callback = [&](
|
||||||
|
const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type)
|
||||||
|
{
|
||||||
|
if (ISerialization::isSpecialCompressionAllowed(substream_path))
|
||||||
|
{
|
||||||
|
result_codec = getImpl(codec_family_name, codec_arguments, &substream_type);
|
||||||
|
|
||||||
|
/// Case for column Tuple, which compressed with codec which depends on data type, like Delta.
|
||||||
|
/// We cannot substitute parameters for such codecs.
|
||||||
|
if (prev_codec && prev_codec->getHash() != result_codec->getHash())
|
||||||
|
can_substitute_codec_arguments = false;
|
||||||
|
prev_codec = result_codec;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ISerialization::SubstreamPath stream_path;
|
||||||
|
column_type->enumerateStreams(column_type->getDefaultSerialization(), callback, stream_path);
|
||||||
|
|
||||||
|
if (!result_codec)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result_codec = getImpl(codec_family_name, codec_arguments, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!allow_experimental_codecs && result_codec->isExperimental())
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||||
|
"Codec {} is experimental and not meant to be used in production."
|
||||||
|
" You can enable it with the 'allow_experimental_codecs' setting.",
|
||||||
|
codec_family_name);
|
||||||
|
|
||||||
|
codecs_descriptions->children.emplace_back(result_codec->getCodecDesc());
|
||||||
|
}
|
||||||
|
|
||||||
|
is_compression |= result_codec->isCompression();
|
||||||
|
has_none |= result_codec->isNone();
|
||||||
|
|
||||||
|
if (!generic_compression_codec_pos && result_codec->isGenericCompression())
|
||||||
|
generic_compression_codec_pos = i;
|
||||||
|
|
||||||
|
if (result_codec->isPostProcessing())
|
||||||
|
post_processing_codecs.insert(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
String codec_description = queryToString(codecs_descriptions);
|
||||||
|
|
||||||
|
if (sanity_check)
|
||||||
|
{
|
||||||
|
if (codecs_descriptions->children.size() > 1 && has_none)
|
||||||
|
throw Exception(
|
||||||
|
"It does not make sense to have codec NONE along with other compression codecs: " + codec_description
|
||||||
|
+ ". (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
|
||||||
|
ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
|
||||||
|
/// Allow to explicitly specify single NONE codec if user don't want any compression.
|
||||||
|
/// But applying other transformations solely without compression (e.g. Delta) does not make sense.
|
||||||
|
/// It's okay to apply post-processing codecs solely without anything else.
|
||||||
|
if (!is_compression && !has_none && post_processing_codecs.size() != codecs_descriptions->children.size())
|
||||||
|
throw Exception(
|
||||||
|
"Compression codec " + codec_description
|
||||||
|
+ " does not compress anything."
|
||||||
|
" You may want to add generic compression algorithm after other transformations, like: "
|
||||||
|
+ codec_description
|
||||||
|
+ ", LZ4."
|
||||||
|
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
|
||||||
|
ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
|
||||||
|
/// It does not make sense to apply any non-post-processing codecs
|
||||||
|
/// after post-processing one.
|
||||||
|
if (!post_processing_codecs.empty() &&
|
||||||
|
*post_processing_codecs.begin() != codecs_descriptions->children.size() - post_processing_codecs.size())
|
||||||
|
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
|
||||||
|
" because it does not make sense to apply any non-post-processing codecs after"
|
||||||
|
" post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs'"
|
||||||
|
" to skip this check).", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
|
||||||
|
/// It does not make sense to apply any transformations after generic compression algorithm
|
||||||
|
/// So, generic compression can be only one and only at the end.
|
||||||
|
if (generic_compression_codec_pos &&
|
||||||
|
*generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - post_processing_codecs.size())
|
||||||
|
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
|
||||||
|
" because it does not make sense to apply any transformations after generic compression algorithm."
|
||||||
|
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/// For columns with nested types like Tuple(UInt32, UInt64) we
|
||||||
|
/// obviously cannot substitute parameters for codecs which depend on
|
||||||
|
/// data type, because for the first column Delta(4) is suitable and
|
||||||
|
/// Delta(8) for the second. So we should leave codec description as is
|
||||||
|
/// and deduce them in get method for each subtype separately. For all
|
||||||
|
/// other types it's better to substitute parameters, for better
|
||||||
|
/// readability and backward compatibility.
|
||||||
|
if (can_substitute_codec_arguments)
|
||||||
|
{
|
||||||
|
std::shared_ptr<ASTFunction> result = std::make_shared<ASTFunction>();
|
||||||
|
result->name = "CODEC";
|
||||||
|
result->arguments = codecs_descriptions;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return ast;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw Exception("Unknown codec family: " + queryToString(ast), ErrorCodes::UNKNOWN_CODEC);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@ -3,8 +3,3 @@ target_link_libraries (compressed_buffer PRIVATE dbms)
|
|||||||
|
|
||||||
add_executable (cached_compressed_read_buffer cached_compressed_read_buffer.cpp)
|
add_executable (cached_compressed_read_buffer cached_compressed_read_buffer.cpp)
|
||||||
target_link_libraries (cached_compressed_read_buffer PRIVATE dbms)
|
target_link_libraries (cached_compressed_read_buffer PRIVATE dbms)
|
||||||
|
|
||||||
if (ENABLE_FUZZING)
|
|
||||||
add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp)
|
|
||||||
target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
|
|
||||||
endif ()
|
|
||||||
|
2
src/Compression/fuzzers/CMakeLists.txt
Normal file
2
src/Compression/fuzzers/CMakeLists.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp)
|
||||||
|
target_link_libraries (compressed_buffer_fuzzer PRIVATE fuzz_compression clickhouse_common_io ${LIB_FUZZING_ENGINE})
|
@ -1,3 +1,7 @@
|
|||||||
if (ENABLE_EXAMPLES)
|
if (ENABLE_EXAMPLES)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (ENABLE_FUZZING)
|
||||||
|
add_subdirectory(fuzzers)
|
||||||
|
endif()
|
||||||
|
@ -8,11 +8,6 @@ target_link_libraries (field PRIVATE dbms)
|
|||||||
add_executable (string_ref_hash string_ref_hash.cpp)
|
add_executable (string_ref_hash string_ref_hash.cpp)
|
||||||
target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io)
|
target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io)
|
||||||
|
|
||||||
if (ENABLE_FUZZING)
|
|
||||||
add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp)
|
|
||||||
target_link_libraries (names_and_types_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
add_executable (mysql_protocol mysql_protocol.cpp)
|
add_executable (mysql_protocol mysql_protocol.cpp)
|
||||||
target_link_libraries (mysql_protocol PRIVATE dbms)
|
target_link_libraries (mysql_protocol PRIVATE dbms)
|
||||||
if(USE_SSL)
|
if(USE_SSL)
|
||||||
|
2
src/Core/fuzzers/CMakeLists.txt
Normal file
2
src/Core/fuzzers/CMakeLists.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp)
|
||||||
|
target_link_libraries (names_and_types_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
|
@ -26,23 +26,6 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
IDataType::~IDataType() = default;
|
IDataType::~IDataType() = default;
|
||||||
|
|
||||||
String IDataType::getName() const
|
|
||||||
{
|
|
||||||
if (custom_name)
|
|
||||||
{
|
|
||||||
return custom_name->getName();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return doGetName();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
String IDataType::doGetName() const
|
|
||||||
{
|
|
||||||
return getFamilyName();
|
|
||||||
}
|
|
||||||
|
|
||||||
void IDataType::updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint)
|
void IDataType::updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint)
|
||||||
{
|
{
|
||||||
/// Update the average value size hint if amount of read rows isn't too small
|
/// Update the average value size hint if amount of read rows isn't too small
|
||||||
|
@ -62,7 +62,13 @@ public:
|
|||||||
/// static constexpr bool is_parametric = false;
|
/// static constexpr bool is_parametric = false;
|
||||||
|
|
||||||
/// Name of data type (examples: UInt64, Array(String)).
|
/// Name of data type (examples: UInt64, Array(String)).
|
||||||
String getName() const;
|
String getName() const
|
||||||
|
{
|
||||||
|
if (custom_name)
|
||||||
|
return custom_name->getName();
|
||||||
|
else
|
||||||
|
return doGetName();
|
||||||
|
}
|
||||||
|
|
||||||
/// Name of data type family (example: FixedString, Array).
|
/// Name of data type family (example: FixedString, Array).
|
||||||
virtual const char * getFamilyName() const = 0;
|
virtual const char * getFamilyName() const = 0;
|
||||||
@ -105,7 +111,7 @@ public:
|
|||||||
void enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback) const { enumerateStreams(serialization, callback, {}); }
|
void enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback) const { enumerateStreams(serialization, callback, {}); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual String doGetName() const;
|
virtual String doGetName() const { return getFamilyName(); }
|
||||||
virtual SerializationPtr doGetDefaultSerialization() const = 0;
|
virtual SerializationPtr doGetDefaultSerialization() const = 0;
|
||||||
|
|
||||||
DataTypePtr getTypeForSubstream(const ISerialization::SubstreamPath & substream_path) const;
|
DataTypePtr getTypeForSubstream(const ISerialization::SubstreamPath & substream_path) const;
|
||||||
|
@ -12,3 +12,7 @@ endif ()
|
|||||||
if(ENABLE_EXAMPLES)
|
if(ENABLE_EXAMPLES)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (ENABLE_FUZZING)
|
||||||
|
add_subdirectory(fuzzers)
|
||||||
|
endif()
|
||||||
|
@ -8,14 +8,3 @@ target_link_libraries(select_parser PRIVATE clickhouse_parsers)
|
|||||||
|
|
||||||
add_executable(create_parser create_parser.cpp ${SRCS})
|
add_executable(create_parser create_parser.cpp ${SRCS})
|
||||||
target_link_libraries(create_parser PRIVATE clickhouse_parsers)
|
target_link_libraries(create_parser PRIVATE clickhouse_parsers)
|
||||||
|
|
||||||
if (ENABLE_FUZZING)
|
|
||||||
add_executable(lexer_fuzzer lexer_fuzzer.cpp ${SRCS})
|
|
||||||
target_link_libraries(lexer_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})
|
|
||||||
|
|
||||||
add_executable(select_parser_fuzzer select_parser_fuzzer.cpp ${SRCS})
|
|
||||||
target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})
|
|
||||||
|
|
||||||
add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS})
|
|
||||||
target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})
|
|
||||||
endif ()
|
|
||||||
|
8
src/Parsers/fuzzers/CMakeLists.txt
Normal file
8
src/Parsers/fuzzers/CMakeLists.txt
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
add_executable(lexer_fuzzer lexer_fuzzer.cpp ${SRCS})
|
||||||
|
target_link_libraries(lexer_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})
|
||||||
|
|
||||||
|
add_executable(select_parser_fuzzer select_parser_fuzzer.cpp ${SRCS})
|
||||||
|
target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})
|
||||||
|
|
||||||
|
add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS})
|
||||||
|
target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})
|
@ -15,7 +15,10 @@ try
|
|||||||
DB::ParserCreateQuery parser;
|
DB::ParserCreateQuery parser;
|
||||||
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);
|
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);
|
||||||
|
|
||||||
DB::formatAST(*ast, std::cerr);
|
DB::WriteBufferFromOwnString wb;
|
||||||
|
DB::formatAST(*ast, wb);
|
||||||
|
|
||||||
|
std::cerr << wb.str() << std::endl;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
@ -14,7 +14,10 @@ try
|
|||||||
DB::ParserQueryWithOutput parser(input.data() + input.size());
|
DB::ParserQueryWithOutput parser(input.data() + input.size());
|
||||||
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);
|
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);
|
||||||
|
|
||||||
DB::formatAST(*ast, std::cerr);
|
DB::WriteBufferFromOwnString wb;
|
||||||
|
DB::formatAST(*ast, wb);
|
||||||
|
|
||||||
|
std::cerr << wb.str() << std::endl;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
@ -1,6 +1,10 @@
|
|||||||
add_subdirectory(MergeTree)
|
add_subdirectory(MergeTree)
|
||||||
add_subdirectory(System)
|
add_subdirectory(System)
|
||||||
|
|
||||||
if(ENABLE_EXAMPLES)
|
if (ENABLE_EXAMPLES)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (ENABLE_FUZZING)
|
||||||
|
add_subdirectory(fuzzers)
|
||||||
|
endif()
|
||||||
|
@ -23,10 +23,3 @@ target_link_libraries (transform_part_zk_nodes
|
|||||||
string_utils
|
string_utils
|
||||||
)
|
)
|
||||||
|
|
||||||
if (ENABLE_FUZZING)
|
|
||||||
add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.cpp)
|
|
||||||
target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
|
|
||||||
|
|
||||||
add_executable (columns_description_fuzzer columns_description_fuzzer.cpp)
|
|
||||||
target_link_libraries (columns_description_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
|
|
||||||
endif ()
|
|
||||||
|
11
src/Storages/fuzzers/CMakeLists.txt
Normal file
11
src/Storages/fuzzers/CMakeLists.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
add_executable (mergetree_checksum_fuzzer
|
||||||
|
mergetree_checksum_fuzzer.cpp
|
||||||
|
"${ClickHouse_SOURCE_DIR}/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp"
|
||||||
|
"${ClickHouse_SOURCE_DIR}/src/Compression/CompressedReadBuffer.cpp"
|
||||||
|
"${ClickHouse_SOURCE_DIR}/src/Compression/CompressedWriteBuffer.cpp"
|
||||||
|
)
|
||||||
|
target_link_libraries (mergetree_checksum_fuzzer PRIVATE clickhouse_common_io fuzz_compression ${LIB_FUZZING_ENGINE})
|
||||||
|
|
||||||
|
add_executable (columns_description_fuzzer columns_description_fuzzer.cpp)
|
||||||
|
target_link_libraries (columns_description_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
|
Loading…
Reference in New Issue
Block a user