mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-04 05:22:17 +00:00
1254 lines
37 KiB
C++
1254 lines
37 KiB
C++
#include "QueryFuzzer.h"
|
|
#include <DataTypes/DataTypeArray.h>
|
|
#include <DataTypes/DataTypeFactory.h>
|
|
#include <DataTypes/DataTypeFixedString.h>
|
|
#include <DataTypes/DataTypeLowCardinality.h>
|
|
#include <DataTypes/DataTypeMap.h>
|
|
#include <DataTypes/DataTypeNullable.h>
|
|
#include <DataTypes/DataTypeTuple.h>
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
#include <DataTypes/DataTypesDecimal.h>
|
|
#include <DataTypes/IDataType.h>
|
|
#include <IO/ReadBufferFromString.h>
|
|
#include <IO/WriteHelpers.h>
|
|
#include <Parsers/ASTColumnDeclaration.h>
|
|
#include <Parsers/ASTCreateQuery.h>
|
|
#include <Parsers/IAST_fwd.h>
|
|
#include <Parsers/ParserDataType.h>
|
|
#include <Parsers/ParserInsertQuery.h>
|
|
#include <Parsers/ASTDropQuery.h>
|
|
|
|
#include <unordered_set>
|
|
|
|
#include <Core/Types.h>
|
|
#include <IO/Operators.h>
|
|
#include <IO/UseSSL.h>
|
|
#include <IO/WriteBufferFromOStream.h>
|
|
#include <Parsers/ASTExpressionList.h>
|
|
#include <Parsers/ASTFunction.h>
|
|
#include <Parsers/ASTIdentifier.h>
|
|
#include <Parsers/ASTInsertQuery.h>
|
|
#include <Parsers/ASTLiteral.h>
|
|
#include <Parsers/ASTOrderByElement.h>
|
|
#include <Parsers/ASTQueryWithOutput.h>
|
|
#include <Parsers/ASTSelectIntersectExceptQuery.h>
|
|
#include <Parsers/ASTSelectQuery.h>
|
|
#include <Parsers/ASTSelectWithUnionQuery.h>
|
|
#include <Parsers/ASTSetQuery.h>
|
|
#include <Parsers/ASTSubquery.h>
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
|
#include <Parsers/ASTUseQuery.h>
|
|
#include <Parsers/ASTWindowDefinition.h>
|
|
#include <Parsers/ParserQuery.h>
|
|
#include <Parsers/formatAST.h>
|
|
#include <Parsers/parseQuery.h>
|
|
#include <pcg_random.hpp>
|
|
#include <Common/assert_cast.h>
|
|
#include <Common/typeid_cast.h>
|
|
|
|
|
|
namespace DB
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
{
|
|
extern const int TOO_DEEP_RECURSION;
|
|
}
|
|
|
|
Field QueryFuzzer::getRandomField(int type)
|
|
{
|
|
static constexpr Int64 bad_int64_values[]
|
|
= {-2, -1, 0, 1, 2, 3, 7, 10, 100, 255, 256, 257, 1023, 1024,
|
|
1025, 65535, 65536, 65537, 1024 * 1024 - 1, 1024 * 1024,
|
|
1024 * 1024 + 1, INT_MIN - 1ll, INT_MIN, INT_MIN + 1,
|
|
INT_MAX - 1, INT_MAX, INT_MAX + 1ll, INT64_MIN, INT64_MIN + 1,
|
|
INT64_MAX - 1, INT64_MAX};
|
|
switch (type)
|
|
{
|
|
case 0:
|
|
{
|
|
return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values)
|
|
/ sizeof(*bad_int64_values))];
|
|
}
|
|
case 1:
|
|
{
|
|
static constexpr double values[]
|
|
= {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999,
|
|
1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20,
|
|
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))];
|
|
}
|
|
case 2:
|
|
{
|
|
static constexpr UInt64 scales[] = {0, 1, 2, 10};
|
|
return DecimalField<Decimal64>(
|
|
bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))],
|
|
static_cast<UInt32>(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))])
|
|
);
|
|
}
|
|
default:
|
|
assert(false);
|
|
return Null{};
|
|
}
|
|
}
|
|
|
|
Field QueryFuzzer::fuzzField(Field field)
|
|
{
|
|
const auto type = field.getType();
|
|
|
|
int type_index = -1;
|
|
|
|
if (type == Field::Types::Int64
|
|
|| type == Field::Types::UInt64)
|
|
{
|
|
type_index = 0;
|
|
}
|
|
else if (type == Field::Types::Float64)
|
|
{
|
|
type_index = 1;
|
|
}
|
|
else if (type == Field::Types::Decimal32
|
|
|| type == Field::Types::Decimal64
|
|
|| type == Field::Types::Decimal128
|
|
|| type == Field::Types::Decimal256)
|
|
{
|
|
type_index = 2;
|
|
}
|
|
|
|
if (fuzz_rand() % 20 == 0)
|
|
{
|
|
return Null{};
|
|
}
|
|
|
|
if (type_index >= 0)
|
|
{
|
|
if (fuzz_rand() % 20 == 0)
|
|
{
|
|
// Change type sometimes, but not often, because it mostly leads to
|
|
// boring errors.
|
|
type_index = fuzz_rand() % 3;
|
|
}
|
|
return getRandomField(type_index);
|
|
}
|
|
|
|
if (type == Field::Types::String)
|
|
{
|
|
auto & str = field.get<std::string>();
|
|
UInt64 action = fuzz_rand() % 10;
|
|
switch (action)
|
|
{
|
|
case 0:
|
|
str = "";
|
|
break;
|
|
case 1:
|
|
str = str + str;
|
|
break;
|
|
case 2:
|
|
str = str + str + str + str;
|
|
break;
|
|
case 4:
|
|
if (!str.empty())
|
|
{
|
|
str[fuzz_rand() % str.size()] = '\0';
|
|
}
|
|
break;
|
|
default:
|
|
// Do nothing
|
|
break;
|
|
}
|
|
}
|
|
else if (type == Field::Types::Array)
|
|
{
|
|
auto & arr = field.get<Array>();
|
|
|
|
if (fuzz_rand() % 5 == 0 && !arr.empty())
|
|
{
|
|
size_t pos = fuzz_rand() % arr.size();
|
|
arr.erase(arr.begin() + pos);
|
|
std::cerr << "erased\n";
|
|
}
|
|
|
|
if (fuzz_rand() % 5 == 0)
|
|
{
|
|
if (!arr.empty())
|
|
{
|
|
size_t pos = fuzz_rand() % arr.size();
|
|
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
|
|
std::cerr << fmt::format("inserted (pos {})\n", pos);
|
|
}
|
|
else
|
|
{
|
|
arr.insert(arr.begin(), getRandomField(0));
|
|
std::cerr << "inserted (0)\n";
|
|
}
|
|
|
|
}
|
|
|
|
for (auto & element : arr)
|
|
{
|
|
element = fuzzField(element);
|
|
}
|
|
}
|
|
else if (type == Field::Types::Tuple)
|
|
{
|
|
auto & arr = field.get<Tuple>();
|
|
|
|
if (fuzz_rand() % 5 == 0 && !arr.empty())
|
|
{
|
|
size_t pos = fuzz_rand() % arr.size();
|
|
arr.erase(arr.begin() + pos);
|
|
std::cerr << "erased\n";
|
|
}
|
|
|
|
if (fuzz_rand() % 5 == 0)
|
|
{
|
|
if (!arr.empty())
|
|
{
|
|
size_t pos = fuzz_rand() % arr.size();
|
|
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
|
|
std::cerr << fmt::format("inserted (pos {})\n", pos);
|
|
}
|
|
else
|
|
{
|
|
arr.insert(arr.begin(), getRandomField(0));
|
|
std::cerr << "inserted (0)\n";
|
|
}
|
|
|
|
}
|
|
|
|
for (auto & element : arr)
|
|
{
|
|
element = fuzzField(element);
|
|
}
|
|
}
|
|
|
|
return field;
|
|
}
|
|
|
|
ASTPtr QueryFuzzer::getRandomColumnLike()
|
|
{
|
|
if (column_like.empty())
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
ASTPtr new_ast = column_like[fuzz_rand() % column_like.size()]->clone();
|
|
new_ast->setAlias("");
|
|
|
|
return new_ast;
|
|
}
|
|
|
|
ASTPtr QueryFuzzer::getRandomExpressionList()
|
|
{
|
|
if (column_like.empty())
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
ASTPtr new_ast = std::make_shared<ASTExpressionList>();
|
|
for (size_t i = 0; i < fuzz_rand() % 5 + 1; ++i)
|
|
{
|
|
new_ast->children.push_back(getRandomColumnLike());
|
|
}
|
|
return new_ast;
|
|
}
|
|
|
|
void QueryFuzzer::replaceWithColumnLike(ASTPtr & ast)
|
|
{
|
|
if (column_like.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
std::string old_alias = ast->tryGetAlias();
|
|
ast = getRandomColumnLike();
|
|
ast->setAlias(old_alias);
|
|
}
|
|
|
|
void QueryFuzzer::replaceWithTableLike(ASTPtr & ast)
|
|
{
|
|
if (table_like.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
ASTPtr new_ast = table_like[fuzz_rand() % table_like.size()]->clone();
|
|
|
|
std::string old_alias = ast->tryGetAlias();
|
|
new_ast->setAlias(old_alias);
|
|
|
|
ast = new_ast;
|
|
}
|
|
|
|
void QueryFuzzer::fuzzOrderByElement(ASTOrderByElement * elem)
|
|
{
|
|
switch (fuzz_rand() % 10)
|
|
{
|
|
case 0:
|
|
elem->direction = -1;
|
|
break;
|
|
case 1:
|
|
elem->direction = 1;
|
|
break;
|
|
case 2:
|
|
elem->nulls_direction = -1;
|
|
elem->nulls_direction_was_explicitly_specified = true;
|
|
break;
|
|
case 3:
|
|
elem->nulls_direction = 1;
|
|
elem->nulls_direction_was_explicitly_specified = true;
|
|
break;
|
|
case 4:
|
|
elem->nulls_direction = elem->direction;
|
|
elem->nulls_direction_was_explicitly_specified = false;
|
|
break;
|
|
default:
|
|
// do nothing
|
|
break;
|
|
}
|
|
}
|
|
|
|
void QueryFuzzer::fuzzOrderByList(IAST * ast)
|
|
{
|
|
if (!ast)
|
|
{
|
|
return;
|
|
}
|
|
|
|
auto * list = assert_cast<ASTExpressionList *>(ast);
|
|
|
|
// Remove element
|
|
if (fuzz_rand() % 50 == 0 && list->children.size() > 1)
|
|
{
|
|
// Don't remove last element -- this leads to questionable
|
|
// constructs such as empty select.
|
|
list->children.erase(list->children.begin()
|
|
+ fuzz_rand() % list->children.size());
|
|
}
|
|
|
|
// Add element
|
|
if (fuzz_rand() % 50 == 0)
|
|
{
|
|
auto * pos = list->children.empty() ? list->children.begin() : list->children.begin() + fuzz_rand() % list->children.size();
|
|
auto col = getRandomColumnLike();
|
|
if (col)
|
|
{
|
|
auto elem = std::make_shared<ASTOrderByElement>();
|
|
elem->children.push_back(col);
|
|
elem->direction = 1;
|
|
elem->nulls_direction = 1;
|
|
elem->nulls_direction_was_explicitly_specified = false;
|
|
elem->with_fill = false;
|
|
|
|
list->children.insert(pos, elem);
|
|
}
|
|
else
|
|
{
|
|
std::cerr << "No random column.\n";
|
|
}
|
|
}
|
|
|
|
// We don't have to recurse here to fuzz the children, this is handled by
|
|
// the generic recursion into IAST.children.
|
|
}
|
|
|
|
void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
|
|
{
|
|
if (!ast)
|
|
{
|
|
return;
|
|
}
|
|
|
|
auto * impl = assert_cast<ASTExpressionList *>(ast);
|
|
|
|
// Remove element
|
|
if (fuzz_rand() % 50 == 0 && impl->children.size() > 1)
|
|
{
|
|
// Don't remove last element -- this leads to questionable
|
|
// constructs such as empty select.
|
|
impl->children.erase(impl->children.begin()
|
|
+ fuzz_rand() % impl->children.size());
|
|
}
|
|
|
|
// Add element
|
|
if (fuzz_rand() % 50 == 0)
|
|
{
|
|
auto * pos = impl->children.empty() ? impl->children.begin() : impl->children.begin() + fuzz_rand() % impl->children.size();
|
|
auto col = getRandomColumnLike();
|
|
if (col)
|
|
impl->children.insert(pos, col);
|
|
else
|
|
std::cerr << "No random column.\n";
|
|
}
|
|
|
|
// We don't have to recurse here to fuzz the children, this is handled by
|
|
// the generic recursion into IAST.children.
|
|
}
|
|
|
|
void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
|
|
{
|
|
switch (fuzz_rand() % 40)
|
|
{
|
|
case 0:
|
|
{
|
|
const auto r = fuzz_rand() % 3;
|
|
def.frame_type = r == 0 ? WindowFrame::FrameType::ROWS
|
|
: r == 1 ? WindowFrame::FrameType::RANGE
|
|
: WindowFrame::FrameType::GROUPS;
|
|
break;
|
|
}
|
|
case 1:
|
|
{
|
|
const auto r = fuzz_rand() % 3;
|
|
def.frame_begin_type = r == 0 ? WindowFrame::BoundaryType::Unbounded
|
|
: r == 1 ? WindowFrame::BoundaryType::Current
|
|
: WindowFrame::BoundaryType::Offset;
|
|
|
|
if (def.frame_begin_type == WindowFrame::BoundaryType::Offset)
|
|
{
|
|
// The offsets are fuzzed normally through 'children'.
|
|
def.frame_begin_offset
|
|
= std::make_shared<ASTLiteral>(getRandomField(0));
|
|
}
|
|
else
|
|
{
|
|
def.frame_begin_offset = nullptr;
|
|
}
|
|
break;
|
|
}
|
|
case 2:
|
|
{
|
|
const auto r = fuzz_rand() % 3;
|
|
def.frame_end_type = r == 0 ? WindowFrame::BoundaryType::Unbounded
|
|
: r == 1 ? WindowFrame::BoundaryType::Current
|
|
: WindowFrame::BoundaryType::Offset;
|
|
|
|
if (def.frame_end_type == WindowFrame::BoundaryType::Offset)
|
|
{
|
|
def.frame_end_offset
|
|
= std::make_shared<ASTLiteral>(getRandomField(0));
|
|
}
|
|
else
|
|
{
|
|
def.frame_end_offset = nullptr;
|
|
}
|
|
break;
|
|
}
|
|
case 5:
|
|
{
|
|
def.frame_begin_preceding = fuzz_rand() % 2;
|
|
break;
|
|
}
|
|
case 6:
|
|
{
|
|
def.frame_end_preceding = fuzz_rand() % 2;
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (def.frame_type == WindowFrame::FrameType::RANGE
|
|
&& def.frame_begin_type == WindowFrame::BoundaryType::Unbounded
|
|
&& def.frame_begin_preceding
|
|
&& def.frame_end_type == WindowFrame::BoundaryType::Current)
|
|
{
|
|
def.frame_is_default = true; /* NOLINT clang-tidy could you just shut up please */
|
|
}
|
|
else
|
|
{
|
|
def.frame_is_default = false;
|
|
}
|
|
}
|
|
|
|
bool QueryFuzzer::isSuitableForFuzzing(const ASTCreateQuery & create)
|
|
{
|
|
return create.columns_list && create.columns_list->columns;
|
|
}
|
|
|
|
static String getOriginalTableName(const String & full_name)
|
|
{
|
|
return full_name.substr(0, full_name.find("__fuzz_"));
|
|
}
|
|
|
|
static String getFuzzedTableName(const String & original_name, size_t index)
|
|
{
|
|
return original_name + "__fuzz_" + toString(index);
|
|
}
|
|
|
|
void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create)
|
|
{
|
|
if (create.columns_list && create.columns_list->columns)
|
|
{
|
|
for (auto & ast : create.columns_list->columns->children)
|
|
{
|
|
if (auto * column = ast->as<ASTColumnDeclaration>())
|
|
{
|
|
fuzzColumnDeclaration(*column);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (create.storage && create.storage->engine)
|
|
{
|
|
/// Replace ReplicatedMergeTree to ordinary MergeTree
|
|
/// to avoid inconsistency of metadata in zookeeper.
|
|
auto & engine_name = create.storage->engine->name;
|
|
if (startsWith(engine_name, "Replicated"))
|
|
{
|
|
engine_name = engine_name.substr(strlen("Replicated"));
|
|
if (auto & arguments = create.storage->engine->arguments)
|
|
{
|
|
auto & children = arguments->children;
|
|
if (children.size() <= 2)
|
|
arguments.reset();
|
|
else
|
|
children.erase(children.begin(), children.begin() + 2);
|
|
}
|
|
}
|
|
}
|
|
|
|
auto full_name = create.getTable();
|
|
auto original_name = getOriginalTableName(full_name);
|
|
size_t index = index_of_fuzzed_table[original_name]++;
|
|
auto new_name = getFuzzedTableName(original_name, index);
|
|
|
|
create.setTable(new_name);
|
|
|
|
SipHash sip_hash;
|
|
sip_hash.update(original_name);
|
|
if (create.columns_list)
|
|
create.columns_list->updateTreeHash(sip_hash, /*ignore_aliases=*/ true);
|
|
if (create.storage)
|
|
create.storage->updateTreeHash(sip_hash, /*ignore_aliases=*/ true);
|
|
|
|
const auto hash = getSipHash128AsPair(sip_hash);
|
|
|
|
/// Save only tables with unique definition.
|
|
if (created_tables_hashes.insert(hash).second)
|
|
original_table_name_to_fuzzed[original_name].insert(new_name);
|
|
}
|
|
|
|
void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column)
|
|
{
|
|
if (column.type)
|
|
{
|
|
auto data_type = fuzzDataType(DataTypeFactory::instance().get(column.type));
|
|
|
|
ParserDataType parser;
|
|
column.type = parseQuery(parser, data_type->getName(), DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
|
}
|
|
}
|
|
|
|
DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type)
|
|
{
|
|
/// Do not replace Array/Tuple/etc. with not Array/Tuple too often.
|
|
const auto * type_array = typeid_cast<const DataTypeArray *>(type.get());
|
|
if (type_array && fuzz_rand() % 4 != 0)
|
|
return std::make_shared<DataTypeArray>(fuzzDataType(type_array->getNestedType()));
|
|
|
|
const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get());
|
|
if (type_tuple && fuzz_rand() % 4 != 0)
|
|
{
|
|
DataTypes elements;
|
|
for (const auto & element : type_tuple->getElements())
|
|
elements.push_back(fuzzDataType(element));
|
|
|
|
return type_tuple->haveExplicitNames()
|
|
? std::make_shared<DataTypeTuple>(elements, type_tuple->getElementNames())
|
|
: std::make_shared<DataTypeTuple>(elements);
|
|
}
|
|
|
|
const auto * type_map = typeid_cast<const DataTypeMap *>(type.get());
|
|
if (type_map && fuzz_rand() % 4 != 0)
|
|
{
|
|
auto key_type = fuzzDataType(type_map->getKeyType());
|
|
auto value_type = fuzzDataType(type_map->getValueType());
|
|
if (!DataTypeMap::checkKeyType(key_type))
|
|
key_type = type_map->getKeyType();
|
|
|
|
return std::make_shared<DataTypeMap>(key_type, value_type);
|
|
}
|
|
|
|
const auto * type_nullable = typeid_cast<const DataTypeNullable *>(type.get());
|
|
if (type_nullable)
|
|
{
|
|
size_t tmp = fuzz_rand() % 3;
|
|
if (tmp == 0)
|
|
return fuzzDataType(type_nullable->getNestedType());
|
|
|
|
if (tmp == 1)
|
|
{
|
|
auto nested_type = fuzzDataType(type_nullable->getNestedType());
|
|
if (nested_type->canBeInsideNullable())
|
|
return std::make_shared<DataTypeNullable>(nested_type);
|
|
}
|
|
}
|
|
|
|
const auto * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(type.get());
|
|
if (type_low_cardinality)
|
|
{
|
|
size_t tmp = fuzz_rand() % 3;
|
|
if (tmp == 0)
|
|
return fuzzDataType(type_low_cardinality->getDictionaryType());
|
|
|
|
if (tmp == 1)
|
|
{
|
|
auto nested_type = fuzzDataType(type_low_cardinality->getDictionaryType());
|
|
if (nested_type->canBeInsideLowCardinality())
|
|
return std::make_shared<DataTypeLowCardinality>(nested_type);
|
|
}
|
|
}
|
|
|
|
size_t tmp = fuzz_rand() % 8;
|
|
if (tmp == 0)
|
|
return std::make_shared<DataTypeArray>(type);
|
|
|
|
if (tmp <= 1 && type->canBeInsideNullable())
|
|
return std::make_shared<DataTypeNullable>(type);
|
|
|
|
if (tmp <= 2 && type->canBeInsideLowCardinality())
|
|
return std::make_shared<DataTypeLowCardinality>(type);
|
|
|
|
if (tmp <= 3)
|
|
return getRandomType();
|
|
|
|
return type;
|
|
}
|
|
|
|
DataTypePtr QueryFuzzer::getRandomType()
|
|
{
|
|
auto type_id = static_cast<TypeIndex>(fuzz_rand() % static_cast<size_t>(TypeIndex::Tuple) + 1);
|
|
|
|
if (type_id == TypeIndex::Tuple)
|
|
{
|
|
size_t tuple_size = fuzz_rand() % 6 + 1;
|
|
DataTypes elements;
|
|
for (size_t i = 0; i < tuple_size; ++i)
|
|
elements.push_back(getRandomType());
|
|
return std::make_shared<DataTypeTuple>(elements);
|
|
}
|
|
|
|
if (type_id == TypeIndex::Array)
|
|
return std::make_shared<DataTypeArray>(getRandomType());
|
|
|
|
/// NOLINTBEGIN(bugprone-macro-parentheses)
|
|
#define DISPATCH(DECIMAL) \
|
|
if (type_id == TypeIndex::DECIMAL) \
|
|
return std::make_shared<DataTypeDecimal<DECIMAL>>( \
|
|
DataTypeDecimal<DECIMAL>::maxPrecision(), \
|
|
(fuzz_rand() % DataTypeDecimal<DECIMAL>::maxPrecision()) + 1);
|
|
|
|
DISPATCH(Decimal32)
|
|
DISPATCH(Decimal64)
|
|
DISPATCH(Decimal128)
|
|
DISPATCH(Decimal256)
|
|
#undef DISPATCH
|
|
/// NOLINTEND(bugprone-macro-parentheses)
|
|
|
|
if (type_id == TypeIndex::FixedString)
|
|
return std::make_shared<DataTypeFixedString>(fuzz_rand() % 20);
|
|
|
|
if (type_id == TypeIndex::Enum8)
|
|
return std::make_shared<DataTypeUInt8>();
|
|
|
|
if (type_id == TypeIndex::Enum16)
|
|
return std::make_shared<DataTypeUInt16>();
|
|
|
|
return DataTypeFactory::instance().get(String(magic_enum::enum_name(type_id)));
|
|
}
|
|
|
|
void QueryFuzzer::fuzzTableName(ASTTableExpression & table)
|
|
{
|
|
if (!table.database_and_table_name || fuzz_rand() % 3 == 0)
|
|
return;
|
|
|
|
const auto * identifier = table.database_and_table_name->as<ASTTableIdentifier>();
|
|
if (!identifier)
|
|
return;
|
|
|
|
auto table_id = identifier->getTableId();
|
|
if (table_id.empty())
|
|
return;
|
|
|
|
auto original_name = getOriginalTableName(table_id.getTableName());
|
|
auto it = original_table_name_to_fuzzed.find(original_name);
|
|
if (it != original_table_name_to_fuzzed.end() && !it->second.empty())
|
|
{
|
|
auto new_table_name = it->second.begin();
|
|
std::advance(new_table_name, fuzz_rand() % it->second.size());
|
|
StorageID new_table_id(table_id.database_name, *new_table_name);
|
|
table.database_and_table_name = std::make_shared<ASTTableIdentifier>(new_table_id);
|
|
}
|
|
}
|
|
|
|
void QueryFuzzer::fuzzExplainQuery(ASTExplainQuery & explain)
|
|
{
|
|
explain.setExplainKind(fuzzExplainKind(explain.getKind()));
|
|
|
|
bool settings_have_fuzzed = false;
|
|
for (auto & child : explain.children)
|
|
{
|
|
if (auto * settings_ast = typeid_cast<ASTSetQuery *>(child.get()))
|
|
{
|
|
fuzzExplainSettings(*settings_ast, explain.getKind());
|
|
settings_have_fuzzed = true;
|
|
}
|
|
/// Fuzzing other child like Explain Query
|
|
else
|
|
{
|
|
fuzz(child);
|
|
}
|
|
}
|
|
|
|
if (!settings_have_fuzzed)
|
|
{
|
|
auto settings_ast = std::make_shared<ASTSetQuery>();
|
|
settings_ast->is_standalone = false;
|
|
fuzzExplainSettings(*settings_ast, explain.getKind());
|
|
explain.setSettings(settings_ast);
|
|
}
|
|
}
|
|
|
|
ASTExplainQuery::ExplainKind QueryFuzzer::fuzzExplainKind(ASTExplainQuery::ExplainKind kind)
|
|
{
|
|
if (fuzz_rand() % 20 == 0)
|
|
{
|
|
return kind;
|
|
}
|
|
else if (fuzz_rand() % 11 == 0)
|
|
{
|
|
return ASTExplainQuery::ExplainKind::ParsedAST;
|
|
}
|
|
else if (fuzz_rand() % 11 == 0)
|
|
{
|
|
return ASTExplainQuery::ExplainKind::AnalyzedSyntax;
|
|
}
|
|
else if (fuzz_rand() % 11 == 0)
|
|
{
|
|
return ASTExplainQuery::ExplainKind::QueryTree;
|
|
}
|
|
else if (fuzz_rand() % 11 == 0)
|
|
{
|
|
return ASTExplainQuery::ExplainKind::QueryPlan;
|
|
}
|
|
else if (fuzz_rand() % 11 == 0)
|
|
{
|
|
return ASTExplainQuery::ExplainKind::QueryPipeline;
|
|
}
|
|
else if (fuzz_rand() % 11 == 0)
|
|
{
|
|
return ASTExplainQuery::ExplainKind::QueryEstimates;
|
|
}
|
|
else if (fuzz_rand() % 11 == 0)
|
|
{
|
|
return ASTExplainQuery::ExplainKind::TableOverride;
|
|
}
|
|
else if (fuzz_rand() % 11 == 0)
|
|
{
|
|
return ASTExplainQuery::ExplainKind::CurrentTransaction;
|
|
}
|
|
return kind;
|
|
}
|
|
|
|
void QueryFuzzer::fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind)
|
|
{
|
|
auto & changes = settings_ast.changes;
|
|
|
|
static const std::unordered_map<ASTExplainQuery::ExplainKind, std::vector<String>> settings_by_kind
|
|
= {{ASTExplainQuery::ExplainKind::ParsedAST, {"graph", "optimize"}},
|
|
{ASTExplainQuery::ExplainKind::AnalyzedSyntax, {}},
|
|
{ASTExplainQuery::QueryTree, {"run_passes", "dump_passes", "dump_ast", "passes"}},
|
|
{ASTExplainQuery::ExplainKind::QueryPlan, {"header, description", "actions", "indexes", "optimize", "json", "sorting"}},
|
|
{ASTExplainQuery::ExplainKind::QueryPipeline, {"header", "graph=1", "compact"}},
|
|
{ASTExplainQuery::ExplainKind::QueryEstimates, {}},
|
|
{ASTExplainQuery::ExplainKind::TableOverride, {}},
|
|
{ASTExplainQuery::ExplainKind::CurrentTransaction, {}}};
|
|
|
|
const auto & settings = settings_by_kind.at(kind);
|
|
if (fuzz_rand() % 50 == 0 && !changes.empty())
|
|
{
|
|
changes.erase(changes.begin() + fuzz_rand() % changes.size());
|
|
}
|
|
|
|
for (const auto & setting : settings)
|
|
{
|
|
if (fuzz_rand() % 5 == 0)
|
|
{
|
|
changes.emplace_back(setting, true);
|
|
}
|
|
}
|
|
}
|
|
|
|
static ASTPtr tryParseInsertQuery(const String & full_query)
|
|
{
|
|
const char * pos = full_query.data();
|
|
const char * end = full_query.data() + full_query.size();
|
|
|
|
ParserInsertQuery parser(end, false);
|
|
String message;
|
|
|
|
return tryParseQuery(parser, pos, end, message, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
|
}
|
|
|
|
ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query)
|
|
{
|
|
auto parsed_query = tryParseInsertQuery(full_query);
|
|
if (!parsed_query)
|
|
return {};
|
|
|
|
const auto & insert = *parsed_query->as<ASTInsertQuery>();
|
|
if (!insert.table)
|
|
return {};
|
|
|
|
auto table_name = insert.getTable();
|
|
auto it = original_table_name_to_fuzzed.find(table_name);
|
|
if (it == original_table_name_to_fuzzed.end())
|
|
return {};
|
|
|
|
ASTs queries;
|
|
for (const auto & fuzzed_name : it->second)
|
|
{
|
|
/// Parse query from scratch for each table instead of clone,
|
|
/// to store proper pointers to inlined data,
|
|
/// which are not copied during clone.
|
|
auto & query = queries.emplace_back(tryParseInsertQuery(full_query));
|
|
query->as<ASTInsertQuery>()->setTable(fuzzed_name);
|
|
}
|
|
|
|
return queries;
|
|
}
|
|
|
|
ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
|
|
{
|
|
if (drop_query.kind != ASTDropQuery::Drop)
|
|
return {};
|
|
|
|
auto table_name = drop_query.getTable();
|
|
auto it = index_of_fuzzed_table.find(table_name);
|
|
if (it == index_of_fuzzed_table.end())
|
|
return {};
|
|
|
|
ASTs queries;
|
|
/// Drop all created tables, not only unique ones.
|
|
for (size_t i = 0; i < it->second; ++i)
|
|
{
|
|
auto fuzzed_name = getFuzzedTableName(table_name, i);
|
|
auto & query = queries.emplace_back(drop_query.clone());
|
|
query->as<ASTDropQuery>()->setTable(fuzzed_name);
|
|
/// Just in case add IF EXISTS to avoid exceptions.
|
|
query->as<ASTDropQuery>()->if_exists = true;
|
|
}
|
|
|
|
index_of_fuzzed_table.erase(it);
|
|
original_table_name_to_fuzzed.erase(table_name);
|
|
|
|
return queries;
|
|
}
|
|
|
|
void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
|
|
{
|
|
if (ast == nullptr)
|
|
return;
|
|
|
|
auto remove_fuzzed_table = [this](const auto & table_name)
|
|
{
|
|
auto pos = table_name.find("__fuzz_");
|
|
if (pos != std::string::npos)
|
|
{
|
|
auto original_name = table_name.substr(0, pos);
|
|
auto it = original_table_name_to_fuzzed.find(original_name);
|
|
if (it != original_table_name_to_fuzzed.end())
|
|
it->second.erase(table_name);
|
|
}
|
|
};
|
|
|
|
if (const auto * create = ast->as<ASTCreateQuery>())
|
|
remove_fuzzed_table(create->getTable());
|
|
|
|
if (const auto * insert = ast->as<ASTInsertQuery>())
|
|
remove_fuzzed_table(insert->getTable());
|
|
}
|
|
|
|
void QueryFuzzer::fuzz(ASTs & asts)
|
|
{
|
|
for (auto & ast : asts)
|
|
{
|
|
fuzz(ast);
|
|
}
|
|
}
|
|
|
|
struct ScopedIncrement
|
|
{
|
|
size_t & counter;
|
|
|
|
explicit ScopedIncrement(size_t & counter_) : counter(counter_) { ++counter; }
|
|
~ScopedIncrement() { --counter; }
|
|
};
|
|
|
|
void QueryFuzzer::fuzz(ASTPtr & ast)
|
|
{
|
|
if (!ast)
|
|
return;
|
|
|
|
// Check for exceeding max depth.
|
|
ScopedIncrement depth_increment(current_ast_depth);
|
|
if (current_ast_depth > 500)
|
|
{
|
|
// The AST is too deep (see the comment for current_ast_depth). Throw
|
|
// an exception to fail fast and not use this query as an etalon, or we'll
|
|
// end up in a very slow and useless loop. It also makes sense to set it
|
|
// lower than the default max parse depth on the server (1000), so that
|
|
// we don't get the useless error about parse depth from the server either.
|
|
throw Exception(ErrorCodes::TOO_DEEP_RECURSION,
|
|
"AST depth exceeded while fuzzing ({})", current_ast_depth);
|
|
}
|
|
|
|
// Check for loops.
|
|
auto [_, inserted] = debug_visited_nodes.insert(ast.get());
|
|
if (!inserted)
|
|
{
|
|
fmt::print(stderr, "The AST node '{}' was already visited before."
|
|
" Depth {}, {} visited nodes, current top AST:\n{}\n",
|
|
static_cast<void *>(ast.get()), current_ast_depth,
|
|
debug_visited_nodes.size(), (*debug_top_ast)->dumpTree());
|
|
assert(false);
|
|
}
|
|
|
|
// The fuzzing.
|
|
if (auto * with_union = typeid_cast<ASTSelectWithUnionQuery *>(ast.get()))
|
|
{
|
|
fuzz(with_union->list_of_selects);
|
|
/// Fuzzing SELECT query to EXPLAIN query randomly.
|
|
/// And we only fuzzing the root query into an EXPLAIN query, not fuzzing subquery
|
|
if (fuzz_rand() % 20 == 0 && current_ast_depth <= 1)
|
|
{
|
|
auto explain = std::make_shared<ASTExplainQuery>(fuzzExplainKind());
|
|
|
|
auto settings_ast = std::make_shared<ASTSetQuery>();
|
|
settings_ast->is_standalone = false;
|
|
fuzzExplainSettings(*settings_ast, explain->getKind());
|
|
explain->setSettings(settings_ast);
|
|
|
|
explain->setExplainedQuery(ast);
|
|
ast = explain;
|
|
}
|
|
}
|
|
else if (auto * with_intersect_except = typeid_cast<ASTSelectIntersectExceptQuery *>(ast.get()))
|
|
{
|
|
auto selects = with_intersect_except->getListOfSelects();
|
|
fuzz(selects);
|
|
}
|
|
else if (auto * tables = typeid_cast<ASTTablesInSelectQuery *>(ast.get()))
|
|
{
|
|
fuzz(tables->children);
|
|
}
|
|
else if (auto * tables_element = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))
|
|
{
|
|
fuzz(tables_element->table_join);
|
|
fuzz(tables_element->table_expression);
|
|
fuzz(tables_element->array_join);
|
|
}
|
|
else if (auto * table_expr = typeid_cast<ASTTableExpression *>(ast.get()))
|
|
{
|
|
fuzzTableName(*table_expr);
|
|
fuzz(table_expr->children);
|
|
}
|
|
else if (auto * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
|
|
{
|
|
fuzz(expr_list->children);
|
|
}
|
|
else if (auto * order_by_element = typeid_cast<ASTOrderByElement *>(ast.get()))
|
|
{
|
|
fuzzOrderByElement(order_by_element);
|
|
}
|
|
else if (auto * fn = typeid_cast<ASTFunction *>(ast.get()))
|
|
{
|
|
fuzzColumnLikeExpressionList(fn->arguments.get());
|
|
fuzzColumnLikeExpressionList(fn->parameters.get());
|
|
|
|
if (fn->is_window_function && fn->window_definition)
|
|
{
|
|
auto & def = fn->window_definition->as<ASTWindowDefinition &>();
|
|
fuzzColumnLikeExpressionList(def.partition_by.get());
|
|
fuzzOrderByList(def.order_by.get());
|
|
fuzzWindowFrame(def);
|
|
}
|
|
|
|
fuzz(fn->children);
|
|
}
|
|
else if (auto * select = typeid_cast<ASTSelectQuery *>(ast.get()))
|
|
{
|
|
fuzzColumnLikeExpressionList(select->select().get());
|
|
|
|
if (select->groupBy().get())
|
|
{
|
|
if (fuzz_rand() % 50 == 0)
|
|
{
|
|
select->groupBy()->children.clear();
|
|
select->setExpression(ASTSelectQuery::Expression::GROUP_BY, {});
|
|
select->group_by_with_grouping_sets = false;
|
|
select->group_by_with_rollup = false;
|
|
select->group_by_with_cube = false;
|
|
select->group_by_with_totals = true;
|
|
}
|
|
else if (fuzz_rand() % 100 == 0)
|
|
{
|
|
select->group_by_with_grouping_sets = !select->group_by_with_grouping_sets;
|
|
}
|
|
else if (fuzz_rand() % 100 == 0)
|
|
{
|
|
select->group_by_with_rollup = !select->group_by_with_rollup;
|
|
}
|
|
else if (fuzz_rand() % 100 == 0)
|
|
{
|
|
select->group_by_with_cube = !select->group_by_with_cube;
|
|
}
|
|
else if (fuzz_rand() % 100 == 0)
|
|
{
|
|
select->group_by_with_totals = !select->group_by_with_totals;
|
|
}
|
|
}
|
|
else if (fuzz_rand() % 50 == 0)
|
|
{
|
|
select->setExpression(ASTSelectQuery::Expression::GROUP_BY, getRandomExpressionList());
|
|
}
|
|
|
|
if (select->where().get())
|
|
{
|
|
if (fuzz_rand() % 50 == 0)
|
|
{
|
|
select->where()->children.clear();
|
|
select->setExpression(ASTSelectQuery::Expression::WHERE, {});
|
|
}
|
|
else if (!select->prewhere().get())
|
|
{
|
|
if (fuzz_rand() % 50 == 0)
|
|
{
|
|
select->setExpression(ASTSelectQuery::Expression::PREWHERE, select->where()->clone());
|
|
|
|
if (fuzz_rand() % 2 == 0)
|
|
{
|
|
select->where()->children.clear();
|
|
select->setExpression(ASTSelectQuery::Expression::WHERE, {});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (fuzz_rand() % 50 == 0)
|
|
{
|
|
select->setExpression(ASTSelectQuery::Expression::WHERE, getRandomColumnLike());
|
|
}
|
|
|
|
if (select->prewhere().get())
|
|
{
|
|
if (fuzz_rand() % 50 == 0)
|
|
{
|
|
select->prewhere()->children.clear();
|
|
select->setExpression(ASTSelectQuery::Expression::PREWHERE, {});
|
|
}
|
|
else if (!select->where().get())
|
|
{
|
|
if (fuzz_rand() % 50 == 0)
|
|
{
|
|
select->setExpression(ASTSelectQuery::Expression::WHERE, select->prewhere()->clone());
|
|
|
|
if (fuzz_rand() % 2 == 0)
|
|
{
|
|
select->prewhere()->children.clear();
|
|
select->setExpression(ASTSelectQuery::Expression::PREWHERE, {});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (fuzz_rand() % 50 == 0)
|
|
{
|
|
select->setExpression(ASTSelectQuery::Expression::PREWHERE, getRandomColumnLike());
|
|
}
|
|
|
|
fuzzOrderByList(select->orderBy().get());
|
|
|
|
fuzz(select->children);
|
|
}
|
|
/*
|
|
* The time to fuzz the settings has not yet come.
|
|
* Apparently we don't have any infractructure to validate the values of
|
|
* the settings, and the first query with max_block_size = -1 breaks
|
|
* because of overflows here and there.
|
|
*//*
|
|
* else if (auto * set = typeid_cast<ASTSetQuery *>(ast.get()))
|
|
* {
|
|
* for (auto & c : set->changes)
|
|
* {
|
|
* if (fuzz_rand() % 50 == 0)
|
|
* {
|
|
* c.value = fuzzField(c.value);
|
|
* }
|
|
* }
|
|
* }
|
|
*/
|
|
else if (auto * literal = typeid_cast<ASTLiteral *>(ast.get()))
|
|
{
|
|
// There is a caveat with fuzzing the children: many ASTs also keep the
|
|
// links to particular children in own fields. This means that replacing
|
|
// the child with another object might lead to error. Many of these fields
|
|
// are ASTPtr -- this is redundant ownership, but hides the error if the
|
|
// child field is replaced. Others can be ASTLiteral * or the like, which
|
|
// leads to segfault if the pointed-to AST is replaced.
|
|
// Replacing children is safe in case of ASTExpressionList. In a more
|
|
// general case, we can change the value of ASTLiteral, which is what we
|
|
// do here.
|
|
if (fuzz_rand() % 11 == 0)
|
|
{
|
|
literal->value = fuzzField(literal->value);
|
|
}
|
|
}
|
|
else if (auto * create_query = typeid_cast<ASTCreateQuery *>(ast.get()))
|
|
{
|
|
fuzzCreateQuery(*create_query);
|
|
}
|
|
else if (auto * explain_query = typeid_cast<ASTExplainQuery *>(ast.get()))
|
|
{
|
|
/// Fuzzing EXPLAIN query to SELECT query randomly
|
|
if (fuzz_rand() % 20 == 0 && explain_query->getExplainedQuery()->getQueryKind() == IAST::QueryKind::Select)
|
|
{
|
|
auto select_query = explain_query->getExplainedQuery()->clone();
|
|
fuzz(select_query);
|
|
ast = select_query;
|
|
}
|
|
else
|
|
{
|
|
fuzzExplainQuery(*explain_query);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fuzz(ast->children);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* This functions collects various parts of query that we can then substitute
|
|
* to a query being fuzzed.
|
|
*
|
|
* TODO: we just stop remembering new parts after our corpus reaches certain size.
|
|
* This is boring, should implement a random replacement of existing parst with
|
|
* small probability. Do this after we add this fuzzer to CI and fix all the
|
|
* problems it can routinely find even in this boring version.
|
|
*/
|
|
void QueryFuzzer::collectFuzzInfoMain(ASTPtr ast)
|
|
{
|
|
collectFuzzInfoRecurse(ast);
|
|
|
|
aliases.clear();
|
|
for (const auto & alias : aliases_set)
|
|
{
|
|
aliases.push_back(alias);
|
|
}
|
|
|
|
column_like.clear();
|
|
for (const auto & [name, value] : column_like_map)
|
|
{
|
|
column_like.push_back(value);
|
|
}
|
|
|
|
table_like.clear();
|
|
for (const auto & [name, value] : table_like_map)
|
|
{
|
|
table_like.push_back(value);
|
|
}
|
|
}
|
|
|
|
void QueryFuzzer::addTableLike(ASTPtr ast)
|
|
{
|
|
if (table_like_map.size() > 1000)
|
|
{
|
|
table_like_map.clear();
|
|
}
|
|
|
|
const auto name = ast->formatForErrorMessage();
|
|
if (name.size() < 200)
|
|
{
|
|
table_like_map.insert({name, ast});
|
|
}
|
|
}
|
|
|
|
void QueryFuzzer::addColumnLike(ASTPtr ast)
|
|
{
|
|
if (column_like_map.size() > 1000)
|
|
{
|
|
column_like_map.clear();
|
|
}
|
|
|
|
const auto name = ast->formatForErrorMessage();
|
|
if (name == "Null")
|
|
{
|
|
// The `Null` identifier from FORMAT Null clause. We don't quote it
|
|
// properly when formatting the AST, and while the resulting query
|
|
// technically works, it has non-standard case for Null (the standard
|
|
// is NULL), so it breaks the query formatting idempotence check.
|
|
// Just plug this particular case for now.
|
|
return;
|
|
}
|
|
if (name.size() < 200)
|
|
{
|
|
column_like_map.insert({name, ast});
|
|
}
|
|
}
|
|
|
|
void QueryFuzzer::collectFuzzInfoRecurse(ASTPtr ast)
|
|
{
|
|
if (auto * impl = dynamic_cast<ASTWithAlias *>(ast.get()))
|
|
{
|
|
if (aliases_set.size() > 1000)
|
|
{
|
|
aliases_set.clear();
|
|
}
|
|
|
|
aliases_set.insert(impl->alias);
|
|
}
|
|
|
|
if (typeid_cast<ASTLiteral *>(ast.get()))
|
|
{
|
|
addColumnLike(ast);
|
|
}
|
|
else if (typeid_cast<ASTIdentifier *>(ast.get()))
|
|
{
|
|
addColumnLike(ast);
|
|
}
|
|
else if (typeid_cast<ASTFunction *>(ast.get()))
|
|
{
|
|
addColumnLike(ast);
|
|
}
|
|
else if (typeid_cast<ASTTableExpression *>(ast.get()))
|
|
{
|
|
addTableLike(ast);
|
|
}
|
|
else if (typeid_cast<ASTSubquery *>(ast.get()))
|
|
{
|
|
addTableLike(ast);
|
|
}
|
|
|
|
for (const auto & child : ast->children)
|
|
{
|
|
collectFuzzInfoRecurse(child);
|
|
}
|
|
}
|
|
|
|
void QueryFuzzer::fuzzMain(ASTPtr & ast)
|
|
{
|
|
current_ast_depth = 0;
|
|
debug_visited_nodes.clear();
|
|
debug_top_ast = *
|
|
|
|
collectFuzzInfoMain(ast);
|
|
fuzz(ast);
|
|
|
|
std::cout << std::endl;
|
|
WriteBufferFromOStream ast_buf(std::cout, 4096);
|
|
formatAST(*ast, ast_buf, false /*highlight*/);
|
|
ast_buf.finalize();
|
|
std::cout << std::endl << std::endl;
|
|
}
|
|
|
|
}
|