formatting & fuzzer support

This commit is contained in:
Alexander Kuzmenkov 2020-12-18 03:21:23 +03:00
parent a1326414ff
commit fc426807a8
4 changed files with 167 additions and 46 deletions

View File

@ -14,6 +14,7 @@
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTQueryWithOutput.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
@ -205,14 +206,88 @@ void QueryFuzzer::replaceWithTableLike(ASTPtr & ast)
ast = new_ast;
}
void QueryFuzzer::fuzzColumnLikeExpressionList(ASTPtr ast)
/// Randomly mutates one ORDER BY element in place.
///
/// A single random draw in [0, 10) picks the mutation:
///   0 -> direction = -1
///   1 -> direction = 1
///   2 -> nulls_direction = -1, marked as explicitly specified
///   3 -> nulls_direction = 1, marked as explicitly specified
///   4 -> nulls_direction follows direction, marked as not explicitly specified
///   5..9 -> the element is left unchanged
///
/// NOTE(review): presumably -1 / 1 encode descending / ascending (and
/// NULLS FIRST / LAST relative to that) -- confirm against ASTOrderByElement.
/// The pointer is dereferenced without a check, so callers must pass non-null.
void QueryFuzzer::fuzzOrderByElement(ASTOrderByElement * elem)
{
    // Draw exactly once so the random sequence is consumed the same way
    // regardless of which branch is taken.
    const auto choice = fuzz_rand() % 10;

    if (choice == 0)
    {
        elem->direction = -1;
    }
    else if (choice == 1)
    {
        elem->direction = 1;
    }
    else if (choice == 2 || choice == 3)
    {
        // Force an explicit NULLS placement: 2 selects -1, 3 selects 1.
        elem->nulls_direction = (choice == 2) ? -1 : 1;
        elem->nulls_direction_was_explicitly_specified = true;
    }
    else if (choice == 4)
    {
        // Drop any explicit NULLS placement and make it mirror the direction.
        elem->nulls_direction = elem->direction;
        elem->nulls_direction_was_explicitly_specified = false;
    }
    // Any other draw: intentionally do nothing.
}
/// Randomly removes and/or adds elements in an ORDER BY expression list.
///
/// @param ast  Non-owning pointer to the list node; may be null, in which case
///             nothing happens. A non-null pointer is assert_cast to
///             ASTExpressionList, so it must actually be one.
///
/// Each of the two mutations fires independently with probability 1/50.
/// The elements themselves are not fuzzed here.
void QueryFuzzer::fuzzOrderByList(IAST * ast)
{
    if (!ast)
    {
        return;
    }

    // `ast` is a raw pointer, so it is cast directly. The previous
    // `ast.get()`-based cast belonged to the old ASTPtr signature: it is
    // ill-formed for `IAST *` and its result was never used.
    auto * list = assert_cast<ASTExpressionList *>(ast);

    // Remove element
    if (fuzz_rand() % 50 == 0 && list->children.size() > 1)
    {
        // Don't remove last element -- this leads to questionable
        // constructs such as empty select.
        list->children.erase(list->children.begin()
            + fuzz_rand() % list->children.size());
    }

    // Add element
    if (fuzz_rand() % 50 == 0)
    {
        // The insertion index is drawn from [0, size), so the new element is
        // placed before some existing one (or at begin() for an empty list).
        // The position is chosen before asking for a column to keep the order
        // of random draws stable.
        auto pos = list->children.empty()
            ? list->children.begin()
            : list->children.begin() + fuzz_rand() % list->children.size();

        auto col = getRandomColumnLike();
        if (col)
        {
            // Wrap the column into a plain ORDER BY element with fixed
            // initial settings.
            auto elem = std::make_shared<ASTOrderByElement>();
            elem->children.push_back(col);
            elem->direction = 1;
            elem->nulls_direction = 1;
            elem->nulls_direction_was_explicitly_specified = false;
            elem->with_fill = false;

            list->children.insert(pos, elem);
        }
        else
        {
            // No column-like expression is available to insert.
            fprintf(stderr, "no random col!\n");
        }
    }

    // We don't have to recurse here to fuzz the children, this is handled by
    // the generic recursion into IAST.children.
}
void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
{
if (!ast)
{
return;
}
auto * impl = assert_cast<ASTExpressionList *>(ast);
// Remove element
if (fuzz_rand() % 50 == 0 && impl->children.size() > 1)
@ -281,17 +356,28 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
{
fuzz(expr_list->children);
}
else if (auto * order_by_element = typeid_cast<ASTOrderByElement *>(ast.get()))
{
fuzzOrderByElement(order_by_element);
}
else if (auto * fn = typeid_cast<ASTFunction *>(ast.get()))
{
fuzzColumnLikeExpressionList(fn->arguments);
fuzzColumnLikeExpressionList(fn->parameters);
fuzzColumnLikeExpressionList(fn->arguments.get());
fuzzColumnLikeExpressionList(fn->parameters.get());
fuzz(fn->children);
if (fn->is_window_function)
{
fuzzColumnLikeExpressionList(fn->window_partition_by);
fuzzOrderByList(fn->window_order_by);
}
}
else if (auto * select = typeid_cast<ASTSelectQuery *>(ast.get()))
{
fuzzColumnLikeExpressionList(select->select());
fuzzColumnLikeExpressionList(select->groupBy());
fuzzColumnLikeExpressionList(select->select().get());
fuzzColumnLikeExpressionList(select->groupBy().get());
fuzzOrderByList(select->orderBy().get());
fuzz(select->children);
}

View File

@ -12,6 +12,9 @@
namespace DB
{
class ASTExpressionList;
class ASTOrderByElement;
/*
* This is an AST-based query fuzzer that makes random modifications to query
* AST, changing numbers, list of columns, functions, etc. It remembers part of
@ -46,7 +49,9 @@ struct QueryFuzzer
ASTPtr getRandomColumnLike();
void replaceWithColumnLike(ASTPtr & ast);
void replaceWithTableLike(ASTPtr & ast);
void fuzzColumnLikeExpressionList(ASTPtr ast);
void fuzzOrderByElement(ASTOrderByElement * elem);
void fuzzOrderByList(IAST * ast);
void fuzzColumnLikeExpressionList(IAST * ast);
void fuzz(ASTs & asts);
void fuzz(ASTPtr & ast);
void collectFuzzInfoMain(const ASTPtr ast);

View File

@ -740,8 +740,14 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
// Also add columns from PARTITION BY and ORDER BY of window functions.
// Requiring a constant reference to a shared pointer to non-const AST
// doesn't really look sane, but the visitor does indeed require it.
visit(node.window_partition_by->clone(), data);
visit(node.window_order_by->clone(), data);
if (node.window_partition_by)
{
visit(node.window_partition_by->clone(), data);
}
if (node.window_order_by)
{
visit(node.window_order_by->clone(), data);
}
// Don't need to do anything more for window functions here -- the
// resulting column is added in ExpressionAnalyzer, similar to the

View File

@ -435,45 +435,69 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
}
}
if (!written)
if (written)
{
settings.ostr << (settings.hilite ? hilite_function : "") << name;
if (parameters)
{
settings.ostr << '(' << (settings.hilite ? hilite_none : "");
parameters->formatImpl(settings, state, nested_dont_need_parens);
settings.ostr << (settings.hilite ? hilite_function : "") << ')';
}
if ((arguments && !arguments->children.empty()) || !no_empty_args)
settings.ostr << '(' << (settings.hilite ? hilite_none : "");
if (arguments)
{
bool special_hilite_regexp = settings.hilite
&& (name == "match" || name == "extract" || name == "extractAll" || name == "replaceRegexpOne"
|| name == "replaceRegexpAll");
for (size_t i = 0, size = arguments->children.size(); i < size; ++i)
{
if (i != 0)
settings.ostr << ", ";
bool special_hilite = false;
if (i == 1 && special_hilite_regexp)
special_hilite = highlightStringLiteralWithMetacharacters(arguments->children[i], settings, "|()^$.[]?*+{:-");
if (!special_hilite)
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
}
}
if ((arguments && !arguments->children.empty()) || !no_empty_args)
settings.ostr << (settings.hilite ? hilite_function : "") << ')';
settings.ostr << (settings.hilite ? hilite_none : "");
return;
}
settings.ostr << (settings.hilite ? hilite_function : "") << name;
if (parameters)
{
settings.ostr << '(' << (settings.hilite ? hilite_none : "");
parameters->formatImpl(settings, state, nested_dont_need_parens);
settings.ostr << (settings.hilite ? hilite_function : "") << ')';
}
if ((arguments && !arguments->children.empty()) || !no_empty_args)
settings.ostr << '(' << (settings.hilite ? hilite_none : "");
if (arguments)
{
bool special_hilite_regexp = settings.hilite
&& (name == "match" || name == "extract" || name == "extractAll" || name == "replaceRegexpOne"
|| name == "replaceRegexpAll");
for (size_t i = 0, size = arguments->children.size(); i < size; ++i)
{
if (i != 0)
settings.ostr << ", ";
bool special_hilite = false;
if (i == 1 && special_hilite_regexp)
special_hilite = highlightStringLiteralWithMetacharacters(arguments->children[i], settings, "|()^$.[]?*+{:-");
if (!special_hilite)
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
}
}
if ((arguments && !arguments->children.empty()) || !no_empty_args)
settings.ostr << (settings.hilite ? hilite_function : "") << ')';
settings.ostr << (settings.hilite ? hilite_none : "");
if (!is_window_function)
{
return;
}
settings.ostr << " OVER (";
if (window_partition_by)
{
settings.ostr << "PARTITION BY ";
window_partition_by->formatImpl(settings, state, nested_dont_need_parens);
}
if (window_partition_by && window_order_by)
{
settings.ostr << " ";
}
if (window_order_by)
{
settings.ostr << "ORDER BY ";
window_order_by->formatImpl(settings, state, nested_dont_need_parens);
}
settings.ostr << ")";
}
}