Merge pull request #21466 from ClickHouse/aku/frame-formatting

add query formatting idempotence check to fuzzer
This commit is contained in:
Alexander Kuzmenkov 2021-03-18 17:57:11 +03:00 committed by GitHub
commit f85b089292
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 148 additions and 28 deletions

View File

@ -65,6 +65,7 @@
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTUseQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTQueryWithOutput.h>
#include <Parsers/ASTLiteral.h>
@ -116,6 +117,34 @@ namespace ErrorCodes
}
static bool queryHasWithClause(const IAST * ast)
{
if (const auto * select = dynamic_cast<const ASTSelectQuery *>(ast);
select && select->with())
{
return true;
}
// This full recursive walk is somewhat excessive, because most of the
// children are not queries, but on the other hand it will let us to avoid
// breakage when the AST structure changes and some new variant of query
// nesting is added. This function is used in fuzzer, so it's better to be
// defensive and avoid weird unexpected errors.
// clang-tidy is confused by this function: it thinks that if `select` is
// nullptr, `ast` is also nullptr, and complains about nullptr dereference.
// NOLINTNEXTLINE
for (const auto & child : ast->children)
{
if (queryHasWithClause(child.get()))
{
return true;
}
}
return false;
}
class Client : public Poco::Util::Application
{
public:
@ -1255,6 +1284,29 @@ private:
return true;
}
// Prints changed settings to stderr. Useful for debugging fuzzing failures.
void printChangedSettings() const
{
const auto & changes = context.getSettingsRef().changes();
if (!changes.empty())
{
fmt::print(stderr, "Changed settings: ");
for (size_t i = 0; i < changes.size(); ++i)
{
if (i)
{
fmt::print(stderr, ", ");
}
fmt::print(stderr, "{} = '{}'", changes[i].name,
toString(changes[i].value));
}
fmt::print(stderr, "\n");
}
else
{
fmt::print(stderr, "No changed settings.\n");
}
}
/// Returns false when server is not available.
bool processWithFuzzing(const String & text)
@ -1317,9 +1369,14 @@ private:
auto base_after_fuzz = fuzz_base->formatForErrorMessage();
// Debug AST cloning errors.
// Check that the source AST didn't change after fuzzing. This
// helps debug AST cloning errors, where the cloned AST doesn't
// clone all its children, and erroneously points to some source
// child elements.
if (base_before_fuzz != base_after_fuzz)
{
printChangedSettings();
fmt::print(stderr,
"Base before fuzz: {}\n"
"Base after fuzz: {}\n",
@ -1334,7 +1391,7 @@ private:
fmt::print(stderr, "IAST::clone() is broken for some AST node. This is a bug. The original AST ('dump before fuzz') and its cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent node doesn't implement clone() correctly.");
assert(false);
exit(1);
}
auto fuzzed_text = ast_to_process->formatForErrorMessage();
@ -1378,29 +1435,76 @@ private:
// Print the changed settings because they might be needed to
// reproduce the error.
const auto & changes = context.getSettingsRef().changes();
if (!changes.empty())
{
fmt::print(stderr, "Changed settings: ");
for (size_t i = 0; i < changes.size(); ++i)
{
if (i)
{
fmt::print(stderr, ", ");
}
fmt::print(stderr, "{} = '{}'", changes[i].name,
toString(changes[i].value));
}
fmt::print(stderr, "\n");
}
else
{
fmt::print(stderr, "No changed settings.\n");
}
printChangedSettings();
return false;
}
// Check that after the query is formatted, we can parse it back,
// format again and get the same result. Unfortunately, we can't
// compare the ASTs, which would be more sensitive to errors. This
// double formatting check doesn't catch all errors, e.g. we can
// format query incorrectly, but to a valid SQL that we can then
// parse and format into the same SQL.
// There are some complicated cases where we can generate the SQL
// which we can't parse:
// * first argument of lambda() replaced by fuzzer with
// something else, leading to constructs such as
// arrayMap((min(x) + 3) -> x + 1, ....)
// * internals of Enum replaced, leading to:
// Enum(equals(someFunction(y), 3)).
// And there are even the cases when we can parse the query, but
// it's logically incorrect and its formatting is a mess, such as
// when `lambda()` function gets substituted into a wrong place.
// To avoid dealing with these cases, run the check only for the
// queries we were able to successfully execute.
// The final caveat is that sometimes WITH queries are not executed,
// if they are not referenced by the main SELECT, so they can still
// have the aforementioned problems. Disable this check for such
// queries, for lack of a better solution.
if (!have_error && queryHasWithClause(parsed_query.get()))
{
ASTPtr parsed_formatted_query;
try
{
const auto * tmp_pos = query_to_send.c_str();
parsed_formatted_query = parseQuery(tmp_pos,
tmp_pos + query_to_send.size(),
false /* allow_multi_statements */);
}
catch (Exception & e)
{
if (e.code() != ErrorCodes::SYNTAX_ERROR)
{
throw;
}
}
if (parsed_formatted_query)
{
const auto formatted_twice
= parsed_formatted_query->formatForErrorMessage();
if (formatted_twice != query_to_send)
{
fmt::print(stderr, "The query formatting is broken.\n");
printChangedSettings();
fmt::print(stderr, "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
formatted_twice, query_to_send);
fmt::print(stderr, "In more detail:\n");
fmt::print(stderr, "AST-1:\n'{}'\n", parsed_query->dumpTree());
fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_send);
fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", parsed_formatted_query->dumpTree());
fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", formatted_twice);
fmt::print(stderr, "Text-1 must be equal to Text-2, but it is not.\n");
exit(1);
}
}
}
// The server is still alive so we're going to continue fuzzing.
// Determine what we're going to use as the starting AST.
if (have_error)

View File

@ -570,6 +570,15 @@ void QueryFuzzer::addColumnLike(const ASTPtr ast)
}
const auto name = ast->formatForErrorMessage();
if (name == "Null")
{
// The `Null` identifier from FORMAT Null clause. We don't quote it
// properly when formatting the AST, and while the resulting query
// technically works, it has non-standard case for Null (the standard
// is NULL), so it breaks the query formatting idempotence check.
// Just plug this particular case for now.
return;
}
if (name.size() < 200)
{
column_like_map.insert({name, ast});

View File

@ -180,7 +180,17 @@ String FieldVisitorToString::operator() (const Tuple & x) const
{
WriteBufferFromOwnString wb;
wb << '(';
// For single-element tuples we must use the explicit tuple() function,
// or they will be parsed back as plain literals.
if (x.size() > 1)
{
wb << '(';
}
else
{
wb << "tuple(";
}
for (auto it = x.begin(); it != x.end(); ++it)
{
if (it != x.begin())

View File

@ -452,7 +452,7 @@ template <> bool decimalLessOrEqual(DateTime64 x, DateTime64 y, UInt32 x_scale,
inline void writeText(const Null &, WriteBuffer & buf)
{
writeText(std::string("Null"), buf);
writeText(std::string("NULL"), buf);
}
String toString(const Field & x)

View File

@ -533,6 +533,7 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
ParserKeyword keyword_groups("GROUPS");
ParserKeyword keyword_range("RANGE");
node->frame.is_default = false;
if (keyword_rows.ignore(pos, expected))
{
node->frame.type = WindowFrame::FrameType::Rows;
@ -548,6 +549,7 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
else
{
/* No frame clause. */
node->frame.is_default = true;
return true;
}
@ -699,11 +701,6 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
}
}
if (!(node->frame == WindowFrame{}))
{
node->frame.is_default = false;
}
return true;
}