check formatting only for the queries we can execute

This commit is contained in:
Alexander Kuzmenkov 2021-03-16 02:18:50 +03:00
parent e485a27dc1
commit 6403198c84

View File

@ -1372,65 +1372,6 @@ private:
continue;
}
// Check that the query is formatted properly and we can parse
// it back and format again and get the same result. Unfortunately
// we can't compare the ASTs, which would be more sensitive to
// errors. This double formatting check doesn't catch all errors,
// e.g. we can format query incorrectly, but to a valid SQL that
// we can then parse and format into the same SQL.
{
ASTPtr parsed_formatted_query;
try
{
const auto * tmp_pos = fuzzed_text.c_str();
parsed_formatted_query = parseQuery(tmp_pos,
tmp_pos + fuzzed_text.size(),
false /* allow_multi_statements */);
}
catch (Exception & e)
{
// Some complicated cases where we can generate the SQL
// which we can't parse:
// * first argument of lambda() replaced by fuzzer with
// something else, leading to constructs such as
// arrayMap((min(x) + 3) -> x + 1, ....)
// * internals of Enum replaced, leading to:
// Enum(equals(someFunction(y), 3)).
// We could filter them on case-by-case basis, but they
// are probably also helpful in that they test the parsing
// errors, so let's just ignore them in this check and
// send them to the server normally.
if (e.code() != ErrorCodes::SYNTAX_ERROR)
{
throw;
}
}
if (parsed_formatted_query)
{
const auto formatted_twice
= parsed_formatted_query->formatForErrorMessage();
if (formatted_twice != fuzzed_text)
{
fmt::print(stderr, "The query formatting is broken.\n");
printChangedSettings();
fmt::print(stderr, "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
formatted_twice, fuzzed_text);
fmt::print(stderr, "In more detail:\n");
fmt::print(stderr, "AST-1:\n'{}'\n", ast_to_process->dumpTree());
fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", fuzzed_text);
fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", parsed_formatted_query->dumpTree());
fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", formatted_twice);
fmt::print(stderr, "Text-1 must be equal to Text-2, but it is not.\n");
exit(1);
}
}
}
parsed_query = ast_to_process;
query_to_send = parsed_query->formatForErrorMessage();
@ -1470,6 +1411,67 @@ private:
return false;
}
// Check that after the query is formatted, we can parse it back,
// format again and get the same result. Unfortunately, we can't
// compare the ASTs, which would be more sensitive to errors. This
// double formatting check doesn't catch all errors, e.g. we can
// format query incorrectly, but to a valid SQL that we can then
// parse and format into the same SQL.
// There are some complicated cases where we can generate the SQL
// which we can't parse:
// * first argument of lambda() replaced by fuzzer with
// something else, leading to constructs such as
// arrayMap((min(x) + 3) -> x + 1, ....)
// * internals of Enum replaced, leading to:
// Enum(equals(someFunction(y), 3)).
// And there are even the cases when we can parse the query, but
// it's logically incorrect and its formatting is a mess, such as
// when `lambda()` function gets substituted into a wrong place.
// To avoid dealing with these cases, run the check only for the
// queries we were able to successfully execute.
if (!have_error)
{
ASTPtr parsed_formatted_query;
try
{
const auto * tmp_pos = query_to_send.c_str();
parsed_formatted_query = parseQuery(tmp_pos,
tmp_pos + query_to_send.size(),
false /* allow_multi_statements */);
}
catch (Exception & e)
{
if (e.code() != ErrorCodes::SYNTAX_ERROR)
{
throw;
}
}
if (parsed_formatted_query)
{
const auto formatted_twice
= parsed_formatted_query->formatForErrorMessage();
if (formatted_twice != query_to_send)
{
fmt::print(stderr, "The query formatting is broken.\n");
printChangedSettings();
fmt::print(stderr, "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
formatted_twice, query_to_send);
fmt::print(stderr, "In more detail:\n");
fmt::print(stderr, "AST-1:\n'{}'\n", parsed_query->dumpTree());
fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_send);
fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", parsed_formatted_query->dumpTree());
fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", formatted_twice);
fmt::print(stderr, "Text-1 must be equal to Text-2, but it is not.\n");
exit(1);
}
}
}
// The server is still alive so we're going to continue fuzzing.
// Determine what we're going to use as the starting AST.
if (have_error)