mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
check formatting only for the queries we can execute
This commit is contained in:
parent
e485a27dc1
commit
6403198c84
@ -1372,65 +1372,6 @@ private:
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check that the query is formatted properly and we can parse
|
||||
// it back and format again and get the same result. Unfortunately
|
||||
// we can't compare the ASTs, which would be more sensitive to
|
||||
// errors. This double formatting check doesn't catch all errors,
|
||||
// e.g. we can format query incorrectly, but to a valid SQL that
|
||||
// we can then parse and format into the same SQL.
|
||||
{
|
||||
ASTPtr parsed_formatted_query;
|
||||
try
|
||||
{
|
||||
const auto * tmp_pos = fuzzed_text.c_str();
|
||||
parsed_formatted_query = parseQuery(tmp_pos,
|
||||
tmp_pos + fuzzed_text.size(),
|
||||
false /* allow_multi_statements */);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
// Some complicated cases where we can generate the SQL
|
||||
// which we can't parse:
|
||||
// * first argument of lambda() replaced by fuzzer with
|
||||
// something else, leading to constructs such as
|
||||
// arrayMap((min(x) + 3) -> x + 1, ....)
|
||||
// * internals of Enum replaced, leading to:
|
||||
// Enum(equals(someFunction(y), 3)).
|
||||
// We could filter them on case-by-case basis, but they
|
||||
// are probably also helpful in that they test the parsing
|
||||
// errors, so let's just ignore them in this check and
|
||||
// send them to the server normally.
|
||||
if (e.code() != ErrorCodes::SYNTAX_ERROR)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
if (parsed_formatted_query)
|
||||
{
|
||||
const auto formatted_twice
|
||||
= parsed_formatted_query->formatForErrorMessage();
|
||||
|
||||
if (formatted_twice != fuzzed_text)
|
||||
{
|
||||
fmt::print(stderr, "The query formatting is broken.\n");
|
||||
|
||||
printChangedSettings();
|
||||
|
||||
fmt::print(stderr, "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
|
||||
formatted_twice, fuzzed_text);
|
||||
fmt::print(stderr, "In more detail:\n");
|
||||
fmt::print(stderr, "AST-1:\n'{}'\n", ast_to_process->dumpTree());
|
||||
fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", fuzzed_text);
|
||||
fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", parsed_formatted_query->dumpTree());
|
||||
fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", formatted_twice);
|
||||
fmt::print(stderr, "Text-1 must be equal to Text-2, but it is not.\n");
|
||||
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parsed_query = ast_to_process;
|
||||
query_to_send = parsed_query->formatForErrorMessage();
|
||||
|
||||
@ -1470,6 +1411,67 @@ private:
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that after the query is formatted, we can parse it back,
|
||||
// format again and get the same result. Unfortunately, we can't
|
||||
// compare the ASTs, which would be more sensitive to errors. This
|
||||
// double formatting check doesn't catch all errors, e.g. we can
|
||||
// format query incorrectly, but to a valid SQL that we can then
|
||||
// parse and format into the same SQL.
|
||||
// There are some complicated cases where we can generate the SQL
|
||||
// which we can't parse:
|
||||
// * first argument of lambda() replaced by fuzzer with
|
||||
// something else, leading to constructs such as
|
||||
// arrayMap((min(x) + 3) -> x + 1, ....)
|
||||
// * internals of Enum replaced, leading to:
|
||||
// Enum(equals(someFunction(y), 3)).
|
||||
// And there are even the cases when we can parse the query, but
|
||||
// it's logically incorrect and its formatting is a mess, such as
|
||||
// when `lambda()` function gets substituted into a wrong place.
|
||||
// To avoid dealing with these cases, run the check only for the
|
||||
// queries we were able to successfully execute.
|
||||
if (!have_error)
|
||||
{
|
||||
ASTPtr parsed_formatted_query;
|
||||
try
|
||||
{
|
||||
const auto * tmp_pos = query_to_send.c_str();
|
||||
parsed_formatted_query = parseQuery(tmp_pos,
|
||||
tmp_pos + query_to_send.size(),
|
||||
false /* allow_multi_statements */);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (e.code() != ErrorCodes::SYNTAX_ERROR)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
if (parsed_formatted_query)
|
||||
{
|
||||
const auto formatted_twice
|
||||
= parsed_formatted_query->formatForErrorMessage();
|
||||
|
||||
if (formatted_twice != query_to_send)
|
||||
{
|
||||
fmt::print(stderr, "The query formatting is broken.\n");
|
||||
|
||||
printChangedSettings();
|
||||
|
||||
fmt::print(stderr, "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
|
||||
formatted_twice, query_to_send);
|
||||
fmt::print(stderr, "In more detail:\n");
|
||||
fmt::print(stderr, "AST-1:\n'{}'\n", parsed_query->dumpTree());
|
||||
fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_send);
|
||||
fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", parsed_formatted_query->dumpTree());
|
||||
fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", formatted_twice);
|
||||
fmt::print(stderr, "Text-1 must be equal to Text-2, but it is not.\n");
|
||||
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The server is still alive so we're going to continue fuzzing.
|
||||
// Determine what we're going to use as the starting AST.
|
||||
if (have_error)
|
||||
|
Loading…
Reference in New Issue
Block a user