check formatting only for the queries we can execute

2024-11-24 08:32:02 +00:00 · 2021-03-16 02:18:50 +03:00 · 2021-03-16 02:18:50 +03:00 · 6403198c84
commit 6403198c84
parent e485a27dc1
1 changed files with 61 additions and 59 deletions
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@ -1372,65 +1372,6 @@ private:
                    continue;
                }

-                // Check that the query is formatted properly and we can parse
-                // it back and format again and get the same result. Unfortunately
-                // we can't compare the ASTs, which would be more sensitive to
-                // errors. This double formatting check doesn't catch all errors,
-                // e.g. we can format query incorrectly, but to a valid SQL that
-                // we can then parse and format into the same SQL.
-                {
-                    ASTPtr parsed_formatted_query;
-                    try
-                    {
-                        const auto * tmp_pos = fuzzed_text.c_str();
-                        parsed_formatted_query = parseQuery(tmp_pos,
-                            tmp_pos + fuzzed_text.size(),
-                            false /* allow_multi_statements */);
-                    }
-                    catch (Exception & e)
-                    {
-                        // Some complicated cases where we can generate the SQL
-                        // which we can't parse:
-                        // * first argument of lambda() replaced by fuzzer with
-                        //   something else, leading to constructs such as
-                        //   arrayMap((min(x) + 3) -> x + 1, ....)
-                        // * internals of Enum replaced, leading to:
-                        //   Enum(equals(someFunction(y), 3)).
-                        // We could filter them on case-by-case basis, but they
-                        // are probably also helpful in that they test the parsing
-                        // errors, so let's just ignore them in this check and
-                        // send them to the server normally.
-                        if (e.code() != ErrorCodes::SYNTAX_ERROR)
-                        {
-                            throw;
-                        }
-                    }
-
-                    if (parsed_formatted_query)
-                    {
-                        const auto formatted_twice
-                            = parsed_formatted_query->formatForErrorMessage();
-
-                        if (formatted_twice != fuzzed_text)
-                        {
-                            fmt::print(stderr, "The query formatting is broken.\n");
-
-                            printChangedSettings();
-
-                            fmt::print(stderr, "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
-                                formatted_twice, fuzzed_text);
-                            fmt::print(stderr, "In more detail:\n");
-                            fmt::print(stderr, "AST-1:\n'{}'\n", ast_to_process->dumpTree());
-                            fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", fuzzed_text);
-                            fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", parsed_formatted_query->dumpTree());
-                            fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", formatted_twice);
-                            fmt::print(stderr, "Text-1 must be equal to Text-2, but it is not.\n");
-
-                            exit(1);
-                        }
-                    }
-                }
-
                parsed_query = ast_to_process;
                query_to_send = parsed_query->formatForErrorMessage();

@ -1470,6 +1411,67 @@ private:
                return false;
            }

+            // Check that after the query is formatted, we can parse it back,
+            // format again and get the same result. Unfortunately, we can't
+            // compare the ASTs, which would be more sensitive to errors. This
+            // double formatting check doesn't catch all errors, e.g. we can
+            // format query incorrectly, but to a valid SQL that we can then
+            // parse and format into the same SQL.
+            // There are some complicated cases where we can generate the SQL
+            // which we can't parse:
+            // * first argument of lambda() replaced by fuzzer with
+            //   something else, leading to constructs such as
+            //   arrayMap((min(x) + 3) -> x + 1, ....)
+            // * internals of Enum replaced, leading to:
+            //   Enum(equals(someFunction(y), 3)).
+            // And there are even the cases when we can parse the query, but
+            // it's logically incorrect and its formatting is a mess, such as
+            // when `lambda()` function gets substituted into a wrong place.
+            // To avoid dealing with these cases, run the check only for the
+            // queries we were able to successfully execute.
+            if (!have_error)
+            {
+                ASTPtr parsed_formatted_query;
+                try
+                {
+                    const auto * tmp_pos = query_to_send.c_str();
+                    parsed_formatted_query = parseQuery(tmp_pos,
+                        tmp_pos + query_to_send.size(),
+                        false /* allow_multi_statements */);
+                }
+                catch (Exception & e)
+                {
+                    if (e.code() != ErrorCodes::SYNTAX_ERROR)
+                    {
+                        throw;
+                    }
+                }
+
+                if (parsed_formatted_query)
+                {
+                    const auto formatted_twice
+                        = parsed_formatted_query->formatForErrorMessage();
+
+                    if (formatted_twice != query_to_send)
+                    {
+                        fmt::print(stderr, "The query formatting is broken.\n");
+
+                        printChangedSettings();
+
+                        fmt::print(stderr, "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
+                            formatted_twice, query_to_send);
+                        fmt::print(stderr, "In more detail:\n");
+                        fmt::print(stderr, "AST-1:\n'{}'\n", parsed_query->dumpTree());
+                        fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_send);
+                        fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", parsed_formatted_query->dumpTree());
+                        fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", formatted_twice);
+                        fmt::print(stderr, "Text-1 must be equal to Text-2, but it is not.\n");
+
+                        exit(1);
+                    }
+                }
+            }
+
            // The server is still alive so we're going to continue fuzzing.
            // Determine what we're going to use as the starting AST.
            if (have_error)