diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 9ebb9e2c923..7accf530996 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -888,14 +888,18 @@ private: { ++token_iterator; } - if (token_iterator->begin >= all_queries_end) + + if (!token_iterator.isValid()) { break; } } + // Try to parse the query. const char * this_query_end = this_query_begin; + fmt::print(stderr, "left to parse: '{}'\n", std::string_view( + this_query_end, all_queries_end - this_query_end)); try { parsed_query = parseQuery(this_query_end, all_queries_end, true); @@ -925,6 +929,9 @@ private: continue; } + fmt::print(stderr, "parsed query: '{}'\n", std::string_view( + this_query_begin, this_query_end - this_query_begin)); + if (!parsed_query) { if (ignore_error) @@ -995,6 +1002,18 @@ private: //, where the inline data is delimited by semicolon and not // by a newline. this_query_end = parsed_query->as()->end; + // We also have to skip the trailing semicolon that might + // be left after VALUES parsing. + Tokens after_insert_tokens(this_query_end, + all_queries_end); + IParser::Pos after_insert_iterator(after_insert_tokens, + context.getSettingsRef().max_parser_depth); + while (after_insert_iterator.isValid() + && after_insert_iterator->type == TokenType::Semicolon) + { + this_query_end = after_insert_iterator->end; + ++after_insert_iterator; + } } } catch (...) diff --git a/src/Parsers/TokenIterator.cpp b/src/Parsers/TokenIterator.cpp index 50faced19c3..18360ed29ae 100644 --- a/src/Parsers/TokenIterator.cpp +++ b/src/Parsers/TokenIterator.cpp @@ -4,12 +4,13 @@ namespace DB { -UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token * last) +UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last) { /// We have just two kind of parentheses: () and []. UnmatchedParentheses stack; - for (TokenIterator it = begin; it.isValid() && &it.get() <= last; ++it) + for (TokenIterator it = begin; + it.isValid() && it->begin <= last.begin; ++it) { if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket) { diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index 078421c99c9..a95465500e0 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -80,6 +80,6 @@ public: /// Returns positions of unmatched parentheses. using UnmatchedParentheses = std::vector; -UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token * last); +UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last); } diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 9daedc732e2..0e8115a334b 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -249,14 +249,80 @@ ASTPtr tryParseQuery( return nullptr; } + fmt::print(stderr, "before parsing: '{}'\n", + std::string_view(pos, end - pos)); + Expected expected; ASTPtr res; bool parse_res = parser.parse(token_iterator, res, expected); - Token last_token = token_iterator.max(); + const Token last_token = token_iterator.max(); + + const auto * query_begin = pos; pos = last_token.end; + fmt::print(stderr, "parse res {}, ast {}\n", parse_res, + static_cast(res.get())); + + // If parsed query ends at data for insertion. Data for insertion could be + // in any format and not necessary be lexical correct, so we can't perform + // most of the checks. + ASTInsertQuery * insert = nullptr; + if (parse_res) + insert = res->as(); + + if (insert && insert->data) + { + if (!parse_res) + { + // Generic parse error. + out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description); + return nullptr; + } + + return res; + } + + // More granular checks for queries other than INSERT w/inline data. + /// Lexical error + if (last_token.isError()) + { + out_error_message = getLexicalErrorMessage(pos, end, last_token, hilite, query_description); + return nullptr; + } + + /// Unmatched parentheses + UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), last_token); + if (!unmatched_parens.empty()) + { + out_error_message = getUnmatchedParenthesesErrorMessage(pos, end, unmatched_parens, hilite, query_description); + return nullptr; + } + + // If multi-statements are not allowed, then after semicolon, there must + // be no non-space characters. + if (!allow_multi_statements + && !token_iterator->isEnd()) + { + out_error_message = getSyntaxErrorMessage(pos, end, last_token, {}, hilite, + (query_description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed"); + return nullptr; + } + + if (!parse_res) + { + /// Generic parse error. + out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description); + return nullptr; + } + + // The query was parsed correctly, but now we have to do some extra work to + // determine where the next query begins, preserving its leading comments. + + fmt::print(stderr, "before adding newline: '{}'\n", + std::string_view(query_begin, pos - query_begin)); + // The query may also contain a test hint comment in the same line, e.g. // select nonexistent_column; -- { serverError 12345 }. // We must add this comment to the query text, so that it is handled by the @@ -265,9 +331,9 @@ ASTPtr tryParseQuery( // newline in the string manually. If it's earlier than the next significant // token, it means that the text before newline is some trailing whitespace // or comment, and we should add it to our query. - const auto newline = find_first_symbols<'\n'>(pos, end); + const auto * newline = find_first_symbols<'\n'>(pos, end); TokenIterator next_token_iterator = token_iterator; - const auto next_token_begin = + const auto * next_token_begin = (next_token_iterator.isValid() && (++next_token_iterator).isValid()) ? (*next_token_iterator).begin : end; @@ -276,58 +342,8 @@ ASTPtr tryParseQuery( pos = newline; } - /// If parsed query ends at data for insertion. Data for insertion could be in any format and not necessary be lexical correct. - ASTInsertQuery * insert = nullptr; - if (parse_res) - insert = res->as(); - - if (!(insert && insert->data)) - { - /// Lexical error - if (last_token.isError()) - { - out_error_message = getLexicalErrorMessage(pos, end, last_token, hilite, query_description); - return nullptr; - } - - /// Unmatched parentheses - UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), &last_token); - if (!unmatched_parens.empty()) - { - out_error_message = getUnmatchedParenthesesErrorMessage(pos, end, unmatched_parens, hilite, query_description); - return nullptr; - } - } - - if (!parse_res) - { - /// Parse error. - out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description); - return nullptr; - } - - /// Excessive input after query. Parsed query must end with end of data or semicolon or data for INSERT. - if (!token_iterator->isEnd() - && token_iterator->type != TokenType::Semicolon - && !(insert && insert->data)) - { - expected.add(pos, "end of query"); - out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description); - return nullptr; - } - - while (token_iterator->type == TokenType::Semicolon) - ++token_iterator; - - /// If multi-statements are not allowed, then after semicolon, there must be no non-space characters. - if (!allow_multi_statements - && !token_iterator->isEnd() - && !(insert && insert->data)) - { - out_error_message = getSyntaxErrorMessage(pos, end, last_token, {}, hilite, - (query_description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed"); - return nullptr; - } + fmt::print(stderr, "final: '{}'\n", + std::string_view(query_begin, pos - query_begin)); return res; }