From e37546dd416cc27537256360d6e2309cfe3afbbc Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 28 Aug 2024 16:36:10 +0000 Subject: [PATCH] Fixing tuple/array literal parsing --- src/Parsers/ExpressionElementParsers.cpp | 35 ++++++++++++++---- src/Parsers/ExpressionElementParsers.h | 6 ++++ src/Parsers/tests/gtest_Parser.cpp | 46 ++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 7 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 61b5723072e..9b9a24f387e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1289,12 +1289,16 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte template struct CollectionOfLiteralsLayer { - explicit CollectionOfLiteralsLayer(IParser::Pos & pos) : literal_begin(pos) + explicit CollectionOfLiteralsLayer(IParser::Pos & pos, TokenType closing_bracket_) + : literal_begin(pos) + , closing_bracket(closing_bracket_) { + pos.increaseDepth(); ++pos; } IParser::Pos literal_begin; + TokenType closing_bracket; Collection arr; }; @@ -1305,8 +1309,7 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, return false; std::vector> layers; - layers.emplace_back(pos); - pos.increaseDepth(); + layers.emplace_back(pos, closing_bracket); ParserLiteral literal_p; @@ -1314,13 +1317,22 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, { if (!layers.back().arr.empty()) { - if (pos->type == closing_bracket) + if (pos->type == layers.back().closing_bracket) { std::shared_ptr literal; - /// Parse one-element tuples (e.g. (1)) later as single values for backward compatibility. + // /// Parse one-element tuples (e.g. (1)) later as single values for backward compatibility. if (std::is_same_v && layers.back().arr.size() == 1) + { + if (layers.size() > 1) + { + layers[layers.size() - 2].arr.push_back(layers.back().arr[0]); + layers.pop_back(); + pos.decreaseDepth(); + continue; + } return false; + } literal = std::make_shared(std::move(layers.back().arr)); literal->begin = layers.back().literal_begin; @@ -1360,8 +1372,17 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, } else if (pos->type == opening_bracket) { - layers.emplace_back(pos); - pos.increaseDepth(); + layers.emplace_back(pos, closing_bracket); + } + else if (pos->type == TokenType::BareWord && std::string_view(pos->begin, pos->end) == FunctionName::value) + { + ++pos; + if (pos.isValid() && pos->type == TokenType::OpeningRoundBracket) + { + layers.emplace_back(pos, TokenType::ClosingRoundBracket); + } + else + return false; } else return false; diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 903111f32db..f4dcbfe9657 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -313,6 +313,12 @@ protected: private: TokenType opening_bracket; TokenType closing_bracket; + + template struct FunctionName; + template <> struct FunctionName { static constexpr auto value = "array"; }; + template <> struct FunctionName { static constexpr auto value = "tuple"; }; + + std::string_view function_name = FunctionName::value; }; /// A tuple of literals with same type. diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index f0abc68f966..c072e9288dc 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -662,3 +662,49 @@ INSTANTIATE_TEST_SUITE_P( "WITH\n table_1 AS\n (\n SELECT\n country,\n city,\n c + some_derived_value AS _expr_1\n FROM matches\n WHERE start_date > toDate('2023-05-30')\n ),\n table_0 AS\n (\n SELECT\n country,\n city,\n AVG(_expr_1) AS _expr_0,\n MAX(_expr_1) AS aggr\n FROM table_1\n WHERE _expr_1 > 0\n GROUP BY\n country,\n city\n )\nSELECT\n country,\n city,\n _expr_0,\n aggr,\n CONCAT(city, ' in ', country) AS place,\n LEFT(country, 2) AS country_code\nFROM table_0\nORDER BY\n aggr ASC,\n country DESC\nLIMIT 20", }, }))); + +namespace DB { std::ostream & operator<<(std::ostream & stream, const Field & field) { return stream << field.dump(); } } + +TEST(ParserTest, parseTupleLiterals) +try +{ + ParserTupleOfLiterals parser; + + auto parse_tuple = [&](std::string_view text) -> std::optional + { + try + { + auto ast = parseQuery(parser, text.begin(), text.end(), 0, 0, 0); + const auto * literal = typeid_cast(ast.get()); + if (!literal || literal->value.getType() != Field::Types::Tuple) + return {}; + return literal->value.safeGet(); + } + catch (const DB::Exception & e) + { + std::cerr << e.displayText() << std::endl; + return {}; + } + }; + + const auto & tuple = parse_tuple("((1, 2), (3, 4))"); + ASSERT_TRUE(tuple && tuple->size() == 2); + + std::vector test_cases = { + // "((1, 2,), (3, 4,),)", + // " ( (\t\t 1 \r\n , 2 ,\n ) \t\t, ( 3 , 4 \t, ) , ) ", + "(((1, 2)), (3, 4))", + "(((1), 2), (3, 4))", + "(tuple(1, 2), (3, 4))", + }; + for (size_t i = 0; i < test_cases.size(); ++i) + { + SCOPED_TRACE(fmt::format("Test case #{}: {}", i + 1, test_cases[i])); + EXPECT_EQ(tuple, parse_tuple(test_cases[i])); + } +} +catch (...) +{ + std::cerr << getCurrentExceptionMessage(true) << std::endl; + throw; +}