From 00ac3231b290863a362c7d324de33cb1a9fc5566 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 12 Apr 2022 15:10:50 +0200 Subject: [PATCH 1/7] Fix broken aliases during parsing of special operators --- src/Parsers/ExpressionElementParsers.cpp | 83 ++++++++++------- ...ecial_operator_parse_alias_check.reference | 35 +++++++ ...267_special_operator_parse_alias_check.sql | 92 +++++++++++++++++++ 3 files changed, 175 insertions(+), 35 deletions(-) create mode 100644 tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference create mode 100644 tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 29c7846283e..417f7501c4e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -303,7 +303,7 @@ namespace ASTPtr expr_node; ASTPtr type_node; - if (ParserExpression().parse(pos, expr_node, expected)) + if (ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, expr_node, expected)) { if (ParserKeyword("AS").ignore(pos, expected)) { @@ -315,7 +315,7 @@ namespace } else if (ParserToken(TokenType::Comma).ignore(pos, expected)) { - if (ParserExpression().parse(pos, type_node, expected)) + if (ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, type_node, expected)) { node = makeASTFunction("CAST", expr_node, type_node); return true; @@ -335,7 +335,7 @@ namespace ASTPtr start_node; ASTPtr length_node; - if (!ParserExpression().parse(pos, expr_node, expected)) + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, expr_node, expected)) return false; if (pos->type != TokenType::Comma) @@ -348,7 +348,7 @@ namespace ++pos; } - if (!ParserExpression().parse(pos, start_node, expected)) + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, start_node, 
expected)) return false; if (pos->type != TokenType::ClosingRoundBracket) @@ -363,7 +363,7 @@ namespace ++pos; } - if (!ParserExpression().parse(pos, length_node, expected)) + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, length_node, expected)) return false; } @@ -378,7 +378,7 @@ namespace bool parseTrim(bool trim_left, bool trim_right, IParser::Pos & pos, ASTPtr & node, Expected & expected) { - /// Handles all possible TRIM/LTRIM/RTRIM call variants + /// Handles all possible TRIM/LTRIM/RTRIM call variants ([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) std::string func_name; bool char_override = false; @@ -412,7 +412,7 @@ namespace if (char_override) { - if (!ParserExpression().parse(pos, to_remove, expected)) + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, to_remove, expected)) return false; if (!ParserKeyword("FROM").ignore(pos, expected)) return false; @@ -429,7 +429,7 @@ namespace } } - if (!ParserExpression().parse(pos, expr_node, expected)) + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, expr_node, expected)) return false; /// Convert to regexp replace function call @@ -506,6 +506,9 @@ namespace bool parseExtract(IParser::Pos & pos, ASTPtr & node, Expected & expected) { + /// First try to match with date extract operator EXTRACT(part FROM date) + /// Then with function extract(haystack, pattern) + IParser::Pos begin = pos; IntervalKind interval_kind; @@ -514,7 +517,7 @@ namespace ASTPtr expr; ParserKeyword s_from("FROM"); - ParserExpression elem_parser; + ParserExpressionWithOptionalAlias elem_parser(true /*allow_alias_without_as_keyword*/); if (s_from.ignore(pos, expected) && elem_parser.parse(pos, expr, expected)) { @@ -526,7 +529,7 @@ namespace pos = begin; ASTPtr expr_list; - if (!ParserExpressionList(false, false).parse(pos, expr_list, expected)) + if (!ParserExpressionList(true 
/*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) return false; auto res = std::make_shared(); @@ -539,8 +542,11 @@ namespace bool parsePosition(IParser::Pos & pos, ASTPtr & node, Expected & expected) { + /// First try to match with position(needle IN haystack) + /// Then with position(haystack, needle[, start_pos]) + ASTPtr expr_list_node; - if (!ParserExpressionList(false, false).parse(pos, expr_list_node, expected)) + if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list_node, expected)) return false; ASTExpressionList * expr_list = typeid_cast(expr_list_node.get()); @@ -568,6 +574,9 @@ namespace bool parseDateAdd(const char * function_name, IParser::Pos & pos, ASTPtr & node, Expected & expected) { + /// First try to match with function(unit, offset, timestamp) + /// Then with function(offset, timestamp) + ASTPtr timestamp_node; ASTPtr offset_node; @@ -575,19 +584,18 @@ namespace ASTPtr interval_func_node; if (parseIntervalKind(pos, expected, interval_kind)) { - /// function(unit, offset, timestamp) if (pos->type != TokenType::Comma) return false; ++pos; - if (!ParserExpression().parse(pos, offset_node, expected)) + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, offset_node, expected)) return false; if (pos->type != TokenType::Comma) return false; ++pos; - if (!ParserExpression().parse(pos, timestamp_node, expected)) + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, timestamp_node, expected)) return false; auto interval_expr_list_args = std::make_shared(); interval_expr_list_args->children = {offset_node}; @@ -600,7 +608,7 @@ namespace else { ASTPtr expr_list; - if (!ParserExpressionList(false, false).parse(pos, expr_list, expected)) + if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) return false; auto res = std::make_shared(); @@ -617,39 +625,44 @@ namespace bool 
parseDateDiff(IParser::Pos & pos, ASTPtr & node, Expected & expected) { + /// First try to match with dateDiff(unit, startdate, enddate, [timezone]) + /// Then with dateDiff('unit', startdate, enddate, [timezone]) + ASTPtr left_node; ASTPtr right_node; IntervalKind interval_kind; - if (!parseIntervalKind(pos, expected, interval_kind)) + if (parseIntervalKind(pos, expected, interval_kind)) { - ASTPtr expr_list; - if (!ParserExpressionList(false, false).parse(pos, expr_list, expected)) + if (pos->type != TokenType::Comma) + return false; + ++pos; + + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, left_node, expected)) return false; - auto res = std::make_shared(); - res->name = "dateDiff"; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); + if (pos->type != TokenType::Comma) + return false; + ++pos; + + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, right_node, expected)) + return false; + + node = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), left_node, right_node); + return true; } - if (pos->type != TokenType::Comma) - return false; - ++pos; - - if (!ParserExpression().parse(pos, left_node, expected)) + ASTPtr expr_list; + if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) return false; - if (pos->type != TokenType::Comma) - return false; - ++pos; + auto res = std::make_shared(); + res->name = "dateDiff"; + res->arguments = expr_list; + res->children.push_back(res->arguments); + node = std::move(res); - if (!ParserExpression().parse(pos, right_node, expected)) - return false; - - node = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), left_node, right_node); return true; } diff --git a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference 
b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference new file mode 100644 index 00000000000..de389bc6382 --- /dev/null +++ b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference @@ -0,0 +1,35 @@ +1234 1234 UInt32 +1234 1234 UInt32 +1234 1234 +1234 1234 +234 1234 2 3 +234 1234 2 3 +234 1234 2 +234 1234 2 +234 1234 2 3 +234 1234 2 3 +bca a abca +bca a abca +abc a abca +abc a abca +bc a abca +bc a abca +5 2019-05-05 +5 2019-05-05 +123 1234 123 +123 1234 123 +1 123 1234 +0 123 1234 +0 123 1234 +2019-05-06 1 2019-05-05 +2019-05-06 1 2019-05-05 +2019-05-06 1 2019-05-05 +2019-05-06 1 2019-05-05 +2019-05-04 1 2019-05-05 +2019-05-04 1 2019-05-05 +2019-05-04 1 2019-05-05 +2019-05-04 1 2019-05-05 +1 2019-05-05 2019-05-06 +1 2019-05-05 2019-05-06 +1 2019-05-05 2019-05-06 +1 2019-05-05 2019-05-06 diff --git a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql new file mode 100644 index 00000000000..27fb3d22197 --- /dev/null +++ b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql @@ -0,0 +1,92 @@ +-- CAST expression + +SELECT cast('1234' AS lhs, 'UInt32' AS rhs), lhs, rhs; +SELECT cast('1234' lhs, 'UInt32' rhs), lhs, rhs; +SELECT cast('1234' lhs AS UInt32), lhs; +SELECT cast('1234' AS lhs AS UInt32), lhs; + +-- SUBSTRING expression + +-- SUBSTRING(expr, start, length) + +SELECT substring('1234' AS arg_1, 2 AS arg_2, 3 AS arg_3), arg_1, arg_2, arg_3; +SELECT substring('1234' arg_1, 2 arg_2, 3 arg_3), arg_1, arg_2, arg_3; + +-- SUBSTRING(expr FROM start) + +SELECT substring('1234' AS arg_1 FROM 2 AS arg_2), arg_1, arg_2; +SELECT substring('1234' arg_1 FROM 2 arg_2), arg_1, arg_2; + +-- SUBSTRING(expr FROM start FOR length) + +SELECT substring('1234' AS arg_1 FROM 2 AS arg_2 FOR 3 AS arg_3), arg_1, arg_2, arg_3; +SELECT substring('1234' arg_1 FROM 2 arg_2 FOR 3 arg_3), arg_1, arg_2, arg_3; + + +-- TRIM expression 
([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) + +SELECT trim(LEADING 'a' AS arg_1 FROM 'abca' AS arg_2), arg_1, arg_2; +SELECT trim(LEADING 'a' arg_1 FROM 'abca' arg_2), arg_1, arg_2; + +SELECT trim(TRAILING 'a' AS arg_1 FROM 'abca' AS arg_2), arg_1, arg_2; +SELECT trim(TRAILING 'a' arg_1 FROM 'abca' arg_2), arg_1, arg_2; + +SELECT trim(BOTH 'a' AS arg_1 FROM 'abca' AS arg_2), arg_1, arg_2; +SELECT trim(BOTH 'a' arg_1 FROM 'abca' arg_2), arg_1, arg_2; + +-- EXTRACT expression + +-- EXTRACT(part FROM date) + +SELECT EXTRACT(DAY FROM toDate('2019-05-05') as arg_1), arg_1; +SELECT EXTRACT(DAY FROM toDate('2019-05-05') arg_1), arg_1; + +-- Function extract(haystack, pattern) + +SELECT extract('1234' AS arg_1, '123' AS arg_2), arg_1, arg_2; +SELECT extract('1234' arg_1, '123' arg_2), arg_1, arg_2; + +-- POSITION expression + +-- position(needle IN haystack) + +SELECT position(('123' AS arg_1) IN ('1234' AS arg_2)), arg_1, arg_2; + +-- position(haystack, needle[, start_pos]) + +SELECT position('123' AS arg_1, '1234' AS arg_2), arg_1, arg_2; +SELECT position('123' arg_1, '1234' arg_2), arg_1, arg_2; + +-- dateAdd, dateSub expressions + +-- function(unit, offset, timestamp) + +SELECT dateAdd(DAY, 1 AS arg_1, toDate('2019-05-05') AS arg_2), arg_1, arg_2; +SELECT dateAdd(DAY, 1 arg_1, toDate('2019-05-05') arg_2), arg_1, arg_2; + +-- function(offset, timestamp) + +SELECT dateAdd(DAY, 1 AS arg_1, toDate('2019-05-05') AS arg_2), arg_1, arg_2; +SELECT dateAdd(DAY, 1 arg_1, toDate('2019-05-05') arg_2), arg_1, arg_2; + +-- function(unit, offset, timestamp) + +SELECT dateSub(DAY, 1 AS arg_1, toDate('2019-05-05') AS arg_2), arg_1, arg_2; +SELECT dateSub(DAY, 1 arg_1, toDate('2019-05-05') arg_2), arg_1, arg_2; + +-- function(offset, timestamp) + +SELECT dateSub(DAY, 1 AS arg_1, toDate('2019-05-05') AS arg_2), arg_1, arg_2; +SELECT dateSub(DAY, 1 arg_1, toDate('2019-05-05') arg_2), arg_1, arg_2; + +-- dateDiff expression + +-- dateDiff(unit, startdate, enddate, 
[timezone]) + +SELECT dateDiff(DAY, toDate('2019-05-05') AS arg_1, toDate('2019-05-06') AS arg_2), arg_1, arg_2; +SELECT dateDiff(DAY, toDate('2019-05-05') arg_1, toDate('2019-05-06') arg_2), arg_1, arg_2; + +-- dateDiff('unit', startdate, enddate, [timezone]) + +SELECT dateDiff('DAY', toDate('2019-05-05') AS arg_1, toDate('2019-05-06') AS arg_2), arg_1, arg_2; +SELECT dateDiff('DAY', toDate('2019-05-05') arg_1, toDate('2019-05-06') arg_2), arg_1, arg_2; From 90e3a7587493b02c1f21d3e333b560a084effd5d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 13 Apr 2022 16:36:06 +0200 Subject: [PATCH 2/7] Fixed tests --- src/Parsers/ExpressionElementParsers.cpp | 38 ++++++++++++++----- ...ecial_operator_parse_alias_check.reference | 1 - ...267_special_operator_parse_alias_check.sql | 5 ++- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 417f7501c4e..437e467bc09 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -298,28 +298,46 @@ namespace { bool parseCastAs(IParser::Pos & pos, ASTPtr & node, Expected & expected) { - /// expr AS type + /** Possible variants for cast operator + * First try to match with cast(expr AS Type); + * Then try to match with cast(expr [[as] alias_1], alias_expr [[as] alias_2]); + * + * We need to check if after keyword AS there is identifier followed by comma, + * if it is then it is not cast(expr AS Type) + */ ASTPtr expr_node; ASTPtr type_node; + ASTPtr identifier_node; - if (ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, expr_node, expected)) + if (ParserExpression().parse(pos, expr_node, expected)) { - if (ParserKeyword("AS").ignore(pos, expected)) + bool parse_as = ParserKeyword("AS").ignore(pos, expected); + + if (parse_as) { - if (ParserDataType().parse(pos, type_node, expected)) + auto begin = pos; + auto expected_copy = expected; + bool 
next_identifier_with_comma + = ParserIdentifier().ignore(begin, expected_copy) && ParserToken(TokenType::Comma).ignore(begin, expected_copy); + + if (!next_identifier_with_comma && ParserDataType().parse(pos, type_node, expected)) { node = createFunctionCast(expr_node, type_node); return true; } } - else if (ParserToken(TokenType::Comma).ignore(pos, expected)) + + if (ParserIdentifier().parse(pos, identifier_node, expected)) + expr_node->setAlias(getIdentifierName(identifier_node)); + else if (parse_as) + return false; + + if (ParserToken(TokenType::Comma).ignore(pos, expected) + && ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, type_node, expected)) { - if (ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, type_node, expected)) - { - node = makeASTFunction("CAST", expr_node, type_node); - return true; - } + node = makeASTFunction("CAST", expr_node, type_node); + return true; } } diff --git a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference index de389bc6382..18191d9d315 100644 --- a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference +++ b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference @@ -1,7 +1,6 @@ 1234 1234 UInt32 1234 1234 UInt32 1234 1234 -1234 1234 234 1234 2 3 234 1234 2 3 234 1234 2 diff --git a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql index 27fb3d22197..ca786910882 100644 --- a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql +++ b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql @@ -2,8 +2,9 @@ SELECT cast('1234' AS lhs, 'UInt32' AS rhs), lhs, rhs; SELECT cast('1234' lhs, 'UInt32' rhs), lhs, rhs; -SELECT cast('1234' lhs AS UInt32), lhs; -SELECT cast('1234' AS lhs AS UInt32), 
lhs; +SELECT cast(('1234' AS lhs) AS UInt32), lhs; +SELECT cast('1234' AS lhs AS UInt32), lhs; --{clientError 62} +SELECT cast('1234' lhs AS UInt32), lhs; --{clientError 62} -- SUBSTRING expression From fb28791d83fa5b2bb86d8a0a6441a315d6fce20d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Apr 2022 16:13:34 +0200 Subject: [PATCH 3/7] Fixed tests --- src/Parsers/ExpressionElementParsers.cpp | 206 ++++++++++++------ .../0_stateless/02154_parser_backtracking.sh | 12 +- ...ecial_operator_parse_alias_check.reference | 30 ++- ...267_special_operator_parse_alias_check.sql | 54 +++-- 4 files changed, 214 insertions(+), 88 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 437e467bc09..50e8002e6d0 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -298,12 +298,9 @@ namespace { bool parseCastAs(IParser::Pos & pos, ASTPtr & node, Expected & expected) { - /** Possible variants for cast operator - * First try to match with cast(expr AS Type); - * Then try to match with cast(expr [[as] alias_1], alias_expr [[as] alias_2]); - * - * We need to check if after keyword AS there is identifier followed by comma, - * if it is then it is not cast(expr AS Type) + /** Possible variants for cast operator cast(expr [[AS] alias_1] AS Type), cast(expr [[AS] alias_1], type_expr [[as] alias_2]). + * First try to match with cast(expr [[AS] alias_1] AS Type) + * Then try to match with cast(expr [[AS] alias_1], type_expr [[as] alias_2]). 
*/ ASTPtr expr_node; @@ -312,14 +309,35 @@ namespace if (ParserExpression().parse(pos, expr_node, expected)) { - bool parse_as = ParserKeyword("AS").ignore(pos, expected); + ParserKeyword as_keyword_parser("AS"); + bool parse_as = as_keyword_parser.ignore(pos, expected); + + /// CAST (a b AS UInt32) OR CAST (a b, expr) + + if (!parse_as && ParserIdentifier().parse(pos, identifier_node, expected)) + { + expr_node->setAlias(getIdentifierName(identifier_node)); + parse_as = as_keyword_parser.ignore(pos, expected); + } if (parse_as) { + /// CAST (a AS Type) OR CAST (a AS b AS Type) OR CAST (a AS b, expr) + auto begin = pos; auto expected_copy = expected; - bool next_identifier_with_comma - = ParserIdentifier().ignore(begin, expected_copy) && ParserToken(TokenType::Comma).ignore(begin, expected_copy); + bool next_identifier = ParserIdentifier().ignore(begin, expected_copy); + bool next_identifier_with_comma = next_identifier && ParserToken(TokenType::Comma).ignore(begin, expected_copy); + bool next_identifier_with_as + = next_identifier && !next_identifier_with_comma && as_keyword_parser.ignore(begin, expected_copy); + + if (next_identifier_with_as) + { + if (ParserIdentifier().parse(pos, identifier_node, expected) && as_keyword_parser.ignore(pos, expected)) + expr_node->setAlias(getIdentifierName(identifier_node)); + else + return false; + } if (!next_identifier_with_comma && ParserDataType().parse(pos, type_node, expected)) { @@ -328,10 +346,15 @@ namespace } } - if (ParserIdentifier().parse(pos, identifier_node, expected)) - expr_node->setAlias(getIdentifierName(identifier_node)); - else if (parse_as) - return false; + /// CAST(a AS b, expr) + + if (parse_as) + { + if (ParserIdentifier().parse(pos, identifier_node, expected)) + expr_node->setAlias(getIdentifierName(identifier_node)); + else + return false; + } if (ParserToken(TokenType::Comma).ignore(pos, expected) && ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, type_node, 
expected)) @@ -349,42 +372,75 @@ namespace /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length) /// The latter will be parsed normally as a function later. + ParserKeyword as_keyword_parser("AS"); + ParserIdentifier identifier_parser; + ASTPtr expr_node; ASTPtr start_node; ASTPtr length_node; - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, expr_node, expected)) + if (!ParserExpression().parse(pos, expr_node, expected)) return false; - if (pos->type != TokenType::Comma) + auto from_keyword_parser = ParserKeyword("FROM"); + bool from_exists = from_keyword_parser.check(pos, expected); + + if (!from_exists && pos->type != TokenType::Comma) { - if (!ParserKeyword("FROM").ignore(pos, expected)) + ASTPtr identifier_node; + bool parsed_as = as_keyword_parser.ignore(pos, expected); + bool parsed_identifer = identifier_parser.parse(pos, identifier_node, expected); + + if (parsed_as && !parsed_identifer) return false; + + if (parsed_identifer) + expr_node->setAlias(getIdentifierName(identifier_node)); + + from_exists = from_keyword_parser.check(pos, expected); } - else + + if (pos->type == TokenType::Comma) { + if (from_exists) + return false; + ++pos; } - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, start_node, expected)) + if (!ParserExpression().parse(pos, start_node, expected)) return false; - if (pos->type != TokenType::ClosingRoundBracket) + auto for_keyword_parser = ParserKeyword("FOR"); + bool for_exists = for_keyword_parser.check(pos, expected); + if (!for_exists && pos->type != TokenType::Comma) { - if (pos->type != TokenType::Comma) - { - if (!ParserKeyword("FOR").ignore(pos, expected)) - return false; - } - else - { - ++pos; - } - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, length_node, expected)) + ASTPtr identifier_node; + bool parsed_as = 
as_keyword_parser.ignore(pos, expected); + bool parsed_identifer = identifier_parser.parse(pos, identifier_node, expected); + if (parsed_as && !parsed_identifer) return false; + + if (parsed_identifer) + start_node->setAlias(getIdentifierName(identifier_node)); + + for_exists = for_keyword_parser.check(pos, expected); } + bool need_parse_length_expression = for_exists; + if (pos->type == TokenType::Comma) + { + if (for_exists) + return false; + + ++pos; + need_parse_length_expression = true; + } + + if (need_parse_length_expression + && !ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, length_node, expected)) + return false; + /// Convert to canonical representation in functional form: SUBSTRING(expr, start, length) if (length_node) node = makeASTFunction("substring", expr_node, start_node, length_node); @@ -430,9 +486,28 @@ namespace if (char_override) { - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, to_remove, expected)) + if (!ParserExpression().parse(pos, to_remove, expected)) return false; - if (!ParserKeyword("FROM").ignore(pos, expected)) + + auto from_keyword_parser = ParserKeyword("FROM"); + bool from_exists = from_keyword_parser.check(pos, expected); + + if (!from_exists) + { + ASTPtr identifier_node; + bool parsed_as = ParserKeyword("AS").ignore(pos, expected); + bool parsed_identifer = ParserIdentifier().parse(pos, identifier_node, expected); + + if (parsed_as && !parsed_identifer) + return false; + + if (parsed_identifer) + to_remove->setAlias(getIdentifierName(identifier_node)); + + from_exists = from_keyword_parser.check(pos, expected); + } + + if (!from_exists) return false; auto quote_meta_func_node = std::make_shared(); @@ -458,33 +533,24 @@ namespace auto pattern_list_args = std::make_shared(); if (trim_left && trim_right) { - pattern_list_args->children = { - std::make_shared("^["), - to_remove, - std::make_shared("]+|["), - to_remove, - std::make_shared("]+$") - 
}; + pattern_list_args->children + = {std::make_shared("^["), + to_remove, + std::make_shared("]+|["), + to_remove, + std::make_shared("]+$")}; func_name = "replaceRegexpAll"; } else { if (trim_left) { - pattern_list_args->children = { - std::make_shared("^["), - to_remove, - std::make_shared("]+") - }; + pattern_list_args->children = {std::make_shared("^["), to_remove, std::make_shared("]+")}; } else { /// trim_right == false not possible - pattern_list_args->children = { - std::make_shared("["), - to_remove, - std::make_shared("]+$") - }; + pattern_list_args->children = {std::make_shared("["), to_remove, std::make_shared("]+$")}; } func_name = "replaceRegexpOne"; } @@ -563,28 +629,42 @@ namespace /// First try to match with position(needle IN haystack) /// Then with position(haystack, needle[, start_pos]) - ASTPtr expr_list_node; - if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list_node, expected)) - return false; + ParserExpressionWithOptionalAlias expr_parser(true /*allow_alias_without_as_keyword*/); - ASTExpressionList * expr_list = typeid_cast(expr_list_node.get()); - if (expr_list && expr_list->children.size() == 1) + ASTPtr first_arg_expr_node; + if (!expr_parser.parse(pos, first_arg_expr_node, expected)) { - ASTFunction * func_in = typeid_cast(expr_list->children[0].get()); - if (func_in && func_in->name == "in") + return false; + } + + ASTFunction * func_in = typeid_cast(first_arg_expr_node.get()); + if (func_in && func_in->name == "in") + { + ASTExpressionList * in_args = typeid_cast(func_in->arguments.get()); + if (in_args && in_args->children.size() == 2) { - ASTExpressionList * in_args = typeid_cast(func_in->arguments.get()); - if (in_args && in_args->children.size() == 2) - { - node = makeASTFunction("position", in_args->children[1], in_args->children[0]); - return true; - } + node = makeASTFunction("position", in_args->children[1], in_args->children[0]); + return true; } } + if (pos->type != TokenType::Comma) + 
return false; + ++pos; + + ASTPtr second_arg_expr_node; + if (!expr_parser.parse(pos, second_arg_expr_node, expected)) + { + return false; + } + + auto arguments = std::make_shared(); + arguments->children.push_back(std::move(first_arg_expr_node)); + arguments->children.push_back(std::move(second_arg_expr_node)); + auto res = std::make_shared(); res->name = "position"; - res->arguments = expr_list_node; + res->arguments = std::move(arguments); res->children.push_back(res->arguments); node = std::move(res); return true; diff --git a/tests/queries/0_stateless/02154_parser_backtracking.sh b/tests/queries/0_stateless/02154_parser_backtracking.sh index af032008069..fd227bcfc56 100755 --- a/tests/queries/0_stateless/02154_parser_backtracking.sh +++ b/tests/queries/0_stateless/02154_parser_backtracking.sh @@ -12,14 +12,14 @@ echo 'SELECT '"$(perl -e 'print "CAST(" x 100')"'a, b'"$(perl -e 'print ")" x 10 echo 'SELECT '"$(perl -e 'print "CAST(" x 100')"'a AS b'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error' echo 'SELECT '"$(perl -e 'print "CAST(" x 100')"'1'"$(perl -e 'print ", '"'UInt8'"')" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 echo 'SELECT '"$(perl -e 'print "CAST(" x 100')"'1'"$(perl -e 'print " AS UInt8)" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 - echo "SELECT fo,22222?LUTAY(SELECT(NOT CAUTAY(SELECT(NOT CAST(NOTT(NOT CAST(NOT NOT LEfT(NOT coARRAYlumnsFLuTAY(SELECT(NO0?LUTAY(SELECT(NOT CAUTAY(SELECT(NOT CAST(NOTT(NOT CAST(NOT NOT LEfT(NOT coARRAYlumnsFLuTAY(SELECT(NOTAYTAY(SELECT(NOTAYEFAULT(fo,22222?LUTAY(%SELECT(NOT CAST(NOT NOTAYTAY(SELECT(NOTAYEFAULT(fo,22222?LUTAY(SELECT(NOT CAST(NOT NOT (NOe)))))))))))))))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error' -echo "SELECT 
position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a b))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error' -echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a, b))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER' -echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a, b, c))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER' -echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER' +echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a b))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error' +echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a, b))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error' +echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a, b, c))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error' + +echo 'SELECT '"$(perl -e 'print 
"position(" x 100')"'x'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error' echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x y'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error' -echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x IN y'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER' +echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x IN y'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error' echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x'"$(perl -e 'print " IN x)" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER' echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x'"$(perl -e 'print ", x)" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER' diff --git a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference index 18191d9d315..bf33e71e45e 100644 --- a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference +++ b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference @@ -1,14 +1,36 @@ -1234 1234 UInt32 -1234 1234 UInt32 +1234 1234 1234 -234 1234 2 3 -234 1234 2 3 +1234 1234 +1234 1234 +1234 1234 +1234 1234 +1234 +1234 1234 +1234 1234 +1234 UInt32 +1234 UInt32 +1234 1234 UInt32 +1234 1234 UInt32 +234 +234 1234 +234 1234 +234 2 +234 2 234 1234 2 234 1234 2 +234 1234 2 +23 +23 2 +23 2 234 1234 2 3 234 1234 2 3 bca a abca bca a abca +bca abca +bca abca +bca a +bca a +bca abc a abca abc a abca bc a abca diff --git a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql index ca786910882..6f95131315f 100644 
--- a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql +++ b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql @@ -1,33 +1,57 @@ -- CAST expression +-- cast(expr [[AS] alias_1] AS Type) + +SELECT cast('1234' AS UInt32); +SELECT cast('1234' AS lhs AS UInt32), lhs; +SELECT cast('1234' lhs AS UInt32), lhs; +SELECT cast(('1234' AS lhs) AS UInt32), lhs; +SELECT cast(('1234' AS lhs) rhs AS UInt32), rhs; +SELECT cast(('1234' AS lhs) AS rhs AS UInt32), rhs; + +-- cast(expr [[AS] alias_1], type_expr [[as] alias_2]) + +SELECT cast('1234', 'UInt32'); +SELECT cast('1234' AS lhs, 'UInt32'), lhs; +SELECT cast('1234' lhs, 'UInt32'), lhs; +SELECT cast('1234', 'UInt32' AS rhs), rhs; +SELECT cast('1234', 'UInt32' rhs), rhs; SELECT cast('1234' AS lhs, 'UInt32' AS rhs), lhs, rhs; SELECT cast('1234' lhs, 'UInt32' rhs), lhs, rhs; -SELECT cast(('1234' AS lhs) AS UInt32), lhs; -SELECT cast('1234' AS lhs AS UInt32), lhs; --{clientError 62} -SELECT cast('1234' lhs AS UInt32), lhs; --{clientError 62} -- SUBSTRING expression +-- SUBSTRING(expr FROM start) + +SELECT substring('1234' FROM 2); +SELECT substring('1234' AS lhs FROM 2), lhs; +SELECT substring('1234' lhs FROM 2), lhs; +SELECT substring('1234' FROM 2 AS rhs), rhs; +SELECT substring('1234' FROM 2 rhs), rhs; +SELECT substring('1234' AS lhs FROM 2 AS rhs), lhs, rhs; +SELECT substring('1234' lhs FROM 2 rhs), lhs, rhs; +SELECT substring(('1234' AS lhs) FROM (2 AS rhs)), lhs, rhs; + +-- SUBSTRING(expr FROM start FOR length) + +SELECT substring('1234' FROM 2 FOR 2); +SELECT substring('1234' FROM 2 FOR 2 AS lhs), lhs; +SELECT substring('1234' FROM 2 FOR 2 lhs), lhs; + -- SUBSTRING(expr, start, length) SELECT substring('1234' AS arg_1, 2 AS arg_2, 3 AS arg_3), arg_1, arg_2, arg_3; SELECT substring('1234' arg_1, 2 arg_2, 3 arg_3), arg_1, arg_2, arg_3; --- SUBSTRING(expr FROM start) - -SELECT substring('1234' AS arg_1 FROM 2 AS arg_2), arg_1, arg_2; -SELECT substring('1234' arg_1 FROM 2 arg_2), arg_1, 
arg_2; - --- SUBSTRING(expr FROM start FOR length) - -SELECT substring('1234' AS arg_1 FROM 2 AS arg_2 FOR 3 AS arg_3), arg_1, arg_2, arg_3; -SELECT substring('1234' arg_1 FROM 2 arg_2 FOR 3 arg_3), arg_1, arg_2, arg_3; - - --- TRIM expression ([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) +-- -- TRIM expression ([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) SELECT trim(LEADING 'a' AS arg_1 FROM 'abca' AS arg_2), arg_1, arg_2; SELECT trim(LEADING 'a' arg_1 FROM 'abca' arg_2), arg_1, arg_2; +SELECT trim(LEADING 'a' FROM 'abca' AS arg_2), arg_2; +SELECT trim(LEADING 'a' FROM 'abca' arg_2), arg_2; +SELECT trim(LEADING 'a' AS arg_1 FROM 'abca'), arg_1; +SELECT trim(LEADING 'a' arg_1 FROM 'abca'), arg_1; +SELECT trim(LEADING 'a' FROM 'abca'); SELECT trim(TRAILING 'a' AS arg_1 FROM 'abca' AS arg_2), arg_1, arg_2; SELECT trim(TRAILING 'a' arg_1 FROM 'abca' arg_2), arg_1, arg_2; From c9a1605b1eaf48d41efaf79dd45083ab8c00eda4 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Apr 2022 23:09:22 +0200 Subject: [PATCH 4/7] Fixed tests --- src/Parsers/ExpressionElementParsers.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 50e8002e6d0..122d9c545aa 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -658,10 +658,22 @@ namespace return false; } + ASTPtr start_pos_expr_node; + if (pos->type == TokenType::Comma) + { + ++pos; + + if (!expr_parser.parse(pos, start_pos_expr_node, expected)) + return false; + } + auto arguments = std::make_shared<ASTExpressionList>(); arguments->children.push_back(std::move(first_arg_expr_node)); arguments->children.push_back(std::move(second_arg_expr_node)); + if (start_pos_expr_node) + arguments->children.push_back(std::move(start_pos_expr_node)); + auto res = std::make_shared<ASTFunction>(); res->name = "position"; res->arguments = std::move(arguments); From 
01e09ba44e05098ac00f3226de9f529f07a6e730 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 21 Apr 2022 12:29:35 +0200 Subject: [PATCH 5/7] Fixed tests --- src/Parsers/ExpressionElementParsers.cpp | 22 +++++++++++++++---- ...ecial_operator_parse_alias_check.reference | 4 ++++ ...267_special_operator_parse_alias_check.sql | 4 ++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 122d9c545aa..5c28fd8190b 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -593,7 +593,6 @@ namespace /// First try to match with date extract operator EXTRACT(part FROM date) /// Then with function extract(haystack, pattern) - IParser::Pos begin = pos; IntervalKind interval_kind; if (parseIntervalKind(pos, expected, interval_kind)) @@ -608,9 +607,9 @@ namespace node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr); return true; } - } - pos = begin; + return false; + } ASTPtr expr_list; if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) @@ -758,7 +757,22 @@ namespace if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, right_node, expected)) return false; - node = makeASTFunction("dateDiff", std::make_shared<ASTLiteral>(interval_kind.toDateDiffUnit()), left_node, right_node); + ASTPtr timezone_node; + + if (pos->type == TokenType::Comma) + { + /// Optional timezone + ++pos; + + if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, timezone_node, expected)) + return false; + } + + auto interval_literal = std::make_shared<ASTLiteral>(interval_kind.toDateDiffUnit()); + if (timezone_node) + node = makeASTFunction("dateDiff", std::move(interval_literal), std::move(left_node), std::move(right_node), std::move(timezone_node)); + else + node = makeASTFunction("dateDiff", std::move(interval_literal), 
std::move(left_node), std::move(right_node)); return true; } diff --git a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference index bf33e71e45e..401d4d493a8 100644 --- a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference +++ b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.reference @@ -54,3 +54,7 @@ bc a abca 1 2019-05-05 2019-05-06 1 2019-05-05 2019-05-06 1 2019-05-05 2019-05-06 +1 2019-05-05 2019-05-06 +1 2019-05-05 2019-05-06 +1 2019-05-05 2019-05-06 +1 2019-05-05 2019-05-06 diff --git a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql index 6f95131315f..423bb619b00 100644 --- a/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql +++ b/tests/queries/0_stateless/02267_special_operator_parse_alias_check.sql @@ -110,8 +110,12 @@ SELECT dateSub(DAY, 1 arg_1, toDate('2019-05-05') arg_2), arg_1, arg_2; SELECT dateDiff(DAY, toDate('2019-05-05') AS arg_1, toDate('2019-05-06') AS arg_2), arg_1, arg_2; SELECT dateDiff(DAY, toDate('2019-05-05') arg_1, toDate('2019-05-06') arg_2), arg_1, arg_2; +SELECT dateDiff(DAY, toDate('2019-05-05') AS arg_1, toDate('2019-05-06') AS arg_2, 'UTC'), arg_1, arg_2; +SELECT dateDiff(DAY, toDate('2019-05-05') arg_1, toDate('2019-05-06') arg_2, 'UTC'), arg_1, arg_2; -- dateDiff('unit', startdate, enddate, [timezone]) SELECT dateDiff('DAY', toDate('2019-05-05') AS arg_1, toDate('2019-05-06') AS arg_2), arg_1, arg_2; SELECT dateDiff('DAY', toDate('2019-05-05') arg_1, toDate('2019-05-06') arg_2), arg_1, arg_2; +SELECT dateDiff('DAY', toDate('2019-05-05') AS arg_1, toDate('2019-05-06') AS arg_2, 'UTC'), arg_1, arg_2; +SELECT dateDiff('DAY', toDate('2019-05-05') arg_1, toDate('2019-05-06') arg_2, 'UTC'), arg_1, arg_2; From be53daf405860d414376441930d6e9d81754e70e Mon Sep 17 00:00:00 2001 
From: Maksim Kita Date: Thu, 21 Apr 2022 14:20:10 +0200 Subject: [PATCH 6/7] Fixed tests --- src/Parsers/ExpressionElementParsers.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 5c28fd8190b..aa629d50b12 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -593,6 +593,7 @@ namespace /// First try to match with date extract operator EXTRACT(part FROM date) /// Then with function extract(haystack, pattern) + IParser::Pos begin = pos; IntervalKind interval_kind; if (parseIntervalKind(pos, expected, interval_kind)) @@ -607,10 +608,10 @@ namespace node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr); return true; } - - return false; } + pos = begin; + ASTPtr expr_list; if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) return false; From a0777511005b63384e7de7b08514357426a52cf9 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 21 Apr 2022 21:31:50 +0200 Subject: [PATCH 7/7] Fixed style check --- src/Parsers/ExpressionElementParsers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index aa629d50b12..2fe03e9f95c 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -771,7 +771,7 @@ namespace auto interval_literal = std::make_shared<ASTLiteral>(interval_kind.toDateDiffUnit()); if (timezone_node) - node = makeASTFunction("dateDiff", std::move(interval_literal), std::move(left_node), std::move(right_node), std::move(timezone_node)); + node = makeASTFunction("dateDiff", std::move(interval_literal), std::move(left_node), std::move(right_node), std::move(timezone_node)); else node = makeASTFunction("dateDiff", std::move(interval_literal), std::move(left_node), std::move(right_node));