Fixed tests

This commit is contained in:
Maksim Kita 2022-04-20 16:13:34 +02:00
parent 90e3a75874
commit fb28791d83
4 changed files with 214 additions and 88 deletions

View File

@ -298,12 +298,9 @@ namespace
{
bool parseCastAs(IParser::Pos & pos, ASTPtr & node, Expected & expected)
{
/** Possible variants for cast operator
* First try to match with cast(expr AS Type);
* Then try to match with cast(expr [[as] alias_1], alias_expr [[as] alias_2]);
*
* We need to check if after keyword AS there is identifier followed by comma,
* if it is then it is not cast(expr AS Type)
/** Possible variants for cast operator cast(expr [[AS] alias_1] AS Type), cast(expr [[AS] alias_1], type_expr [[as] alias_2]).
* First try to match with cast(expr [[AS] alias_1] AS Type)
* Then try to match with cast(expr [[AS] alias_1], type_expr [[as] alias_2]).
*/
ASTPtr expr_node;
@ -312,14 +309,35 @@ namespace
if (ParserExpression().parse(pos, expr_node, expected))
{
bool parse_as = ParserKeyword("AS").ignore(pos, expected);
ParserKeyword as_keyword_parser("AS");
bool parse_as = as_keyword_parser.ignore(pos, expected);
/// CAST (a b AS UInt32) OR CAST (a b, expr)
if (!parse_as && ParserIdentifier().parse(pos, identifier_node, expected))
{
expr_node->setAlias(getIdentifierName(identifier_node));
parse_as = as_keyword_parser.ignore(pos, expected);
}
if (parse_as)
{
/// CAST (a AS Type) OR CAST (a AS b AS Type) OR CAST (a AS b, expr)
auto begin = pos;
auto expected_copy = expected;
bool next_identifier_with_comma
= ParserIdentifier().ignore(begin, expected_copy) && ParserToken(TokenType::Comma).ignore(begin, expected_copy);
bool next_identifier = ParserIdentifier().ignore(begin, expected_copy);
bool next_identifier_with_comma = next_identifier && ParserToken(TokenType::Comma).ignore(begin, expected_copy);
bool next_identifier_with_as
= next_identifier && !next_identifier_with_comma && as_keyword_parser.ignore(begin, expected_copy);
if (next_identifier_with_as)
{
if (ParserIdentifier().parse(pos, identifier_node, expected) && as_keyword_parser.ignore(pos, expected))
expr_node->setAlias(getIdentifierName(identifier_node));
else
return false;
}
if (!next_identifier_with_comma && ParserDataType().parse(pos, type_node, expected))
{
@ -328,10 +346,15 @@ namespace
}
}
if (ParserIdentifier().parse(pos, identifier_node, expected))
expr_node->setAlias(getIdentifierName(identifier_node));
else if (parse_as)
return false;
/// CAST(a AS b, expr)
if (parse_as)
{
if (ParserIdentifier().parse(pos, identifier_node, expected))
expr_node->setAlias(getIdentifierName(identifier_node));
else
return false;
}
if (ParserToken(TokenType::Comma).ignore(pos, expected)
&& ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, type_node, expected))
@ -349,42 +372,75 @@ namespace
/// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length)
/// The latter will be parsed normally as a function later.
ParserKeyword as_keyword_parser("AS");
ParserIdentifier identifier_parser;
ASTPtr expr_node;
ASTPtr start_node;
ASTPtr length_node;
if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, expr_node, expected))
if (!ParserExpression().parse(pos, expr_node, expected))
return false;
if (pos->type != TokenType::Comma)
auto from_keyword_parser = ParserKeyword("FROM");
bool from_exists = from_keyword_parser.check(pos, expected);
if (!from_exists && pos->type != TokenType::Comma)
{
if (!ParserKeyword("FROM").ignore(pos, expected))
ASTPtr identifier_node;
bool parsed_as = as_keyword_parser.ignore(pos, expected);
bool parsed_identifer = identifier_parser.parse(pos, identifier_node, expected);
if (parsed_as && !parsed_identifer)
return false;
if (parsed_identifer)
expr_node->setAlias(getIdentifierName(identifier_node));
from_exists = from_keyword_parser.check(pos, expected);
}
else
if (pos->type == TokenType::Comma)
{
if (from_exists)
return false;
++pos;
}
if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, start_node, expected))
if (!ParserExpression().parse(pos, start_node, expected))
return false;
if (pos->type != TokenType::ClosingRoundBracket)
auto for_keyword_parser = ParserKeyword("FOR");
bool for_exists = for_keyword_parser.check(pos, expected);
if (!for_exists && pos->type != TokenType::Comma)
{
if (pos->type != TokenType::Comma)
{
if (!ParserKeyword("FOR").ignore(pos, expected))
return false;
}
else
{
++pos;
}
if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, length_node, expected))
ASTPtr identifier_node;
bool parsed_as = as_keyword_parser.ignore(pos, expected);
bool parsed_identifer = identifier_parser.parse(pos, identifier_node, expected);
if (parsed_as && !parsed_identifer)
return false;
if (parsed_identifer)
start_node->setAlias(getIdentifierName(identifier_node));
for_exists = for_keyword_parser.check(pos, expected);
}
bool need_parse_length_expression = for_exists;
if (pos->type == TokenType::Comma)
{
if (for_exists)
return false;
++pos;
need_parse_length_expression = true;
}
if (need_parse_length_expression
&& !ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, length_node, expected))
return false;
/// Convert to canonical representation in functional form: SUBSTRING(expr, start, length)
if (length_node)
node = makeASTFunction("substring", expr_node, start_node, length_node);
@ -430,9 +486,28 @@ namespace
if (char_override)
{
if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, to_remove, expected))
if (!ParserExpression().parse(pos, to_remove, expected))
return false;
if (!ParserKeyword("FROM").ignore(pos, expected))
auto from_keyword_parser = ParserKeyword("FROM");
bool from_exists = from_keyword_parser.check(pos, expected);
if (!from_exists)
{
ASTPtr identifier_node;
bool parsed_as = ParserKeyword("AS").ignore(pos, expected);
bool parsed_identifer = ParserIdentifier().parse(pos, identifier_node, expected);
if (parsed_as && !parsed_identifer)
return false;
if (parsed_identifer)
to_remove->setAlias(getIdentifierName(identifier_node));
from_exists = from_keyword_parser.check(pos, expected);
}
if (!from_exists)
return false;
auto quote_meta_func_node = std::make_shared<ASTFunction>();
@ -458,33 +533,24 @@ namespace
auto pattern_list_args = std::make_shared<ASTExpressionList>();
if (trim_left && trim_right)
{
pattern_list_args->children = {
std::make_shared<ASTLiteral>("^["),
to_remove,
std::make_shared<ASTLiteral>("]+|["),
to_remove,
std::make_shared<ASTLiteral>("]+$")
};
pattern_list_args->children
= {std::make_shared<ASTLiteral>("^["),
to_remove,
std::make_shared<ASTLiteral>("]+|["),
to_remove,
std::make_shared<ASTLiteral>("]+$")};
func_name = "replaceRegexpAll";
}
else
{
if (trim_left)
{
pattern_list_args->children = {
std::make_shared<ASTLiteral>("^["),
to_remove,
std::make_shared<ASTLiteral>("]+")
};
pattern_list_args->children = {std::make_shared<ASTLiteral>("^["), to_remove, std::make_shared<ASTLiteral>("]+")};
}
else
{
/// trim_right == false not possible
pattern_list_args->children = {
std::make_shared<ASTLiteral>("["),
to_remove,
std::make_shared<ASTLiteral>("]+$")
};
pattern_list_args->children = {std::make_shared<ASTLiteral>("["), to_remove, std::make_shared<ASTLiteral>("]+$")};
}
func_name = "replaceRegexpOne";
}
@ -563,28 +629,42 @@ namespace
/// First try to match with position(needle IN haystack)
/// Then with position(haystack, needle[, start_pos])
ASTPtr expr_list_node;
if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list_node, expected))
return false;
ParserExpressionWithOptionalAlias expr_parser(true /*allow_alias_without_as_keyword*/);
ASTExpressionList * expr_list = typeid_cast<ASTExpressionList *>(expr_list_node.get());
if (expr_list && expr_list->children.size() == 1)
ASTPtr first_arg_expr_node;
if (!expr_parser.parse(pos, first_arg_expr_node, expected))
{
ASTFunction * func_in = typeid_cast<ASTFunction *>(expr_list->children[0].get());
if (func_in && func_in->name == "in")
return false;
}
ASTFunction * func_in = typeid_cast<ASTFunction *>(first_arg_expr_node.get());
if (func_in && func_in->name == "in")
{
ASTExpressionList * in_args = typeid_cast<ASTExpressionList *>(func_in->arguments.get());
if (in_args && in_args->children.size() == 2)
{
ASTExpressionList * in_args = typeid_cast<ASTExpressionList *>(func_in->arguments.get());
if (in_args && in_args->children.size() == 2)
{
node = makeASTFunction("position", in_args->children[1], in_args->children[0]);
return true;
}
node = makeASTFunction("position", in_args->children[1], in_args->children[0]);
return true;
}
}
if (pos->type != TokenType::Comma)
return false;
++pos;
ASTPtr second_arg_expr_node;
if (!expr_parser.parse(pos, second_arg_expr_node, expected))
{
return false;
}
auto arguments = std::make_shared<ASTExpressionList>();
arguments->children.push_back(std::move(first_arg_expr_node));
arguments->children.push_back(std::move(second_arg_expr_node));
auto res = std::make_shared<ASTFunction>();
res->name = "position";
res->arguments = expr_list_node;
res->arguments = std::move(arguments);
res->children.push_back(res->arguments);
node = std::move(res);
return true;

View File

@ -12,14 +12,14 @@ echo 'SELECT '"$(perl -e 'print "CAST(" x 100')"'a, b'"$(perl -e 'print ")" x 10
echo 'SELECT '"$(perl -e 'print "CAST(" x 100')"'a AS b'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error'
echo 'SELECT '"$(perl -e 'print "CAST(" x 100')"'1'"$(perl -e 'print ", '"'UInt8'"')" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000
echo 'SELECT '"$(perl -e 'print "CAST(" x 100')"'1'"$(perl -e 'print " AS UInt8)" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000
echo "SELECT fo,22222?LUTAY(SELECT(NOT CAUTAY(SELECT(NOT CAST(NOTT(NOT CAST(NOT NOT LEfT(NOT coARRAYlumnsFLuTAY(SELECT(NO0?LUTAY(SELECT(NOT CAUTAY(SELECT(NOT CAST(NOTT(NOT CAST(NOT NOT LEfT(NOT coARRAYlumnsFLuTAY(SELECT(NOTAYTAY(SELECT(NOTAYEFAULT(fo,22222?LUTAY(%SELECT(NOT CAST(NOT NOTAYTAY(SELECT(NOTAYEFAULT(fo,22222?LUTAY(SELECT(NOT CAST(NOT NOT (NOe)))))))))))))))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error'
echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a b))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error'
echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a, b))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER'
echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a, b, c))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER'
echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER'
echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a b))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error'
echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a, b))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error'
echo "SELECT position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(position(a, b, c))))))))))))))))))))" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error'
echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error'
echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x y'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error'
echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x IN y'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER'
echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x IN y'"$(perl -e 'print ")" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'Syntax error'
echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x'"$(perl -e 'print " IN x)" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER'
echo 'SELECT '"$(perl -e 'print "position(" x 100')"'x'"$(perl -e 'print ", x)" x 100')" | ${CLICKHOUSE_LOCAL} --max_parser_depth 10000 2>&1 | grep -cF 'UNKNOWN_IDENTIFIER'

View File

@ -1,14 +1,36 @@
1234 1234 UInt32
1234 1234 UInt32
1234
1234 1234
234 1234 2 3
234 1234 2 3
1234 1234
1234 1234
1234 1234
1234 1234
1234
1234 1234
1234 1234
1234 UInt32
1234 UInt32
1234 1234 UInt32
1234 1234 UInt32
234
234 1234
234 1234
234 2
234 2
234 1234 2
234 1234 2
234 1234 2
23
23 2
23 2
234 1234 2 3
234 1234 2 3
bca a abca
bca a abca
bca abca
bca abca
bca a
bca a
bca
abc a abca
abc a abca
bc a abca

View File

@ -1,33 +1,57 @@
-- CAST expression
-- cast(expr [[AS] alias_1] AS Type)
SELECT cast('1234' AS UInt32);
SELECT cast('1234' AS lhs AS UInt32), lhs;
SELECT cast('1234' lhs AS UInt32), lhs;
SELECT cast(('1234' AS lhs) AS UInt32), lhs;
SELECT cast(('1234' AS lhs) rhs AS UInt32), rhs;
SELECT cast(('1234' AS lhs) AS rhs AS UInt32), rhs;
-- cast(expr [[AS] alias_1], type_expr [[as] alias_2])
SELECT cast('1234', 'UInt32');
SELECT cast('1234' AS lhs, 'UInt32'), lhs;
SELECT cast('1234' lhs, 'UInt32'), lhs;
SELECT cast('1234', 'UInt32' AS rhs), rhs;
SELECT cast('1234', 'UInt32' rhs), rhs;
SELECT cast('1234' AS lhs, 'UInt32' AS rhs), lhs, rhs;
SELECT cast('1234' lhs, 'UInt32' rhs), lhs, rhs;
SELECT cast(('1234' AS lhs) AS UInt32), lhs;
SELECT cast('1234' AS lhs AS UInt32), lhs; --{clientError 62}
SELECT cast('1234' lhs AS UInt32), lhs; --{clientError 62}
-- SUBSTRING expression
-- SUBSTRING(expr FROM start)
SELECT substring('1234' FROM 2);
SELECT substring('1234' AS lhs FROM 2), lhs;
SELECT substring('1234' lhs FROM 2), lhs;
SELECT substring('1234' FROM 2 AS rhs), rhs;
SELECT substring('1234' FROM 2 rhs), rhs;
SELECT substring('1234' AS lhs FROM 2 AS rhs), lhs, rhs;
SELECT substring('1234' lhs FROM 2 rhs), lhs, rhs;
SELECT substring(('1234' AS lhs) FROM (2 AS rhs)), lhs, rhs;
-- SUBSTRING(expr FROM start FOR length)
SELECT substring('1234' FROM 2 FOR 2);
SELECT substring('1234' FROM 2 FOR 2 AS lhs), lhs;
SELECT substring('1234' FROM 2 FOR 2 lhs), lhs;
-- SUBSTRING(expr, start, length)
SELECT substring('1234' AS arg_1, 2 AS arg_2, 3 AS arg_3), arg_1, arg_2, arg_3;
SELECT substring('1234' arg_1, 2 arg_2, 3 arg_3), arg_1, arg_2, arg_3;
-- SUBSTRING(expr FROM start)
SELECT substring('1234' AS arg_1 FROM 2 AS arg_2), arg_1, arg_2;
SELECT substring('1234' arg_1 FROM 2 arg_2), arg_1, arg_2;
-- SUBSTRING(expr FROM start FOR length)
SELECT substring('1234' AS arg_1 FROM 2 AS arg_2 FOR 3 AS arg_3), arg_1, arg_2, arg_3;
SELECT substring('1234' arg_1 FROM 2 arg_2 FOR 3 arg_3), arg_1, arg_2, arg_3;
-- TRIM expression ([[LEADING|TRAILING|BOTH] trim_character FROM] input_string)
-- -- TRIM expression ([[LEADING|TRAILING|BOTH] trim_character FROM] input_string)
SELECT trim(LEADING 'a' AS arg_1 FROM 'abca' AS arg_2), arg_1, arg_2;
SELECT trim(LEADING 'a' arg_1 FROM 'abca' arg_2), arg_1, arg_2;
SELECT trim(LEADING 'a' FROM 'abca' AS arg_2), arg_2;
SELECT trim(LEADING 'a' FROM 'abca' arg_2), arg_2;
SELECT trim(LEADING 'a' AS arg_1 FROM 'abca'), arg_1;
SELECT trim(LEADING 'a' arg_1 FROM 'abca'), arg_1;
SELECT trim(LEADING 'a' FROM 'abca');
SELECT trim(TRAILING 'a' AS arg_1 FROM 'abca' AS arg_2), arg_1, arg_2;
SELECT trim(TRAILING 'a' arg_1 FROM 'abca' arg_2), arg_1, arg_2;