From 29803df5fa7f138e743556bc12c4e4dfde70de97 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 24 Feb 2022 13:54:30 +0000 Subject: [PATCH 001/173] pre-draft --- src/Parsers/ExpressionListParsers.cpp | 425 ++++++++++++++++++++++++ src/Parsers/ExpressionListParsers.h | 62 ++++ src/Parsers/ParserSelectQuery.cpp | 16 +- tests/queries/0_stateless/_02.reference | 33 ++ tests/queries/0_stateless/_02.sh | 64 ++++ 5 files changed, 598 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/_02.reference create mode 100755 tests/queries/0_stateless/_02.sh diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 13af308736b..0b5b6b293e6 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -17,8 +17,13 @@ #include #include +#include + using namespace std::literals; +#include +#include +#include namespace DB { @@ -592,6 +597,421 @@ bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return elem_parser.parse(pos, node, expected); } +//////////////////////////////////////////////////////////////////////////////////////// +// class Operator: +// - defines structure of certain operator +class Operator +{ +public: + Operator() + { + } + + Operator(String func_name_, Int32 priority_, Int32 arity_) : func_name(func_name_), priority(priority_), arity(arity_) + { + } + + String func_name; + Int32 priority; + Int32 arity; +}; + + +class Layer +{ +public: + Layer(TokenType end_bracket_ = TokenType::Whitespace, String func_name_ = "") : end_bracket(end_bracket_), func_name(func_name_) + { + } + + bool popOperator(Operator & op) + { + if (operators.size() == 0) + return false; + + op = std::move(operators.back()); + operators.pop_back(); + + return true; + } + + void pushOperator(Operator op) + { + operators.push_back(std::move(op)); + } + + bool popOperand(ASTPtr & op) + { + if (operands.size() == 0) + return false; + + op = 
std::move(operands.back()); + operands.pop_back(); + + return true; + } + + void pushOperand(ASTPtr op) + { + operands.push_back(std::move(op)); + } + + void pushResult(ASTPtr op) + { + result.push_back(std::move(op)); + } + + bool getResult(ASTPtr & op) + { + ASTs res; + std::swap(res, result); + + if (!func_name.empty()) + { + // Round brackets can mean priority operator together with function tuple() + if (func_name == "tuple" && res.size() == 1) + op = std::move(res[0]); + else + op = makeASTFunction(func_name, std::move(res)); + + return true; + } + + if (res.size() == 1) + { + op = std::move(res[0]); + return true; + } + + return false; + } + + TokenType endBracket() + { + return end_bracket; + } + + int previousPriority() + { + if (operators.empty()) + return 0; + + return operators.back().priority; + } + + int empty() + { + return operators.empty() && operands.empty(); + } + + bool lastNOperands(ASTs & asts, size_t n) + { + if (n > operands.size()) + return false; + + auto start = operands.begin() + operands.size() - n; + asts.insert(asts.end(), std::make_move_iterator(start), std::make_move_iterator(operands.end())); + operands.erase(start, operands.end()); + + return true; + } + +private: + std::vector operators; + ASTs operands; + ASTs result; + TokenType end_bracket; + String func_name; +}; + + +bool ParseCastExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + IParser::Pos begin = pos; + + if (ParserCastOperator().parse(pos, node, expected)) + return true; + + pos = begin; + + /// As an exception, negative numbers should be parsed as literals, and not as an application of the operator. + if (pos->type == TokenType::Minus) + { + if (ParserLiteral().parse(pos, node, expected)) + return true; + + pos = begin; + } + return false; +} + +bool ParseDateOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + + /// If no DATE keyword, go to the nested parser. 
+ if (!ParserKeyword("DATE").ignore(pos, expected)) + return false; + + ASTPtr expr; + if (!ParserStringLiteral().parse(pos, expr, expected)) + { + pos = begin; + return false; + } + + node = makeASTFunction("toDate", expr); + return true; +} + +bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + + /// If no TIMESTAMP keyword, go to the nested parser. + if (!ParserKeyword("TIMESTAMP").ignore(pos, expected)) + return false; + + ASTPtr expr; + if (!ParserStringLiteral().parse(pos, expr, expected)) + { + pos = begin; + return false; + } + + node = makeASTFunction("toDateTime", expr); + + return true; +} + +bool wrapLayer(Layer & layer) +{ + Operator cur_op; + while (layer.popOperator(cur_op)) + { + auto func = makeASTFunction(cur_op.func_name); + + if (!layer.lastNOperands(func->children[0]->children, cur_op.arity)) + return false; + + layer.pushOperand(func); + } + + ASTPtr res; + if (!layer.popOperand(res)) + return false; + + layer.pushResult(res); + + return layer.empty(); +} + +enum Action +{ + OPERAND, + OPERATOR +}; + +bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + static std::vector> op_table({ + {"+", Operator("plus", 20, 2)}, // Base arithmetics + {"-", Operator("minus", 20, 2)}, + {"*", Operator("multiply", 30, 2)}, + {"/", Operator("divide", 30, 2)}, + {"%", Operator("modulo", 30, 2)}, + {"MOD", Operator("modulo", 30, 2)}, + {"DIV", Operator("intDiv", 30, 2)}, + {"==", Operator("equals", 10, 2)}, // Base logic + {"!=", Operator("notEquals", 10, 2)}, + {"<>", Operator("notEquals", 10, 2)}, + {"<=", Operator("lessOrEquals", 10, 2)}, + {">=", Operator("greaterOrEquals", 10, 2)}, + {"<", Operator("less", 10, 2)}, + {">", Operator("greater", 10, 2)}, + {"=", Operator("equals", 10, 2)}, + {"AND", Operator("and", 5, 2)}, // AND OR + {"OR", Operator("or", 4, 2)}, + {"||", Operator("concat", 30, 2)}, // concat() func + {".", Operator("tupleElement", 40, 2)}, // 
tupleElement() func + {"IS NULL", Operator("isNull", 40, 1)}, // IS (NOT) NULL - correct priority ? + {"IS NOT NULL", Operator("isNotNull", 40, 1)}, + {"LIKE", Operator("like", 10, 2)}, // LIKE funcs + {"ILIKE", Operator("ilike", 10, 2)}, + {"NOT LIKE", Operator("notLike", 10, 2)}, + {"NOT ILIKE", Operator("notILike", 10, 2)}, + {"IN", Operator("in", 10, 2)}, // IN funcs + {"NOT IN", Operator("notIn", 10, 2)}, + {"GLOBAL IN", Operator("globalIn", 10, 2)}, + {"GLOBAL NOT IN", Operator("globalNotIn", 10, 2)}, + }); + + static std::vector> op_table_unary({ + {"-", Operator("negate", 40, 1)}, + {"NOT", Operator("not", 9, 1)} + }); + + ParserCompoundIdentifier identifier_parser; + ParserNumber number_parser; + ParserAsterisk asterisk_parser; + ParserStringLiteral literal_parser; + + Action next = Action::OPERAND; + + std::vector storage(1); + + while (pos.isValid()) + { + if (next == Action::OPERAND) + { + next = Action::OPERATOR; + ASTPtr tmp; + + /// Special case for cast expression + if (ParseCastExpression(pos, tmp, expected)) + { + storage.back().pushOperand(std::move(tmp)); + continue; + } + + /// Try to find any unary operators + auto cur_op = op_table_unary.begin(); + for (; cur_op != op_table_unary.end(); ++cur_op) + { + if (parseOperator(pos, cur_op->first, expected)) + break; + } + + if (cur_op != op_table_unary.end()) + { + next = Action::OPERAND; + storage.back().pushOperator(cur_op->second); + } + else if (ParseDateOperatorExpression(pos, tmp, expected) || + ParseTimestampOperatorExpression(pos, tmp, expected)) + { + storage.back().pushOperand(std::move(tmp)); + } + else if (identifier_parser.parse(pos, tmp, expected) || + number_parser.parse(pos, tmp, expected) || + asterisk_parser.parse(pos, tmp, expected) || + literal_parser.parse(pos, tmp, expected)) + { + /// If the next token is '(' then it is a plain function, '[' - arrayElement function + + if (pos->type == TokenType::OpeningRoundBracket) + { + next = Action::OPERAND; + + 
storage.emplace_back(TokenType::ClosingRoundBracket, getIdentifierName(tmp)); + ++pos; + } + else if (pos->type == TokenType::OpeningSquareBracket) + { + next = Action::OPERAND; + + storage.back().pushOperand(std::move(tmp)); + storage.back().pushOperator(Operator("arrayElement", 40, 2)); + storage.emplace_back(TokenType::ClosingSquareBracket); + ++pos; + } + else + { + storage.back().pushOperand(std::move(tmp)); + } + } + else if (pos->type == TokenType::OpeningRoundBracket) + { + next = Action::OPERAND; + storage.emplace_back(TokenType::ClosingRoundBracket, "tuple"); + ++pos; + } + else if (pos->type == TokenType::OpeningSquareBracket) + { + next = Action::OPERAND; + storage.emplace_back(TokenType::ClosingSquareBracket, "array"); + ++pos; + } + else + { + break; + } + } + else + { + next = Action::OPERAND; + + /// Try to find operators from 'op_table' + auto cur_op = op_table.begin(); + for (; cur_op != op_table.end(); ++cur_op) + { + if (parseOperator(pos, cur_op->first, expected)) + break; + } + + if (cur_op != op_table.end()) + { + while (storage.back().previousPriority() >= cur_op->second.priority) + { + Operator prev_op; + storage.back().popOperator(prev_op); + auto func = makeASTFunction(prev_op.func_name); + + if (!storage.back().lastNOperands(func->children[0]->children, prev_op.arity)) + return false; + + storage.back().pushOperand(func); + } + storage.back().pushOperator(cur_op->second); + } + else if (pos->type == TokenType::Comma) + { + if (storage.size() == 1) + break; + + if (!wrapLayer(storage.back())) + return false; + + ++pos; + } + else if (pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) + { + next = Action::OPERATOR; + + if (pos->type != storage.back().endBracket()) + return false; + + if (!wrapLayer(storage.back())) + return false; + + ASTPtr res; + if (!storage.back().getResult(res)) + return false; + + storage.pop_back(); + storage.back().pushOperand(res); + ++pos; + } + else + { + break; + } + } 
+ } + + if (storage.size() > 1) + return false; + + if (!wrapLayer(storage.back())) + return false; + + if (!storage.back().getResult(node)) + return false; + + return true; +} + +//////////////////////////////////////////////////////////////////////////////////////// bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -749,6 +1169,11 @@ bool ParserNotEmptyExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected return nested_parser.parse(pos, node, expected) && !node->children.empty(); } +bool ParserNotEmptyExpressionList2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return nested_parser.parse(pos, node, expected) && !node->children.empty(); +} + bool ParserOrderByExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 86d0fd0f861..212d8b8bf96 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -510,6 +510,68 @@ protected: }; +class ParserExpression2 : public IParserBase +{ +private: + ParserTernaryOperatorExpression elem_parser; + +protected: + const char * getName() const override { return "lambda expression"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +class ParserExpressionWithOptionalAlias2 : public IParserBase +{ +public: + explicit ParserExpressionWithOptionalAlias2(bool allow_alias_without_as_keyword, bool is_table_function = false) + : impl(std::make_unique( + is_table_function ? ParserPtr(std::make_unique()) : ParserPtr(std::make_unique()), + allow_alias_without_as_keyword)) {} +protected: + ParserPtr impl; + + const char * getName() const override { return "expression with optional alias"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + return impl->parse(pos, node, expected); + } +}; + +/** A comma-separated list of expressions, probably empty. 
*/ +class ParserExpressionList2 : public IParserBase +{ +public: + explicit ParserExpressionList2(bool allow_alias_without_as_keyword_, bool is_table_function_ = false) + : allow_alias_without_as_keyword(allow_alias_without_as_keyword_), is_table_function(is_table_function_) {} + +protected: + bool allow_alias_without_as_keyword; + bool is_table_function; // This expression list is used by a table function + + const char * getName() const override { return "list of expressions"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + return ParserList( + std::make_unique(allow_alias_without_as_keyword, is_table_function), + std::make_unique(TokenType::Comma)) + .parse(pos, node, expected); + } +}; + +class ParserNotEmptyExpressionList2 : public IParserBase +{ +public: + explicit ParserNotEmptyExpressionList2(bool allow_alias_without_as_keyword) + : nested_parser(allow_alias_without_as_keyword) {} +private: + ParserExpressionList2 nested_parser; +protected: + const char * getName() const override { return "not empty list of expressions"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + class ParserOrderByExpressionList : public IParserBase { protected: diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 90ab5911d6b..54895cca097 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -11,6 +11,7 @@ #include #include +#include namespace DB { @@ -152,8 +153,19 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->limit_with_ties = true; } - if (!exp_list_for_select_clause.parse(pos, select_expression_list, expected)) - return false; + // TEST + ParserToken test(TokenType::DollarSign); + if (test.ignore(pos, expected)) + { + if (!exp_list_for_select_clause.parse(pos, select_expression_list, expected)) + return false; + } + else + { + ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); + 
if (!exp_list_for_select_clause2.parse(pos, select_expression_list, expected)) + return false; + } } /// FROM database.table or FROM table or FROM (subquery) or FROM tableFunction(...) diff --git a/tests/queries/0_stateless/_02.reference b/tests/queries/0_stateless/_02.reference new file mode 100644 index 00000000000..9849f3c984a --- /dev/null +++ b/tests/queries/0_stateless/_02.reference @@ -0,0 +1,33 @@ +equal (RES): SELECT 1 + 1 +equal (AST): SELECT 1 + 1 +equal (RES): SELECT 3 + 7 * 5 + 32 / 2 - 5 * 2 +equal (AST): SELECT 3 + 7 * 5 + 32 / 2 - 5 * 2 +equal (RES): SELECT 100 MOD 5 DIV 20 MOD 5 +equal (AST): SELECT 100 MOD 5 DIV 20 MOD 5 +equal (RES): SELECT 1 + 2 * 3 - 3 / 2 < 80 / 8 + 2 * 5 +equal (AST): SELECT 1 + 2 * 3 - 3 / 2 < 80 / 8 + 2 * 5 +equal (RES): SELECT 20 MOD 10 > 200 DIV 6 +equal (AST): SELECT 20 MOD 10 > 200 DIV 6 +equal (RES): SELECT 5 != 80 / 8 + 2 * 5 +equal (AST): SELECT 5 != 80 / 8 + 2 * 5 +equal (AST): SELECT a.5 +equal (AST): SELECT a.b.5 +equal (AST): SELECT a.b.n.v +equal (AST): SELECT 10 * a.b.5 / 3 +equal (RES): SELECT -1::Int64 +equal (AST): SELECT -1::Int64 +equal (RES): SELECT [1,2,3]::Array(Int64) +equal (AST): SELECT [1,2,3]::Array(Int64) +equal (RES): SELECT [1,2,cos(1)] +equal (AST): SELECT [1,2,cos(1)] +equal (AST): SELECT [a,b,c] +equal (RES): SELECT number AS a1, number AS b2, number FROM numbers(10) +equal (AST): SELECT number AS a1, number AS b2, number FROM numbers(10) +equal (AST): SELECT *[n] +equal (RES): SELECT 3 + 7 * (5 + 32) / 2 - 5 * (2 - 1) +equal (AST): SELECT 3 + 7 * (5 + 32) / 2 - 5 * (2 - 1) +equal (AST): SELECT (a, b, c) * ((a, b, c) + (a, b, c)) +equal (AST): SELECT 1 + 2 * 3 < a / b mod 5 OR [a, b, c] + 1 != [c, d, e] AND n as res +equal (AST): SELECT 1 + 2 * 3 < a / b mod 5 AND [a, b, c] + 1 != [c, d, e] OR n as res +equal (AST): SELECT 'needle' LIKE 'haystack' AND NOT needle NOT ILIKE haystack +equal (AST): SELECT 'needle' LIKE 'haystack' AND (NOT needle) NOT ILIKE haystack diff --git 
a/tests/queries/0_stateless/_02.sh b/tests/queries/0_stateless/_02.sh new file mode 100755 index 00000000000..9bf5b0edde6 --- /dev/null +++ b/tests/queries/0_stateless/_02.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +compare () { + if [ "$2" != 0 ];then + R_1=$($CLICKHOUSE_CLIENT -q "SELECT $1") + R_2=$($CLICKHOUSE_CLIENT -q "SELECT \$ $1" 2>/dev/null) + + if [ "$R_1" == "$R_2" ];then + echo "equal (RES): SELECT $1"; + else + echo "============== not equal ===================" + echo "not equal (RES): SELECT $1"; + echo "# Original: $R_1"; + echo "# Ours: $R_2"; + echo "============================================" + fi + fi + + R_1=$($CLICKHOUSE_CLIENT -q "EXPLAIN AST SELECT $1") + R_2=$($CLICKHOUSE_CLIENT -q "EXPLAIN AST SELECT \$ $1" 2>/dev/null) + + if [ "$R_1" == "$R_2" ];then + echo "equal (AST): SELECT $1"; + else + echo "============== not equal ===================" + echo "not equal (AST): SELECT $1"; + echo "# Original: $R_1"; + echo "# Ours: $R_2"; + echo "============================================" + fi +} + +compare "1 + 1" +compare "3 + 7 * 5 + 32 / 2 - 5 * 2" +compare "100 MOD 5 DIV 20 MOD 5" +compare "1 + 2 * 3 - 3 / 2 < 80 / 8 + 2 * 5" +compare "20 MOD 10 > 200 DIV 6" +compare "5 != 80 / 8 + 2 * 5" + +compare "a.5" 0 +compare "a.b.5" 0 +compare "a.b.n.v" 0 +compare "10 * a.b.5 / 3" 0 + +compare "-1::Int64" +compare "[1,2,3]::Array(Int64)" +compare "[1,2,cos(1)]" +compare "[a,b,c]" 0 + +compare "number AS a1, number AS b2, number FROM numbers(10)" +compare "*[n]" 0 + +compare "3 + 7 * (5 + 32) / 2 - 5 * (2 - 1)" +compare "(a, b, c) * ((a, b, c) + (a, b, c))" 0 + +compare "1 + 2 * 3 < a / b mod 5 OR [a, b, c] + 1 != [c, d, e] AND n as res" 0 +compare "1 + 2 * 3 < a / b mod 5 AND [a, b, c] + 1 != [c, d, e] OR n as res" 0 + +compare "'needle' LIKE 'haystack' AND NOT needle NOT ILIKE haystack" 0 +compare "'needle' LIKE 
'haystack' AND (NOT needle) NOT ILIKE haystack" 0 \ No newline at end of file From 510195a938a54f17a8c0a67278af5c242a8b53c3 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 25 Feb 2022 10:46:28 +0000 Subject: [PATCH 002/173] Add functions w/o arguments, more literals --- src/Parsers/ExpressionListParsers.cpp | 52 ++++++++++++++++++--------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 0b5b6b293e6..12f2180b5cb 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -21,9 +21,6 @@ using namespace std::literals; -#include -#include -#include namespace DB { @@ -739,8 +736,6 @@ bool ParseCastExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) { if (ParserLiteral().parse(pos, node, expected)) return true; - - pos = begin; } return false; } @@ -815,7 +810,7 @@ enum Action bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { static std::vector> op_table({ - {"+", Operator("plus", 20, 2)}, // Base arithmetics + {"+", Operator("plus", 20, 2)}, // Base arithmetic {"-", Operator("minus", 20, 2)}, {"*", Operator("multiply", 30, 2)}, {"/", Operator("divide", 30, 2)}, @@ -854,7 +849,9 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserCompoundIdentifier identifier_parser; ParserNumber number_parser; ParserAsterisk asterisk_parser; - ParserStringLiteral literal_parser; + ParserLiteral literal_parser; + ParserTupleOfLiterals tuple_literal_parser; + ParserArrayOfLiterals array_literal_parser; Action next = Action::OPERAND; @@ -888,23 +885,35 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage.back().pushOperator(cur_op->second); } else if (ParseDateOperatorExpression(pos, tmp, expected) || - ParseTimestampOperatorExpression(pos, tmp, expected)) + ParseTimestampOperatorExpression(pos, tmp, expected) || + 
tuple_literal_parser.parse(pos, tmp, expected) || + array_literal_parser.parse(pos, tmp, expected) || + number_parser.parse(pos, tmp, expected) || + literal_parser.parse(pos, tmp, expected)) { storage.back().pushOperand(std::move(tmp)); } else if (identifier_parser.parse(pos, tmp, expected) || - number_parser.parse(pos, tmp, expected) || - asterisk_parser.parse(pos, tmp, expected) || - literal_parser.parse(pos, tmp, expected)) + asterisk_parser.parse(pos, tmp, expected)) { /// If the next token is '(' then it is a plain function, '[' - arrayElement function if (pos->type == TokenType::OpeningRoundBracket) { - next = Action::OPERAND; - - storage.emplace_back(TokenType::ClosingRoundBracket, getIdentifierName(tmp)); ++pos; + + /// Special case for function with zero arguments: f() + if (pos->type == TokenType::ClosingRoundBracket) + { + ++pos; + auto function = makeASTFunction(getIdentifierName(tmp)); + storage.back().pushOperand(function); + } + else + { + next = Action::OPERAND; + storage.emplace_back(TokenType::ClosingRoundBracket, getIdentifierName(tmp)); + } } else if (pos->type == TokenType::OpeningSquareBracket) { @@ -928,9 +937,20 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (pos->type == TokenType::OpeningSquareBracket) { - next = Action::OPERAND; - storage.emplace_back(TokenType::ClosingSquareBracket, "array"); ++pos; + + /// Special case for empty array: [] + if (pos->type == TokenType::ClosingSquareBracket) + { + ++pos; + auto function = makeASTFunction("array"); + storage.back().pushOperand(function); + } + else + { + next = Action::OPERAND; + storage.emplace_back(TokenType::ClosingSquareBracket, "array"); + } } else { From 5d122758114053f77ce8d7e5c9153d9adb36131a Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 8 Apr 2022 03:30:49 +0000 Subject: [PATCH 003/173] Add some functions --- src/Parsers/ExpressionListParsers.cpp | 579 ++++++++++++++++++++++---- tests/queries/0_stateless/_02.sh | 15 +- 
2 files changed, 519 insertions(+), 75 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 12f2180b5cb..486ab68e063 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -17,6 +17,8 @@ #include #include +#include + #include using namespace std::literals; @@ -613,6 +615,11 @@ public: Int32 arity; }; +enum Action +{ + OPERAND, + OPERATOR +}; class Layer { @@ -621,6 +628,8 @@ public: { } + virtual ~Layer() = default; + bool popOperator(Operator & op) { if (operators.size() == 0) @@ -683,9 +692,28 @@ public: return false; } - TokenType endBracket() + virtual bool parse(IParser::Pos & pos, Expected & expected, Action & action) { - return end_bracket; + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + return wrapLayer(); + } + + if (end_bracket != TokenType::Whitespace && ParserToken(end_bracket).ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + finished = true; + } + + return true; + } + + bool isFinished() + { + return finished; } int previousPriority() @@ -713,12 +741,427 @@ public: return true; } -private: + bool wrapLayer() + { + Operator cur_op; + while (popOperator(cur_op)) + { + auto func = makeASTFunction(cur_op.func_name); + + if (!lastNOperands(func->children[0]->children, cur_op.arity)) + return false; + + pushOperand(func); + } + + ASTPtr res; + if (!popOperand(res)) + return false; + + pushResult(res); + + return empty(); + } + +protected: std::vector operators; ASTs operands; ASTs result; TokenType end_bracket; String func_name; + bool finished = false; +}; + + +class CastLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + /// expr AS type + if (state == 0) + { + if (ParserKeyword("AS").ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + ASTPtr type_node; + + if (ParserDataType().parse(pos, type_node, expected) && 
ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + result[0] = createFunctionCast(result[0], type_node); + finished = true; + return true; + } + else + { + return false; + } + } + else if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!wrapLayer()) + return false; + + state = 1; + } + } + if (state == 1) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + result[0] = makeASTFunction("CAST", result[0], result[1]); + result.pop_back(); + finished = true; + return true; + } + } + + return true; + } + +private: + int state = 0; +}; + +class ExtractLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + IParser::Pos begin = pos; + ParserKeyword s_from("FROM"); + + if (parseIntervalKind(pos, expected, interval_kind) && s_from.ignore(pos, expected)) + { + state = 2; + return true; + } + else + { + state = 1; + pos = begin; + func_name = "extract"; + end_bracket = TokenType::ClosingRoundBracket; + } + } + + if (state == 1) + { + return Layer::parse(pos, expected, action); + } + + if (state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + result[0] = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), result[0]); + finished = true; + return true; + } + } + + return true; + } + +private: + int state = 0; + IntervalKind interval_kind; +}; + +class SubstringLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length) + /// The latter will be parsed normally as a function later. 
+ + if (state == 0) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected) || + ParserKeyword("FROM").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!wrapLayer()) + return false; + + state = 1; + } + } + + if (state == 1) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected) || + ParserKeyword("FOR").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!wrapLayer()) + return false; + + state = 2; + } + } + + if (state == 1 || state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + result = {makeASTFunction("substring", result)}; + finished = true; + return true; + } + } + + return true; + } + +private: + int state = 0; +}; + +class PositionLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!wrapLayer()) + return false; + + state = 1; + } + if (ParserKeyword("IN").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!wrapLayer()) + return false; + + state = 2; + } + } + + if (state == 1 || 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + if (state == 1) + result = {makeASTFunction("position", result)}; + else + result = {makeASTFunction("position", result[1], result[0])}; + + finished = true; + return true; + } + } + + return true; + } + +private: + int state = 0; +}; + + +class ExistsLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + { + ASTPtr node; + + // Recursion + if (!ParserSelectWithUnionQuery().parse(pos, node, expected)) + return false; + + auto subquery = std::make_shared(); + subquery->children.push_back(node); + result = {makeASTFunction("exists", subquery)}; + + return true; + } +}; + +class TrimLayer : 
public Layer +{ +public: + TrimLayer(bool trim_left_, bool trim_right_) : trim_left(trim_left_), trim_right(trim_right_) + { + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + /// Handles all possible TRIM/LTRIM/RTRIM call variants + + if (state == 0) + { + if (!trim_left && !trim_right) + { + if (ParserKeyword("BOTH").ignore(pos, expected)) + { + trim_left = true; + trim_right = true; + char_override = true; + } + else if (ParserKeyword("LEADING").ignore(pos, expected)) + { + trim_left = true; + char_override = true; + } + else if (ParserKeyword("TRAILING").ignore(pos, expected)) + { + trim_right = true; + char_override = true; + } + else + { + trim_left = true; + trim_right = true; + } + + if (char_override) + state = 1; + else + state = 2; + } + else + { + state = 2; + } + } + + if (state == 1) + { + if (ParserKeyword("FROM").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!wrapLayer()) + return false; + + to_remove = makeASTFunction("regexpQuoteMeta", result[0]); + result.clear(); + state = 2; + } + } + + if (state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + ASTPtr pattern_node; + + if (char_override) + { + auto pattern_func_node = std::make_shared(); + auto pattern_list_args = std::make_shared(); + if (trim_left && trim_right) + { + pattern_list_args->children = { + std::make_shared("^["), + to_remove, + std::make_shared("]+|["), + to_remove, + std::make_shared("]+$") + }; + func_name = "replaceRegexpAll"; + } + else + { + if (trim_left) + { + pattern_list_args->children = { + std::make_shared("^["), + to_remove, + std::make_shared("]+") + }; + } + else + { + /// trim_right == false not possible + pattern_list_args->children = { + std::make_shared("["), + to_remove, + std::make_shared("]+$") + }; + } + func_name = "replaceRegexpOne"; + } + + pattern_func_node->name = "concat"; + pattern_func_node->arguments = 
std::move(pattern_list_args); + pattern_func_node->children.push_back(pattern_func_node->arguments); + + pattern_node = std::move(pattern_func_node); + } + else + { + if (trim_left && trim_right) + { + func_name = "trimBoth"; + } + else + { + if (trim_left) + { + func_name = "trimLeft"; + } + else + { + /// trim_right == false not possible + func_name = "trimRight"; + } + } + } + + if (char_override) + { + result.push_back(pattern_node); + result.push_back(std::make_shared("")); + } + + finished = true; + } + } + + return true; + } +private: + int state = 0; + + bool trim_left; + bool trim_right; + bool char_override = false; + + ASTPtr to_remove; }; @@ -779,34 +1222,6 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte return true; } -bool wrapLayer(Layer & layer) -{ - Operator cur_op; - while (layer.popOperator(cur_op)) - { - auto func = makeASTFunction(cur_op.func_name); - - if (!layer.lastNOperands(func->children[0]->children, cur_op.arity)) - return false; - - layer.pushOperand(func); - } - - ASTPtr res; - if (!layer.popOperand(res)) - return false; - - layer.pushResult(res); - - return layer.empty(); -} - -enum Action -{ - OPERAND, - OPERATOR -}; - bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { static std::vector> op_table({ @@ -855,10 +1270,28 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Action next = Action::OPERAND; - std::vector storage(1); + std::vector> storage; + storage.push_back(std::make_unique()); while (pos.isValid()) { + // LOG_FATAL(&Poco::Logger::root(), "#pos: {}", String(pos->begin, pos->size())); + if (!storage.back()->parse(pos, expected, next)) + return false; + + if (storage.back()->isFinished()) + { + next = Action::OPERATOR; + + ASTPtr res; + if (!storage.back()->getResult(res)) + return false; + + storage.pop_back(); + storage.back()->pushOperand(res); + continue; + } + if (next == Action::OPERAND) { next = Action::OPERATOR; @@ 
-867,7 +1300,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// Special case for cast expression if (ParseCastExpression(pos, tmp, expected)) { - storage.back().pushOperand(std::move(tmp)); + storage.back()->pushOperand(std::move(tmp)); continue; } @@ -882,7 +1315,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (cur_op != op_table_unary.end()) { next = Action::OPERAND; - storage.back().pushOperator(cur_op->second); + storage.back()->pushOperator(cur_op->second); } else if (ParseDateOperatorExpression(pos, tmp, expected) || ParseTimestampOperatorExpression(pos, tmp, expected) || @@ -891,7 +1324,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) number_parser.parse(pos, tmp, expected) || literal_parser.parse(pos, tmp, expected)) { - storage.back().pushOperand(std::move(tmp)); + storage.back()->pushOperand(std::move(tmp)); } else if (identifier_parser.parse(pos, tmp, expected) || asterisk_parser.parse(pos, tmp, expected)) @@ -907,32 +1340,53 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ++pos; auto function = makeASTFunction(getIdentifierName(tmp)); - storage.back().pushOperand(function); + storage.back()->pushOperand(function); } else { next = Action::OPERAND; - storage.emplace_back(TokenType::ClosingRoundBracket, getIdentifierName(tmp)); + + String function_name = getIdentifierName(tmp); + String function_name_lowercase = Poco::toLower(function_name); + + if (function_name_lowercase == "cast") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "extract") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "substring") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "position") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "exists") + storage.push_back(std::make_unique()); + else if 
(function_name_lowercase == "trim") + storage.push_back(std::make_unique(false, false)); + else if (function_name_lowercase == "ltrim") + storage.push_back(std::make_unique(true, false)); + else if (function_name_lowercase == "rtrim") + storage.push_back(std::make_unique(false, true)); + else + storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, function_name)); } } else if (pos->type == TokenType::OpeningSquareBracket) { next = Action::OPERAND; - storage.back().pushOperand(std::move(tmp)); - storage.back().pushOperator(Operator("arrayElement", 40, 2)); - storage.emplace_back(TokenType::ClosingSquareBracket); + storage.back()->pushOperand(std::move(tmp)); + storage.back()->pushOperator(Operator("arrayElement", 40, 2)); + storage.push_back(std::make_unique(TokenType::ClosingSquareBracket)); ++pos; } else { - storage.back().pushOperand(std::move(tmp)); + storage.back()->pushOperand(std::move(tmp)); } } else if (pos->type == TokenType::OpeningRoundBracket) { next = Action::OPERAND; - storage.emplace_back(TokenType::ClosingRoundBracket, "tuple"); + storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, "tuple")); ++pos; } else if (pos->type == TokenType::OpeningSquareBracket) @@ -944,12 +1398,12 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ++pos; auto function = makeASTFunction("array"); - storage.back().pushOperand(function); + storage.back()->pushOperand(function); } else { next = Action::OPERAND; - storage.emplace_back(TokenType::ClosingSquareBracket, "array"); + storage.push_back(std::make_unique(TokenType::ClosingSquareBracket, "array")); } } else @@ -971,46 +1425,23 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (cur_op != op_table.end()) { - while (storage.back().previousPriority() >= cur_op->second.priority) + while (storage.back()->previousPriority() >= cur_op->second.priority) { Operator prev_op; - storage.back().popOperator(prev_op); + 
storage.back()->popOperator(prev_op); auto func = makeASTFunction(prev_op.func_name); - if (!storage.back().lastNOperands(func->children[0]->children, prev_op.arity)) + if (!storage.back()->lastNOperands(func->children[0]->children, prev_op.arity)) return false; - storage.back().pushOperand(func); + storage.back()->pushOperand(func); } - storage.back().pushOperator(cur_op->second); + storage.back()->pushOperator(cur_op->second); } else if (pos->type == TokenType::Comma) { if (storage.size() == 1) break; - - if (!wrapLayer(storage.back())) - return false; - - ++pos; - } - else if (pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) - { - next = Action::OPERATOR; - - if (pos->type != storage.back().endBracket()) - return false; - - if (!wrapLayer(storage.back())) - return false; - - ASTPtr res; - if (!storage.back().getResult(res)) - return false; - - storage.pop_back(); - storage.back().pushOperand(res); - ++pos; } else { @@ -1022,10 +1453,10 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (storage.size() > 1) return false; - if (!wrapLayer(storage.back())) + if (!storage.back()->wrapLayer()) return false; - if (!storage.back().getResult(node)) + if (!storage.back()->getResult(node)) return false; return true; diff --git a/tests/queries/0_stateless/_02.sh b/tests/queries/0_stateless/_02.sh index 9bf5b0edde6..aec8fdac4a5 100755 --- a/tests/queries/0_stateless/_02.sh +++ b/tests/queries/0_stateless/_02.sh @@ -61,4 +61,17 @@ compare "1 + 2 * 3 < a / b mod 5 OR [a, b, c] + 1 != [c, d, e] AND n as res" 0 compare "1 + 2 * 3 < a / b mod 5 AND [a, b, c] + 1 != [c, d, e] OR n as res" 0 compare "'needle' LIKE 'haystack' AND NOT needle NOT ILIKE haystack" 0 -compare "'needle' LIKE 'haystack' AND (NOT needle) NOT ILIKE haystack" 0 \ No newline at end of file +compare "'needle' LIKE 'haystack' AND (NOT needle) NOT ILIKE haystack" 0 + +compare "[1, 2, 3, cast(['a', 'b', c] as Array(String)), 4]" 0 
+compare "[1, 2, 3, cast(['a', 'b', c], Array(String)), 4]" 0 + +compare "[1, 2, 3, cast(['a', 'b', c] as Array(String)), 4]" 0 +compare "[1, 2, 3, cast(['a', 'b', c], Array(String)), 4]" 0 + +compare "EXTRACT(DAY FROM toDate('2017-06-15'))" +compare "substring(toFixedString('hello12345', 16) from 1 for 8)" +compare "position('Hello, world!' IN '!')" + +compare "trim(TRAILING 'x' FROM 'xxfooxx')" +# compare "ltrim('') || rtrim('') || trim('')" From 46bf0fdfbf27cdc35b7f980895f16cbb52c6710f Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 8 Apr 2022 15:36:36 +0000 Subject: [PATCH 004/173] Add some more functions --- src/Parsers/ExpressionListParsers.cpp | 238 +++++++++++++++++++++++- tests/queries/0_stateless/_02.reference | 12 ++ 2 files changed, 241 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 486ab68e063..83f47d0018f 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -624,7 +624,10 @@ enum Action class Layer { public: - Layer(TokenType end_bracket_ = TokenType::Whitespace, String func_name_ = "") : end_bracket(end_bracket_), func_name(func_name_) + Layer(TokenType end_bracket_ = TokenType::Whitespace, String func_name_ = "", bool layer_zero_ = false) : + end_bracket(end_bracket_), + func_name(func_name_), + layer_zero(layer_zero_) { } @@ -694,7 +697,7 @@ public: virtual bool parse(IParser::Pos & pos, Expected & expected, Action & action) { - if (ParserToken(TokenType::Comma).ignore(pos, expected)) + if (!layer_zero && ParserToken(TokenType::Comma).ignore(pos, expected)) { action = Action::OPERAND; return wrapLayer(); @@ -702,10 +705,8 @@ public: if (end_bracket != TokenType::Whitespace && ParserToken(end_bracket).ignore(pos, expected)) { - if (!wrapLayer()) - return false; - finished = true; + return wrapLayer(); } return true; @@ -770,6 +771,7 @@ protected: TokenType end_bracket; String func_name; bool finished = false; + 
bool layer_zero; }; @@ -974,7 +976,7 @@ public: result = {makeASTFunction("position", result)}; else result = {makeASTFunction("position", result[1], result[0])}; - + finished = true; return true; } @@ -995,14 +997,19 @@ public: { ASTPtr node; - // Recursion + // Recursion :'( if (!ParserSelectWithUnionQuery().parse(pos, node, expected)) return false; + if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + auto subquery = std::make_shared(); subquery->children.push_back(node); result = {makeASTFunction("exists", subquery)}; + finished = true; + return true; } }; @@ -1165,6 +1172,205 @@ private: }; +class DateAddLayer : public Layer +{ +public: + DateAddLayer(const char * function_name_) : function_name(function_name_) + { + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + if (parseIntervalKind(pos, expected, interval_kind)) + { + if (!ParserToken(TokenType::Comma).ignore(pos, expected)) + return false; + + action = Action::OPERAND; + state = 2; + } + else + { + func_name = function_name; + end_bracket = TokenType::ClosingRoundBracket; + state = 1; + } + } + + if (state == 1) + { + return Layer::parse(pos, expected, action); + } + + if (state == 2) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!wrapLayer()) + return false; + + state = 3; + } + } + + if (state == 3) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + result[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result[0]); + result = {makeASTFunction(function_name, result[1], result[0])}; + finished = true; + } + } + return true; + } + +private: + int state = 0; + IntervalKind interval_kind; + const char * function_name; +}; + + +class DateDiffLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + 
if (state == 0) + { + if (!parseIntervalKind(pos, expected, interval_kind)) + { + func_name = "dateDiff"; + end_bracket = TokenType::ClosingRoundBracket; + state = 1; + } + else + { + if (!ParserToken(TokenType::Comma).ignore(pos, expected)) + return false; + + state = 2; + } + } + + if (state == 1) + { + return Layer::parse(pos, expected, action); + } + + if (state == 2) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!wrapLayer()) + return false; + + state = 3; + } + } + + if (state == 3) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + if (result.size() != 2) + return false; + + result = {makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1])}; + finished = true; + } + } + + return true; + } + +private: + int state = 0; + IntervalKind interval_kind; +}; + + +class IntervalLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + { + if (state == 0) + { + auto begin = pos; + auto init_expected = expected; + ASTPtr string_literal; + //// A String literal followed INTERVAL keyword, + /// the literal can be a part of an expression or + /// include Number and INTERVAL TYPE at the same time + if (ParserStringLiteral{}.parse(pos, string_literal, expected)) + { + String literal; + if (string_literal->as().value.tryGet(literal)) + { + Tokens tokens(literal.data(), literal.data() + literal.size()); + IParser::Pos token_pos(tokens, 0); + Expected token_expected; + ASTPtr expr; + + if (!ParserNumber{}.parse(token_pos, expr, token_expected)) + return false; + else + { + /// case: INTERVAL '1' HOUR + /// back to begin + if (!token_pos.isValid()) + { + pos = begin; + expected = init_expected; + } + else + /// case: INTERVAL '1 HOUR' + if (!parseIntervalKind(token_pos, token_expected, interval_kind)) + return false; + + result = 
{makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), expr)}; + finished = true; + } + } + } + state = 1; + } + + if (state == 1) + { + if (parseIntervalKind(pos, expected, interval_kind)) + { + if (!wrapLayer()) + return false; + + result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result)}; + finished = true; + } + } + + return true; + } + +private: + int state = 0; + IntervalKind interval_kind; +}; + + bool ParseCastExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) { IParser::Pos begin = pos; @@ -1271,14 +1477,14 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Action next = Action::OPERAND; std::vector> storage; - storage.push_back(std::make_unique()); + storage.push_back(std::make_unique(TokenType::Whitespace, "", true)); while (pos.isValid()) { // LOG_FATAL(&Poco::Logger::root(), "#pos: {}", String(pos->begin, pos->size())); if (!storage.back()->parse(pos, expected, next)) return false; - + if (storage.back()->isFinished()) { next = Action::OPERATOR; @@ -1317,6 +1523,11 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) next = Action::OPERAND; storage.back()->pushOperator(cur_op->second); } + else if (parseOperator(pos, "INTERVAL", expected)) + { + next = Action::OPERAND; + storage.push_back(std::make_unique()); + } else if (ParseDateOperatorExpression(pos, tmp, expected) || ParseTimestampOperatorExpression(pos, tmp, expected) || tuple_literal_parser.parse(pos, tmp, expected) || @@ -1365,6 +1576,15 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage.push_back(std::make_unique(true, false)); else if (function_name_lowercase == "rtrim") storage.push_back(std::make_unique(false, true)); + else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" + || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") + 
storage.push_back(std::make_unique("plus")); + else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" + || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") + storage.push_back(std::make_unique("minus")); + else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" + || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") + storage.push_back(std::make_unique()); else storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, function_name)); } diff --git a/tests/queries/0_stateless/_02.reference b/tests/queries/0_stateless/_02.reference index 9849f3c984a..4876823f290 100644 --- a/tests/queries/0_stateless/_02.reference +++ b/tests/queries/0_stateless/_02.reference @@ -31,3 +31,15 @@ equal (AST): SELECT 1 + 2 * 3 < a / b mod 5 OR [a, b, c] + 1 != [c, d, e] AND n equal (AST): SELECT 1 + 2 * 3 < a / b mod 5 AND [a, b, c] + 1 != [c, d, e] OR n as res equal (AST): SELECT 'needle' LIKE 'haystack' AND NOT needle NOT ILIKE haystack equal (AST): SELECT 'needle' LIKE 'haystack' AND (NOT needle) NOT ILIKE haystack +equal (AST): SELECT [1, 2, 3, cast(['a', 'b', c] as Array(String)), 4] +equal (AST): SELECT [1, 2, 3, cast(['a', 'b', c], Array(String)), 4] +equal (AST): SELECT [1, 2, 3, cast(['a', 'b', c] as Array(String)), 4] +equal (AST): SELECT [1, 2, 3, cast(['a', 'b', c], Array(String)), 4] +equal (RES): SELECT EXTRACT(DAY FROM toDate('2017-06-15')) +equal (AST): SELECT EXTRACT(DAY FROM toDate('2017-06-15')) +equal (RES): SELECT substring(toFixedString('hello12345', 16) from 1 for 8) +equal (AST): SELECT substring(toFixedString('hello12345', 16) from 1 for 8) +equal (RES): SELECT position('Hello, world!' IN '!') +equal (AST): SELECT position('Hello, world!' 
IN '!') +equal (RES): SELECT trim(TRAILING 'x' FROM 'xxfooxx') +equal (AST): SELECT trim(TRAILING 'x' FROM 'xxfooxx') From 2a50ef98813546c9e8b5e3901d7d4bc5a6ef5124 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Tue, 12 Apr 2022 15:03:10 +0000 Subject: [PATCH 005/173] Add CASE & CAST --- src/Parsers/ExpressionListParsers.cpp | 106 +++++++++++++++++++++++- tests/queries/0_stateless/_02.reference | 1 + tests/queries/0_stateless/_02.sh | 2 + 3 files changed, 107 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 83f47d0018f..6ab55f1942e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -20,6 +20,7 @@ #include #include +#include using namespace std::literals; @@ -624,7 +625,7 @@ enum Action class Layer { public: - Layer(TokenType end_bracket_ = TokenType::Whitespace, String func_name_ = "", bool layer_zero_ = false) : + Layer(TokenType end_bracket_ = TokenType::Whitespace, String func_name_ = "", bool layer_zero_ = false) : end_bracket(end_bracket_), func_name(func_name_), layer_zero(layer_zero_) @@ -1371,6 +1372,92 @@ private: }; +class CaseLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + auto old_pos = pos; + has_case_expr = !ParserKeyword("WHEN").ignore(pos, expected); + pos = old_pos; + + state = 1; + } + + if (state == 1) + { + if (ParserKeyword("WHEN").ignore(pos, expected)) + { + if ((has_case_expr || result.size() > 0) && !wrapLayer()) + return false; + + action = Action::OPERAND; + state = 2; + } + else if (ParserKeyword("ELSE").ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + action = Action::OPERAND; + state = 3; + } + else if (ParserKeyword("END").ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + Field field_with_null; + ASTLiteral null_literal(field_with_null); + 
result.push_back(std::make_shared(null_literal)); + + if (has_case_expr) + result = {makeASTFunction("caseWithExpression", result)}; + else + result = {makeASTFunction("multiIf", result)}; + finished = true; + } + } + + if (state == 2) + { + if (ParserKeyword("THEN").ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + action = Action::OPERAND; + state = 1; + } + } + + if (state == 3) + { + if (ParserKeyword("END").ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + if (has_case_expr) + result = {makeASTFunction("caseWithExpression", result)}; + else + result = {makeASTFunction("multiIf", result)}; + + finished = true; + } + } + + return true; + } + +private: + int state = 0; + bool has_case_expr; +}; + + bool ParseCastExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) { IParser::Pos begin = pos; @@ -1528,6 +1615,11 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) next = Action::OPERAND; storage.push_back(std::make_unique()); } + else if (parseOperator(pos, "CASE", expected)) + { + next = Action::OPERAND; // ??? + storage.push_back(std::make_unique()); + } else if (ParseDateOperatorExpression(pos, tmp, expected) || ParseTimestampOperatorExpression(pos, tmp, expected) || tuple_literal_parser.parse(pos, tmp, expected) || @@ -1541,7 +1633,6 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) asterisk_parser.parse(pos, tmp, expected)) { /// If the next token is '(' then it is a plain function, '[' - arrayElement function - if (pos->type == TokenType::OpeningRoundBracket) { ++pos; @@ -1658,6 +1749,17 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } storage.back()->pushOperator(cur_op->second); } + else if (parseOperator(pos, "::", expected)) + { + next = Action::OPERATOR; + + ASTPtr type_ast; + if (!ParserDataType().parse(pos, type_ast, expected)) + return false; // ??? 
+ + storage.back()->pushOperator(Operator("CAST", 50, 2)); + storage.back()->pushOperand(std::make_shared(queryToString(type_ast))); + } else if (pos->type == TokenType::Comma) { if (storage.size() == 1) diff --git a/tests/queries/0_stateless/_02.reference b/tests/queries/0_stateless/_02.reference index 4876823f290..2f08358e4c7 100644 --- a/tests/queries/0_stateless/_02.reference +++ b/tests/queries/0_stateless/_02.reference @@ -21,6 +21,7 @@ equal (AST): SELECT [1,2,3]::Array(Int64) equal (RES): SELECT [1,2,cos(1)] equal (AST): SELECT [1,2,cos(1)] equal (AST): SELECT [a,b,c] +equal (AST): SELECT [a,b,c]::Array(UInt8) equal (RES): SELECT number AS a1, number AS b2, number FROM numbers(10) equal (AST): SELECT number AS a1, number AS b2, number FROM numbers(10) equal (AST): SELECT *[n] diff --git a/tests/queries/0_stateless/_02.sh b/tests/queries/0_stateless/_02.sh index aec8fdac4a5..e59876159f7 100755 --- a/tests/queries/0_stateless/_02.sh +++ b/tests/queries/0_stateless/_02.sh @@ -50,6 +50,8 @@ compare "-1::Int64" compare "[1,2,3]::Array(Int64)" compare "[1,2,cos(1)]" compare "[a,b,c]" 0 +compare "[a,b,c]::Array(UInt8)" 0 + compare "number AS a1, number AS b2, number FROM numbers(10)" compare "*[n]" 0 From e8b949284d9c2716cd3babfc91d71fed3d57d3fb Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Sun, 17 Apr 2022 20:59:03 +0000 Subject: [PATCH 006/173] Add lambda --- src/Parsers/ExpressionListParsers.cpp | 38 ++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 6ab55f1942e..5903b86e863 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -678,7 +678,7 @@ public: if (!func_name.empty()) { - // Round brackets can mean priority operator together with function tuple() + // Round brackets can mean priority operator as well as function tuple() if (func_name == "tuple" && res.size() == 1) op = 
std::move(res[0]); else @@ -765,6 +765,35 @@ public: return empty(); } + bool parseLambda() + { + // 0. If empty - create function tuple with 0 args + if (empty()) + { + auto func = makeASTFunction("tuple"); + pushOperand(func); + return true; + } + + if (!wrapLayer()) + return false; + + /// 1. If there is already tuple do nothing + if (tryGetFunctionName(result.back()).value_or("") == "tuple") + { + pushOperand(result.back()); + result.pop_back(); + } + /// 2. Put all result in a single tuple + else + { + auto func = makeASTFunction("tuple", result); + result.clear(); + pushOperand(func); + } + return true; + } + protected: std::vector operators; ASTs operands; @@ -1760,6 +1789,13 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage.back()->pushOperator(Operator("CAST", 50, 2)); storage.back()->pushOperand(std::make_shared(queryToString(type_ast))); } + else if (parseOperator(pos, "->", expected)) + { + if (!storage.back()->parseLambda()) + return false; + + storage.back()->pushOperator(Operator("lambda", 50, 2)); + } else if (pos->type == TokenType::Comma) { if (storage.size() == 1) From 93c1f93c6a1afa36dfb1b6a9db93254e7babf100 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 21 Apr 2022 11:31:01 +0000 Subject: [PATCH 007/173] Add ternary operator --- src/Parsers/ExpressionListParsers.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 5903b86e863..36864574380 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -748,6 +748,16 @@ public: Operator cur_op; while (popOperator(cur_op)) { + if (cur_op.func_name == "if_pre") + return false; + + if (cur_op.func_name == "if") + { + Operator tmp; + if (!popOperator(tmp) || tmp.func_name != "if_pre") + return false; + } + auto func = makeASTFunction(cur_op.func_name); if 
(!lastNOperands(func->children[0]->children, cur_op.arity)) @@ -779,7 +789,7 @@ public: return false; /// 1. If there is already tuple do nothing - if (tryGetFunctionName(result.back()).value_or("") == "tuple") + if (tryGetFunctionName(result.back()) == "tuple") { pushOperand(result.back()); result.pop_back(); @@ -1576,6 +1586,8 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) {"NOT IN", Operator("notIn", 10, 2)}, {"GLOBAL IN", Operator("globalIn", 10, 2)}, {"GLOBAL NOT IN", Operator("globalNotIn", 10, 2)}, + {"?", Operator("if_pre", 3, 0)}, + {":", Operator("if", 4, 3)}, }); static std::vector> op_table_unary({ @@ -1794,7 +1806,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!storage.back()->parseLambda()) return false; - storage.back()->pushOperator(Operator("lambda", 50, 2)); + storage.back()->pushOperator(Operator("lambda", 2, 2)); } else if (pos->type == TokenType::Comma) { From 4af1533fee18b269dee13df1b678daa298d482aa Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 29 Apr 2022 15:07:42 +0000 Subject: [PATCH 008/173] Add aliases & aggregate functions --- src/Parsers/ExpressionListParsers.cpp | 107 ++++++++++++++++++-------- 1 file changed, 73 insertions(+), 34 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 36864574380..a64fcf59cea 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -680,9 +680,21 @@ public: { // Round brackets can mean priority operator as well as function tuple() if (func_name == "tuple" && res.size() == 1) + { op = std::move(res[0]); + } else - op = makeASTFunction(func_name, std::move(res)); + { + auto func = makeASTFunction(func_name, std::move(res)); + + if (parameters) + { + func->parameters = parameters; + func->children.push_back(func->parameters); + } + + op = func; + } return true; } @@ -698,6 +710,10 @@ public: virtual bool 
parse(IParser::Pos & pos, Expected & expected, Action & action) { + if (isFinished()) + return true; + + // fix: layer_zero is basically end_bracket != TokenType::Whitespace if (!layer_zero && ParserToken(TokenType::Comma).ignore(pos, expected)) { action = Action::OPERAND; @@ -706,8 +722,22 @@ public: if (end_bracket != TokenType::Whitespace && ParserToken(end_bracket).ignore(pos, expected)) { - finished = true; - return wrapLayer(); + if (!wrapLayer()) + return false; + + // fix: move to other place, ()()() will work + if (end_bracket == TokenType::ClosingRoundBracket && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + { + parameters = std::make_shared(); + std::swap(parameters->children, result); + action = Action::OPERAND; + } + else + { + state = -1; + } + + return true; } return true; @@ -715,7 +745,7 @@ public: bool isFinished() { - return finished; + return state == -1; } int previousPriority() @@ -748,6 +778,7 @@ public: Operator cur_op; while (popOperator(cur_op)) { + // Special case for ternary operator if (cur_op.func_name == "if_pre") return false; @@ -804,14 +835,30 @@ public: return true; } + bool parseAlias(ASTPtr node) + { + if (result.empty()) + return false; + /// FIXME: try to prettify this cast using `as<>()` + if (auto * ast_with_alias = node->as()) + // if (auto * ast_with_alias = dynamic_cast(result.back().get())) + tryGetIdentifierNameInto(node, ast_with_alias->alias); + else + return false; + + return true; + } + protected: std::vector operators; ASTs operands; ASTs result; TokenType end_bracket; String func_name; - bool finished = false; + int state = 0; bool layer_zero; + + ASTPtr parameters; }; @@ -833,7 +880,7 @@ public: if (ParserDataType().parse(pos, type_node, expected) && ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) { result[0] = createFunctionCast(result[0], type_node); - finished = true; + state = -1; return true; } else @@ -860,16 +907,13 @@ public: result[0] = makeASTFunction("CAST", 
result[0], result[1]); result.pop_back(); - finished = true; + state = -1; return true; } } return true; } - -private: - int state = 0; }; class ExtractLayer : public Layer @@ -909,7 +953,7 @@ public: return false; result[0] = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), result[0]); - finished = true; + state = -1; return true; } } @@ -918,7 +962,6 @@ public: } private: - int state = 0; IntervalKind interval_kind; }; @@ -966,16 +1009,13 @@ public: return false; result = {makeASTFunction("substring", result)}; - finished = true; + state = -1; return true; } } return true; } - -private: - int state = 0; }; class PositionLayer : public Layer @@ -1017,16 +1057,13 @@ public: else result = {makeASTFunction("position", result[1], result[0])}; - finished = true; + state = -1; return true; } } return true; } - -private: - int state = 0; }; @@ -1048,7 +1085,7 @@ public: subquery->children.push_back(node); result = {makeASTFunction("exists", subquery)}; - finished = true; + state = -1; return true; } @@ -1195,15 +1232,13 @@ public: result.push_back(std::make_shared("")); } - finished = true; + state = -1; } } return true; } private: - int state = 0; - bool trim_left; bool trim_right; bool char_override = false; @@ -1266,14 +1301,13 @@ public: result[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result[0]); result = {makeASTFunction(function_name, result[1], result[0])}; - finished = true; + state = -1; } } return true; } private: - int state = 0; IntervalKind interval_kind; const char * function_name; }; @@ -1330,7 +1364,7 @@ public: return false; result = {makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1])}; - finished = true; + state = -1; } } @@ -1338,7 +1372,6 @@ public: } private: - int state = 0; IntervalKind interval_kind; }; @@ -1383,7 +1416,7 @@ public: return false; result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), expr)}; - finished = true; + 
state = -1; } } } @@ -1398,7 +1431,7 @@ public: return false; result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result)}; - finished = true; + state = -1; } } @@ -1406,7 +1439,6 @@ public: } private: - int state = 0; IntervalKind interval_kind; }; @@ -1456,7 +1488,7 @@ public: result = {makeASTFunction("caseWithExpression", result)}; else result = {makeASTFunction("multiIf", result)}; - finished = true; + state = -1; } } @@ -1484,7 +1516,7 @@ public: else result = {makeASTFunction("multiIf", result)}; - finished = true; + state = -1; } } @@ -1492,7 +1524,6 @@ public: } private: - int state = 0; bool has_case_expr; }; @@ -1766,6 +1797,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else { next = Action::OPERAND; + ASTPtr tmp; /// Try to find operators from 'op_table' auto cur_op = op_table.begin(); @@ -1808,6 +1840,13 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage.back()->pushOperator(Operator("lambda", 2, 2)); } + else if (storage.size() > 1 && ParserAlias(false).parse(pos, tmp, expected)) + { + if (!storage.back()->parse(pos, expected, next)) + return false; + if (!storage.back()->parseAlias(tmp)) + return false; + } else if (pos->type == TokenType::Comma) { if (storage.size() == 1) From b473a275939a85d5a009e14a7c9beeaf3db4ba93 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 29 Apr 2022 16:29:32 +0000 Subject: [PATCH 009/173] Little fixes --- src/Parsers/ExpressionListParsers.cpp | 30 ++++++++++++++++++++------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index a64fcf59cea..6263e70a656 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -679,12 +679,15 @@ public: if (!func_name.empty()) { // Round brackets can mean priority operator as well as function tuple() - if (func_name == "tuple" && res.size() 
== 1) + if (func_name == "tuple_" && res.size() == 1) { op = std::move(res[0]); } else { + if (func_name == "tuple_") + func_name = "tuple"; + auto func = makeASTFunction(func_name, std::move(res)); if (parameters) @@ -725,7 +728,7 @@ public: if (!wrapLayer()) return false; - // fix: move to other place, ()()() will work + // fix: move to other place, ()()() will work, aliases f(a as b)(c) - won't work if (end_bracket == TokenType::ClosingRoundBracket && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { parameters = std::make_shared(); @@ -736,8 +739,6 @@ public: { state = -1; } - - return true; } return true; @@ -839,9 +840,8 @@ public: { if (result.empty()) return false; - /// FIXME: try to prettify this cast using `as<>()` - if (auto * ast_with_alias = node->as()) - // if (auto * ast_with_alias = dynamic_cast(result.back().get())) + + if (auto * ast_with_alias = dynamic_cast(result.back().get())) tryGetIdentifierNameInto(node, ast_with_alias->alias); else return false; @@ -1769,7 +1769,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else if (pos->type == TokenType::OpeningRoundBracket) { next = Action::OPERAND; - storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, "tuple")); + storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, "tuple_")); ++pos; } else if (pos->type == TokenType::OpeningSquareBracket) @@ -1846,6 +1846,20 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; if (!storage.back()->parseAlias(tmp)) return false; + + // duplicate code :( + if (storage.back()->isFinished()) + { + next = Action::OPERATOR; + + ASTPtr res; + if (!storage.back()->getResult(res)) + return false; + + storage.pop_back(); + storage.back()->pushOperand(res); + continue; + } } else if (pos->type == TokenType::Comma) { From 0937547bcf66f62ed6d5c91dcdca8e1312c79a9c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 4 May 2022 18:54:04 
+0000 Subject: [PATCH 010/173] Add subqueries, qualified asterisk, column matcher --- src/Parsers/ExpressionListParsers.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 6263e70a656..a5f16911cce 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1633,6 +1633,10 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserTupleOfLiterals tuple_literal_parser; ParserArrayOfLiterals array_literal_parser; + // Recursion + ParserQualifiedAsterisk qualified_asterisk_parser; + ParserColumnsMatcher columns_matcher_parser; + Action next = Action::OPERAND; std::vector> storage; @@ -1697,12 +1701,14 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) tuple_literal_parser.parse(pos, tmp, expected) || array_literal_parser.parse(pos, tmp, expected) || number_parser.parse(pos, tmp, expected) || - literal_parser.parse(pos, tmp, expected)) + literal_parser.parse(pos, tmp, expected) || + asterisk_parser.parse(pos, tmp, expected) || + qualified_asterisk_parser.parse(pos, tmp, expected) || + columns_matcher_parser.parse(pos, tmp, expected)) { storage.back()->pushOperand(std::move(tmp)); } - else if (identifier_parser.parse(pos, tmp, expected) || - asterisk_parser.parse(pos, tmp, expected)) + else if (identifier_parser.parse(pos, tmp, expected)) { /// If the next token is '(' then it is a plain function, '[' - arrayElement function if (pos->type == TokenType::OpeningRoundBracket) @@ -1768,6 +1774,11 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (pos->type == TokenType::OpeningRoundBracket) { + if (ParserSubquery().parse(pos, tmp, expected)) + { + storage.back()->pushOperand(std::move(tmp)); + continue; + } next = Action::OPERAND; storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, "tuple_")); 
++pos; From ecfed4aaefd7051bf30fddb18b3b2c3fad4e018a Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 12 May 2022 10:07:58 +0000 Subject: [PATCH 011/173] Add query parameter substitution --- src/Parsers/ExpressionListParsers.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 691d14aed28..9e615717be4 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1638,6 +1638,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserLiteral literal_parser; ParserTupleOfLiterals tuple_literal_parser; ParserArrayOfLiterals array_literal_parser; + ParserSubstitution substitution_parser; // Recursion ParserQualifiedAsterisk qualified_asterisk_parser; @@ -1710,7 +1711,8 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) literal_parser.parse(pos, tmp, expected) || asterisk_parser.parse(pos, tmp, expected) || qualified_asterisk_parser.parse(pos, tmp, expected) || - columns_matcher_parser.parse(pos, tmp, expected)) + columns_matcher_parser.parse(pos, tmp, expected) || + substitution_parser.parse(pos, tmp, expected)) { storage.back()->pushOperand(std::move(tmp)); } From 73adb4bb59633839176af310ce39c93a3e2abd6f Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 19 May 2022 08:37:26 +0000 Subject: [PATCH 012/173] Fix arrayElement operator --- src/Parsers/ExpressionListParsers.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 9e615717be4..0aef7f07faa 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1766,15 +1766,6 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, function_name)); } } 
- else if (pos->type == TokenType::OpeningSquareBracket) - { - next = Action::OPERAND; - - storage.back()->pushOperand(std::move(tmp)); - storage.back()->pushOperator(Operator("arrayElement", 40, 2)); - storage.push_back(std::make_unique(TokenType::ClosingSquareBracket)); - ++pos; - } else { storage.back()->pushOperand(std::move(tmp)); @@ -1841,6 +1832,12 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } storage.back()->pushOperator(cur_op->second); } + else if (pos->type == TokenType::OpeningSquareBracket) + { + storage.back()->pushOperator(Operator("arrayElement", 40, 2)); + storage.push_back(std::make_unique(TokenType::ClosingSquareBracket)); + ++pos; + } else if (parseOperator(pos, "::", expected)) { next = Action::OPERATOR; From f8f9731509a4ca55795356fac05684829e3a0e2b Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 26 May 2022 16:50:48 +0000 Subject: [PATCH 013/173] Fix aliases and IS [NOT] NULL --- src/Parsers/ExpressionListParsers.cpp | 62 +++++++++++++++------------ 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 0aef7f07faa..02373235d9d 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -613,7 +613,7 @@ public: { } - Operator(String func_name_, Int32 priority_, Int32 arity_) : func_name(func_name_), priority(priority_), arity(arity_) + Operator(String func_name_, Int32 priority_, Int32 arity_ = 2) : func_name(func_name_), priority(priority_), arity(arity_) { } @@ -780,7 +780,7 @@ public: return true; } - bool wrapLayer() + bool wrapLayer(bool push_to_result = true) { Operator cur_op; while (popOperator(cur_op)) @@ -804,13 +804,18 @@ public: pushOperand(func); } - ASTPtr res; - if (!popOperand(res)) + ASTPtr node; + if (!popOperand(node)) return false; - pushResult(res); + bool res = empty(); - return empty(); + if (push_to_result) + pushResult(node); + else + 
pushOperand(node); + + return res; } bool parseLambda() @@ -844,10 +849,13 @@ public: bool parseAlias(ASTPtr node) { - if (result.empty()) + if (!wrapLayer(false)) return false; - if (auto * ast_with_alias = dynamic_cast(result.back().get())) + if (operands.empty()) + return false; + + if (auto * ast_with_alias = dynamic_cast(operands.back().get())) tryGetIdentifierNameInto(node, ast_with_alias->alias); else return false; @@ -911,8 +919,7 @@ public: if (!wrapLayer()) return false; - result[0] = makeASTFunction("CAST", result[0], result[1]); - result.pop_back(); + result = {makeASTFunction("CAST", result[0], result[1])}; state = -1; return true; } @@ -1051,6 +1058,17 @@ public: } } + if (state == 1) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!wrapLayer()) + return false; + } + } + if (state == 1 || 2) { if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) @@ -1613,8 +1631,8 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) {"OR", Operator("or", 4, 2)}, {"||", Operator("concat", 30, 2)}, // concat() func {".", Operator("tupleElement", 40, 2)}, // tupleElement() func - {"IS NULL", Operator("isNull", 40, 1)}, // IS (NOT) NULL - correct priority ? 
- {"IS NOT NULL", Operator("isNotNull", 40, 1)}, + {"IS NULL", Operator("isNull", 9, 1)}, // IS (NOT) NULL + {"IS NOT NULL", Operator("isNotNull", 9, 1)}, {"LIKE", Operator("like", 10, 2)}, // LIKE funcs {"ILIKE", Operator("ilike", 10, 2)}, {"NOT LIKE", Operator("notLike", 10, 2)}, @@ -1831,6 +1849,10 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage.back()->pushOperand(func); } storage.back()->pushOperator(cur_op->second); + + // isNull & isNotNull is postfix unary operator + if (cur_op->second.func_name == "isNull" || cur_op->second.func_name == "isNotNull") + next = Action::OPERATOR; } else if (pos->type == TokenType::OpeningSquareBracket) { @@ -1858,24 +1880,8 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (storage.size() > 1 && ParserAlias(false).parse(pos, tmp, expected)) { - if (!storage.back()->parse(pos, expected, next)) - return false; if (!storage.back()->parseAlias(tmp)) return false; - - // duplicate code :( - if (storage.back()->isFinished()) - { - next = Action::OPERATOR; - - ASTPtr res; - if (!storage.back()->getResult(res)) - return false; - - storage.pop_back(); - storage.back()->pushOperand(res); - continue; - } } else if (pos->type == TokenType::Comma) { From 45544da0197485d2729414ecf38fa0110923ec34 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 26 May 2022 22:53:37 +0000 Subject: [PATCH 014/173] Fix Aliases in special cases --- src/Parsers/ExpressionListParsers.cpp | 80 ++++++++++++++++++++------- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 02373235d9d..3914a25ebf2 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -847,7 +847,7 @@ public: return true; } - bool parseAlias(ASTPtr node) + bool insertAlias(ASTPtr node) { if (!wrapLayer(false)) return false; @@ -881,28 +881,67 @@ class CastLayer : 
public Layer public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { + ParserKeyword as_keyword_parser("AS"); + ASTPtr alias; + /// expr AS type if (state == 0) { - if (ParserKeyword("AS").ignore(pos, expected)) + ASTPtr type_node; + + if (as_keyword_parser.ignore(pos, expected)) { - if (!wrapLayer()) - return false; + auto old_pos = pos; - ASTPtr type_node; - - if (ParserDataType().parse(pos, type_node, expected) && ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + if (ParserIdentifier().parse(pos, alias, expected) && + as_keyword_parser.ignore(pos, expected) && + ParserDataType().parse(pos, type_node, expected) && + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) { - result[0] = createFunctionCast(result[0], type_node); + if (!insertAlias(alias)) + return false; + + if (!wrapLayer()) + return false; + + result = {createFunctionCast(result[0], type_node)}; state = -1; return true; } - else + + pos = old_pos; + + if (ParserIdentifier().parse(pos, alias, expected) && + ParserToken(TokenType::Comma).ignore(pos, expected)) { - return false; + action = Action::OPERAND; + if (!insertAlias(alias)) + return false; + + if (!wrapLayer()) + return false; + + state = 1; + return true; } + + pos = old_pos; + + if (ParserDataType().parse(pos, type_node, expected) && + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!wrapLayer()) + return false; + + result = {createFunctionCast(result[0], type_node)}; + state = -1; + return true; + } + + return false; } - else if (ParserToken(TokenType::Comma).ignore(pos, expected)) + + if (ParserToken(TokenType::Comma).ignore(pos, expected)) { action = Action::OPERAND; @@ -910,6 +949,7 @@ public: return false; state = 1; + return true; } } if (state == 1) @@ -1372,22 +1412,20 @@ public: if (!wrapLayer()) return false; - - state = 3; } - } - if (state == 3) - { if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) { if 
(!wrapLayer()) return false; - if (result.size() != 2) + if (result.size() == 2) + result = {makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1])}; + else if (result.size() == 3) + result = {makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1], result[2])}; + else return false; - result = {makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1])}; state = -1; } } @@ -1669,7 +1707,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) while (pos.isValid()) { - // LOG_FATAL(&Poco::Logger::root(), "#pos: {}", String(pos->begin, pos->size())); + // // LOG_FATAL(&Poco::Logger::root(), "#pos: {}", String(pos->begin, pos->size())); if (!storage.back()->parse(pos, expected, next)) return false; @@ -1878,9 +1916,9 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage.back()->pushOperator(Operator("lambda", 2, 2)); } - else if (storage.size() > 1 && ParserAlias(false).parse(pos, tmp, expected)) + else if (storage.size() > 1 && ParserAlias(true).parse(pos, tmp, expected)) { - if (!storage.back()->parseAlias(tmp)) + if (!storage.back()->insertAlias(tmp)) return false; } else if (pos->type == TokenType::Comma) From daa20441a726da73f3eaa6ff66428caf79ae4714 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 26 May 2022 23:38:25 +0000 Subject: [PATCH 015/173] Add operator BETWEEN --- src/Parsers/ExpressionListParsers.cpp | 115 ++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 3914a25ebf2..8d05ce0fc07 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -785,18 +785,85 @@ public: Operator cur_op; while (popOperator(cur_op)) { - // Special case for ternary operator - if (cur_op.func_name == "if_pre") + 
ASTPtr func; + + // Special case of ternary operator + if (cur_op.func_name == "if_1") return false; if (cur_op.func_name == "if") { Operator tmp; - if (!popOperator(tmp) || tmp.func_name != "if_pre") + if (!popOperator(tmp) || tmp.func_name != "if_1") return false; } - auto func = makeASTFunction(cur_op.func_name); + // Special case of a BETWEEN b AND c operator + if (cur_op.func_name == "between_1" || cur_op.func_name == "not_between_1") + return false; + + if (cur_op.func_name == "between_2") + { + Operator tmp; + if (!popOperator(tmp) || !(tmp.func_name == "between_1" || tmp.func_name == "not_between_1")) + return false; + + bool negative = tmp.func_name == "not_between_1"; + + ASTs arguments; + if (!lastNOperands(arguments, 3)) + return false; + + // subject = arguments[0], left = arguments[1], right = arguments[2] + auto f_combined_expression = std::make_shared(); + auto args_combined_expression = std::make_shared(); + + /// [NOT] BETWEEN left AND right + auto f_left_expr = std::make_shared(); + auto args_left_expr = std::make_shared(); + + auto f_right_expr = std::make_shared(); + auto args_right_expr = std::make_shared(); + + args_left_expr->children.emplace_back(arguments[0]); + args_left_expr->children.emplace_back(arguments[1]); + + args_right_expr->children.emplace_back(arguments[0]); + args_right_expr->children.emplace_back(arguments[2]); + + if (negative) + { + /// NOT BETWEEN + f_left_expr->name = "less"; + f_right_expr->name = "greater"; + f_combined_expression->name = "or"; + } + else + { + /// BETWEEN + f_left_expr->name = "greaterOrEquals"; + f_right_expr->name = "lessOrEquals"; + f_combined_expression->name = "and"; + } + + f_left_expr->arguments = args_left_expr; + f_left_expr->children.emplace_back(f_left_expr->arguments); + + f_right_expr->arguments = args_right_expr; + f_right_expr->children.emplace_back(f_right_expr->arguments); + + args_combined_expression->children.emplace_back(f_left_expr); + 
args_combined_expression->children.emplace_back(f_right_expr); + + f_combined_expression->arguments = args_combined_expression; + f_combined_expression->children.emplace_back(f_combined_expression->arguments); + + func = f_combined_expression; + } + else + { + func = makeASTFunction(cur_op.func_name); + } if (!lastNOperands(func->children[0]->children, cur_op.arity)) return false; @@ -863,6 +930,21 @@ public: return true; } + void addBetween() + { + ++open_between; + } + + void subBetween() + { + --open_between; + } + + bool hasBetween() + { + return open_between > 0; + } + protected: std::vector operators; ASTs operands; @@ -873,6 +955,8 @@ protected: bool layer_zero; ASTPtr parameters; + + int open_between = 0; }; @@ -1679,8 +1763,10 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) {"NOT IN", Operator("notIn", 10, 2)}, {"GLOBAL IN", Operator("globalIn", 10, 2)}, {"GLOBAL NOT IN", Operator("globalNotIn", 10, 2)}, - {"?", Operator("if_pre", 3, 0)}, + {"?", Operator("if_1", 3, 0)}, {":", Operator("if", 4, 3)}, + {"BETWEEN", Operator("between_1", 5, 0)}, + {"NOT BETWEEN", Operator("not_between_1", 5, 0)}, }); static std::vector> op_table_unary({ @@ -1707,7 +1793,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) while (pos.isValid()) { - // // LOG_FATAL(&Poco::Logger::root(), "#pos: {}", String(pos->begin, pos->size())); + // LOG_FATAL(&Poco::Logger::root(), "#pos: {}", String(pos->begin, pos->size())); if (!storage.back()->parse(pos, expected, next)) return false; @@ -1875,7 +1961,15 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (cur_op != op_table.end()) { - while (storage.back()->previousPriority() >= cur_op->second.priority) + auto op = cur_op->second; + + if (op.func_name == "and" && storage.back()->hasBetween()) + { + storage.back()->subBetween(); + op = Operator("between_2", 6, 0); + } + + while (storage.back()->previousPriority() >= op.priority) { 
Operator prev_op; storage.back()->popOperator(prev_op); @@ -1886,11 +1980,14 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage.back()->pushOperand(func); } - storage.back()->pushOperator(cur_op->second); + storage.back()->pushOperator(op); // isNull & isNotNull is postfix unary operator - if (cur_op->second.func_name == "isNull" || cur_op->second.func_name == "isNotNull") + if (op.func_name == "isNull" || op.func_name == "isNotNull") next = Action::OPERATOR; + + if (op.func_name == "between_1" || op.func_name == "not_between_1") + storage.back()->addBetween(); } else if (pos->type == TokenType::OpeningSquareBracket) { From ea9ee2ca5095a9b3da453afe0f7e9eca72ce870d Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 2 Jun 2022 22:07:14 +0000 Subject: [PATCH 016/173] Fix arrayElement & CAST, add DISTINCT & ALL --- src/Parsers/ExpressionListParsers.cpp | 73 +++++++++++++++++++++------ 1 file changed, 57 insertions(+), 16 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 8d05ce0fc07..6c0af85f040 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -813,7 +813,7 @@ public: ASTs arguments; if (!lastNOperands(arguments, 3)) return false; - + // subject = arguments[0], left = arguments[1], right = arguments[2] auto f_combined_expression = std::make_shared(); auto args_combined_expression = std::make_shared(); @@ -1767,6 +1767,8 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) {":", Operator("if", 4, 3)}, {"BETWEEN", Operator("between_1", 5, 0)}, {"NOT BETWEEN", Operator("not_between_1", 5, 0)}, + {"[", Operator("arrayElement", 40, 2)}, // Layer is added in the process + {"::", Operator("CAST", 50, 2)} }); static std::vector> op_table_unary({ @@ -1782,6 +1784,9 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserArrayOfLiterals array_literal_parser; 
ParserSubstitution substitution_parser; + ParserKeyword filter("FILTER"); + ParserKeyword over("OVER"); + // Recursion ParserQualifiedAsterisk qualified_asterisk_parser; ParserColumnsMatcher columns_matcher_parser; @@ -1905,7 +1910,45 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") storage.push_back(std::make_unique()); else + { + bool has_all = false; + bool has_distinct = false; + + auto pos_after_bracket = pos; + auto old_expected = expected; + + ParserKeyword all("ALL"); + ParserKeyword distinct("DISTINCT"); + + if (all.ignore(pos, expected)) + has_all = true; + + if (distinct.ignore(pos, expected)) + has_distinct = true; + + if (!has_all && all.ignore(pos, expected)) + has_all = true; + + if (has_all && has_distinct) + return false; + + if (has_all || has_distinct) + { + /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + pos = pos_after_bracket; + expected = old_expected; + has_all = false; + has_distinct = false; + } + } + + if (has_distinct) + function_name += "Distinct"; + storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, function_name)); + } } } else @@ -1963,6 +2006,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto op = cur_op->second; + // AND can be both boolean function and part of the BETWEEN ... AND ... 
operator if (op.func_name == "and" && storage.back()->hasBetween()) { storage.back()->subBetween(); @@ -1982,29 +2026,26 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } storage.back()->pushOperator(op); + if (op.func_name == "arrayElement") + storage.push_back(std::make_unique(TokenType::ClosingSquareBracket)); + // isNull & isNotNull is postfix unary operator if (op.func_name == "isNull" || op.func_name == "isNotNull") next = Action::OPERATOR; if (op.func_name == "between_1" || op.func_name == "not_between_1") storage.back()->addBetween(); - } - else if (pos->type == TokenType::OpeningSquareBracket) - { - storage.back()->pushOperator(Operator("arrayElement", 40, 2)); - storage.push_back(std::make_unique(TokenType::ClosingSquareBracket)); - ++pos; - } - else if (parseOperator(pos, "::", expected)) - { - next = Action::OPERATOR; - ASTPtr type_ast; - if (!ParserDataType().parse(pos, type_ast, expected)) - return false; // ??? - storage.back()->pushOperator(Operator("CAST", 50, 2)); - storage.back()->pushOperand(std::make_shared(queryToString(type_ast))); + if (op.func_name == "CAST") + { + next = Action::OPERATOR; + ASTPtr type_ast; + if (!ParserDataType().parse(pos, type_ast, expected)) + return false; + + storage.back()->pushOperand(std::make_shared(queryToString(type_ast))); + } } else if (parseOperator(pos, "->", expected)) { From 0951627b24fbd01bb1fdbbccd17157a4fd16ec54 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 017/173] Kusto-phase1: Add Support to Kusto Query Language This is the initial implementation of Kusto Query Language.
In this commit, we support the following features as an MVP: Tabular expression statements; Limit returned results; Select Column (basic project); sort, order; Perform string equality operations; Filter using a list of elements; Filter using common string operations; Some string operators; Aggregate by columns; Base aggregate functions only support avg, count, min, max, sum; Aggregate by time intervals --- src/Client/ClientBase.cpp | 15 +- src/Core/Settings.h | 1 + src/Interpreters/executeQuery.cpp | 19 +- src/Parsers/CMakeLists.txt | 1 + src/Parsers/Kusto/ParserKQLFilter.cpp | 39 ++++ src/Parsers/Kusto/ParserKQLFilter.h | 16 ++ src/Parsers/Kusto/ParserKQLLimit.cpp | 58 ++++++ src/Parsers/Kusto/ParserKQLLimit.h | 17 ++ src/Parsers/Kusto/ParserKQLOperators.cpp | 239 +++++++++++++++++++++++ src/Parsers/Kusto/ParserKQLOperators.h | 103 ++++++++++ src/Parsers/Kusto/ParserKQLProject.cpp | 47 +++++ src/Parsers/Kusto/ParserKQLProject.h | 22 +++ src/Parsers/Kusto/ParserKQLQuery.cpp | 123 ++++++++++++ src/Parsers/Kusto/ParserKQLQuery.h | 25 +++ src/Parsers/Kusto/ParserKQLSort.cpp | 71 +++++++ src/Parsers/Kusto/ParserKQLSort.h | 16 ++ src/Parsers/Kusto/ParserKQLStatement.cpp | 61 ++++++ src/Parsers/Kusto/ParserKQLStatement.h | 45 +++++ src/Parsers/Kusto/ParserKQLSummarize.cpp | 162 +++++++++++++++ src/Parsers/Kusto/ParserKQLSummarize.h | 19 ++ src/Parsers/Kusto/ParserKQLTable.cpp | 68 +++++++ src/Parsers/Kusto/ParserKQLTable.h | 18 ++ src/Parsers/Lexer.cpp | 2 +- src/Parsers/Lexer.h | 1 + src/Parsers/tests/gtest_Parser.cpp | 179 +++++++++++++++++ 25 files changed, 1359 insertions(+), 8 deletions(-) create mode 100644 src/Parsers/Kusto/ParserKQLFilter.cpp create mode 100644 src/Parsers/Kusto/ParserKQLFilter.h create mode 100644 src/Parsers/Kusto/ParserKQLLimit.cpp create mode 100644 src/Parsers/Kusto/ParserKQLLimit.h create mode 100644 src/Parsers/Kusto/ParserKQLOperators.cpp create mode 100644 src/Parsers/Kusto/ParserKQLOperators.h create mode 100644
src/Parsers/Kusto/ParserKQLProject.cpp create mode 100644 src/Parsers/Kusto/ParserKQLProject.h create mode 100644 src/Parsers/Kusto/ParserKQLQuery.cpp create mode 100644 src/Parsers/Kusto/ParserKQLQuery.h create mode 100644 src/Parsers/Kusto/ParserKQLSort.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSort.h create mode 100644 src/Parsers/Kusto/ParserKQLStatement.cpp create mode 100644 src/Parsers/Kusto/ParserKQLStatement.h create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.h create mode 100644 src/Parsers/Kusto/ParserKQLTable.cpp create mode 100644 src/Parsers/Kusto/ParserKQLTable.h diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index b586979b546..0da70193fea 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -69,7 +69,7 @@ #include #include #include - +#include namespace fs = std::filesystem; using namespace std::literals; @@ -299,7 +299,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); + std::shared_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -308,10 +308,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; + const String & sql_dialect = settings.sql_dialect; + + if (sql_dialect == "kusto") + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + else + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + if (is_interactive || ignore_error) { String message; - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, 
true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { @@ -321,7 +328,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9e3b60a8e54..a48bfefbcf4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -38,6 +38,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ + M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 24649128cee..cd257567cd5 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -70,6 +70,7 @@ #include +#include namespace ProfileEvents { @@ -406,10 +407,22 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + const String & sql_dialect = settings.sql_dialect; + assert(sql_dialect == "clickhouse" || sql_dialect == "kusto"); - /// TODO: parser should fail early when max_query_size limit is reached. 
- ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + if (sql_dialect == "kusto" && !internal) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else + { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } if (auto txn = context->getCurrentTransaction()) { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73f300fd5f6..73d46593e04 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_parsers .) add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) +add_headers_and_sources(clickhouse_parsers ./Kusto) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp new file mode 100644 index 00000000000..ad7ad807d03 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + Pos begin = pos; + String expr; + + KQLOperators convetor; + + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + if (expr.empty()) + expr = "(" + convetor.getExprFromToken(pos) +")"; + else + expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + } + + Tokens 
tokenFilter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(tokenFilter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLFilter.h b/src/Parsers/Kusto/ParserKQLFilter.h new file mode 100644 index 00000000000..19bb38a7fda --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLFilter : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL where"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp new file mode 100644 index 00000000000..7811ebba9ab --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + Int64 minLimit = -1; + auto final_pos = pos; + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + auto isNumber = [&] + { + for (auto ch = pos->begin ; ch < pos->end; ++ch) + { + if (!isdigit(*ch)) + return false; + } + return true; + }; + + if (!isNumber()) + return false; + + auto limitLength = std::strtol(pos->begin,nullptr, 10); + if (-1 == minLimit) + { + minLimit = limitLength; + final_pos = pos; + } + else + { + if (minLimit > limitLength) + { + minLimit = limitLength; + final_pos = pos; + } + } + } + + if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h new file mode 100644 index 00000000000..d425659499d --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLLimit.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLLimit : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL limit"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp new file mode 100644 index 00000000000..1db05d3c07a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -0,0 +1,239 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +{ + String new_expr, leftWildcards= "", rightWildcards=""; + + switch (wildcardsPos) + { + case WildcardsPos::none: + break; + + case WildcardsPos::left: + leftWildcards ="%"; + break; + + case WildcardsPos::right: + rightWildcards = "%"; + break; + + case WildcardsPos::both: + leftWildcards ="%"; + rightWildcards = "%"; + break; + } + + if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) + new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + else + throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::getExprFromToken(IParser::Pos pos) +{ + String res; + std::vector tokens; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + KQLOperatorValue opValue = KQLOperatorValue::none; + + auto token = String(pos->begin,pos->end); + + String op = token; + if ( token == "!" 
) + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + op ="!"+String(pos->begin,pos->end); + } + else if (token == "matches") + { + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "regex") + op +=" regex"; + else + --pos; + } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "~") + op +="~"; + else + --pos; + } + + if (KQLOperator.find(op) != KQLOperator.end()) + opValue = KQLOperator[op]; + + String new_expr; + if (opValue == KQLOperatorValue::none) + tokens.push_back(op); + else + { + switch (opValue) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + break; + + case KQLOperatorValue::not_equal: + break; + + 
case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + + case KQLOperatorValue::has: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + break; + + case KQLOperatorValue::has_any: + break; + + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + break; + + case KQLOperatorValue::not_hasprefix: + break; + + case KQLOperatorValue::hasprefix_cs: + break; + + case KQLOperatorValue::not_hasprefix_cs: + break; + + case KQLOperatorValue::hassuffix: + break; + + case KQLOperatorValue::not_hassuffix: + break; + + case KQLOperatorValue::hassuffix_cs: + break; + + case KQLOperatorValue::not_hassuffix_cs: + break; + + case KQLOperatorValue::in_cs: + new_expr = "in"; + break; + + case KQLOperatorValue::not_in_cs: + new_expr = "not in"; + break; + + case KQLOperatorValue::in: + break; + + case KQLOperatorValue::not_in: + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, 
"not startsWith", WildcardsPos::none); + break; + + default: + break; + } + + tokens.push_back(new_expr); + } + ++pos; + } + + for (auto it=tokens.begin(); it!=tokens.end(); ++it) + res = res + *it + " "; + + return res; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h new file mode 100644 index 00000000000..9beeeda55ef --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class KQLOperators { +public: + String getExprFromToken(IParser::Pos pos); +protected: + + enum class WildcardsPos:uint8_t + { + none, + left, + right, + both + }; + + enum class KQLOperatorValue : uint16_t + { + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal,//!~ + equal_cs, //= + not_equal_cs,//!= + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in ,//!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, + }; + + std::unordered_map KQLOperator = + { + {"contains" , KQLOperatorValue::contains}, + {"!contains" , KQLOperatorValue::not_contains}, + {"contains_cs" , KQLOperatorValue::contains_cs}, + {"!contains_cs" , KQLOperatorValue::not_contains_cs}, + {"endswith" , KQLOperatorValue::endswith}, + {"!endswith" , KQLOperatorValue::not_endswith}, + {"endswith_cs" , KQLOperatorValue::endswith_cs}, + {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, + {"=~" , KQLOperatorValue::equal}, + {"!~" , KQLOperatorValue::not_equal}, + {"==" , KQLOperatorValue::equal_cs}, + {"!=" , KQLOperatorValue::not_equal_cs}, + {"has" , KQLOperatorValue::has}, + {"!has" , KQLOperatorValue::not_has}, + {"has_all" , KQLOperatorValue::has_all}, + 
{"has_any" , KQLOperatorValue::has_any}, + {"has_cs" , KQLOperatorValue::has_cs}, + {"!has_cs" , KQLOperatorValue::not_has_cs}, + {"hasprefix" , KQLOperatorValue::hasprefix}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, + {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix" , KQLOperatorValue::hassuffix}, + {"!hassuffix" , KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, + {"in" , KQLOperatorValue::in_cs}, + {"!in" , KQLOperatorValue::not_in_cs}, + {"in~" , KQLOperatorValue::in}, + {"!in~" , KQLOperatorValue::not_in}, + {"matches regex" , KQLOperatorValue::matches_regex}, + {"startswith" , KQLOperatorValue::startswith}, + {"!startswith" , KQLOperatorValue::not_startswith}, + {"startswith_cs" , KQLOperatorValue::startswith_cs}, + {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, + }; + String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp new file mode 100644 index 00000000000..fee8cdb612b --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + String expr; + if (op_pos.empty()) + expr = "*"; + else + { + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it ; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + if (pos->type == TokenType::BareWord) + { + String tmp(pos->begin,pos->end); + + if (it != op_pos.begin() && columns.find(tmp) == columns.end()) + return false; + columns.insert(tmp); + } + ++pos; + } + } + expr = getExprFromToken(op_pos.back()); + } + + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); +
IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + + + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h new file mode 100644 index 00000000000..3ab3c82f1be --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProject : public ParserKQLBase +{ +public: + void addColumn(String column) {columns.insert(column);} + +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::unordered_set columns; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp new file mode 100644 index 00000000000..0a9fa1fc4df --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLBase :: parsePrepare(Pos & pos) +{ + op_pos.push_back(pos); + return true; +} + +String ParserKQLBase :: getExprFromToken(Pos pos) +{ + String res; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + res = res + String(pos->begin,pos->end) +" "; + ++pos; + } + return res; +} + +bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto select_query = std::make_shared(); + node = select_query; + + ParserKQLFilter KQLfilter_p; + ParserKQLLimit KQLlimit_p; + ParserKQLProject KQLproject_p; + ParserKQLSort KQLsort_p; + ParserKQLSummarize KQLsummarize_p; + ParserKQLTable KQLtable_p; + + ASTPtr select_expression_list; + ASTPtr tables; + ASTPtr where_expression; + ASTPtr group_expression_list; + ASTPtr order_expression_list; + ASTPtr limit_length; + + std::unordered_map KQLParser = { + { "filter",&KQLfilter_p}, + 
{ "where",&KQLfilter_p}, + { "limit",&KQLlimit_p}, + { "take",&KQLlimit_p}, + { "project",&KQLproject_p}, + { "sort",&KQLsort_p}, + { "order",&KQLsort_p}, + { "summarize",&KQLsummarize_p}, + { "table",&KQLtable_p} + }; + + std::vector> operation_pos; + + operation_pos.push_back(std::make_pair("table",pos)); + + while (!pos->isEnd()) + { + ++pos; + if (pos->type == TokenType::PipeMark) + { + ++pos; + String KQLoperator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + return false; + ++pos; + operation_pos.push_back(std::make_pair(KQLoperator,pos)); + } + } + + for (auto &op_pos : operation_pos) + { + auto KQLoperator = op_pos.first; + auto npos = op_pos.second; + if (!npos.isValid()) + return false; + + if (!KQLParser[KQLoperator]->parsePrepare(npos)) + return false; + } + + if (!KQLtable_p.parse(pos, tables, expected)) + return false; + + if (!KQLproject_p.parse(pos, select_expression_list, expected)) + return false; + + if (!KQLlimit_p.parse(pos, limit_length, expected)) + return false; + + if (!KQLfilter_p.parse(pos, where_expression, expected)) + return false; + + if (!KQLsort_p.parse(pos, order_expression_list, expected)) + return false; + + if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + return false; + else + group_expression_list = KQLsummarize_p.group_expression_list; + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + + return true; +} + +} 
diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h new file mode 100644 index 00000000000..25aa4e6b83c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ +class ParserKQLBase : public IParserBase +{ +public: + virtual bool parsePrepare(Pos & pos) ; + +protected: + std::vector op_pos; + std::vector expresions; + virtual String getExprFromToken(Pos pos); +}; + +class ParserKQLQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp new file mode 100644 index 00000000000..9f226c2fc82 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + bool has_dir = false; + std::vector has_directions; + ParserOrderByExpressionList order_list; + ASTPtr order_expression_list; + + ParserKeyword by("by"); + + pos = op_pos.back(); // sort only affected by last one + + if (!by.ignore(pos, expected)) + return false; + + if (!order_list.parse(pos,order_expression_list,expected)) + return false; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + return false; + + pos = op_pos.back(); + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + String tmp(pos->begin,pos->end); + if (tmp == "desc" or tmp == "asc") + has_dir = true; + + if (pos->type == TokenType::Comma) + { + has_directions.push_back(has_dir); + has_dir = false; + } + + ++pos; + } + has_directions.push_back(has_dir); + + for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + { + if (!has_directions[i]) 
+ { + auto order_expr = order_expression_list->children[i]->as(); + order_expr->direction = -1; // default desc + if (!order_expr->nulls_direction_was_explicitly_specified) + order_expr->nulls_direction = -1; + else + order_expr->nulls_direction = order_expr->nulls_direction == 1 ? -1 : 1; + + } + } + + node = order_expression_list; + + pos =begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.h b/src/Parsers/Kusto/ParserKQLSort.h new file mode 100644 index 00000000000..d9afefc196c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSort : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL order by"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp new file mode 100644 index 00000000000..7dea87eef25 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserSetQuery set_p; + + bool res = query_with_output_p.parse(pos, node, expected) + || set_p.parse(pos, node, expected); + + return res; +} + +bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery KQL_p; + + ASTPtr query; + bool parsed = KQL_p.parse(pos, query, expected); + + if (!parsed) + return false; + + node = std::move(query); + return true; +} + +bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + // will support union next phase + ASTPtr KQLQuery; + + if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + return false; + + if (KQLQuery->as()) + { + node = 
std::move(KQLQuery); + return true; + } + + auto list_node = std::make_shared(); + list_node->children.push_back(KQLQuery); + + auto select_with_union_query = std::make_shared(); + node = select_with_union_query; + select_with_union_query->list_of_selects = list_node; + select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h new file mode 100644 index 00000000000..1eed2d00845 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLStatement : public IParserBase +{ +private: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL Statement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + + +class ParserKQLWithOutput : public IParserBase +{ +protected: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL with output"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + +class ParserKQLWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp new file mode 100644 index 
00000000000..f7422c02bca --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -0,0 +1,162 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace DB +{ +std::pair removeLastWord(String input) +{ + std::istringstream ss(input); + std::string token; + std::vector temp; + + while (std::getline(ss, token, ' ')) + { + temp.push_back(token); + } + + String firstPart; + for (std::size_t i = 0; i + 1 < temp.size(); i++) // "i + 1 <" avoids size_t wrap-around when temp is empty + { + firstPart += temp[i]; + } + + return std::make_pair(firstPart, temp.empty() ? String() : temp.back()); // empty input yields an empty last word +} + + +bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + if (op_pos.size() != 1) // now only support one summarize + return false; + + //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName + + //summarize has syntax : + + //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] + + //right now , we only support: + + //T | summarize Aggregation [, ...]
[by GroupExpression [, ...]] + //Aggregation -> the Aggregation function on column + //GroupExpression - > columns + + auto begin = pos; + + pos = op_pos.back(); + String exprAggregation; + String exprGroupby; + String exprColumns; + + bool groupby = false; + bool bin_function = false; + String bin_column; + String last_string; + String column_name; + int character_passed = 0; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + groupby = true; + else + { + if (groupby) + { + if (String(pos->begin, pos->end) == "bin") + { + exprGroupby = exprGroupby + "round" + " "; + bin_function = true; + } + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + + if (bin_function && last_string == "(") + { + bin_column = String(pos->begin, pos->end); + bin_function = false; + } + + last_string = String(pos->begin, pos->end); + } + + else + { + if (String(pos->begin, pos->end) == "=") + { + std::pair temp = removeLastWord(exprAggregation); + exprAggregation = temp.first; + column_name = temp.second; + } + else + { + if (!column_name.empty()) + { + exprAggregation = exprAggregation + String(pos->begin, pos->end); + character_passed++; + if (String(pos->begin, pos->end) == ")") // was 4 + { + exprAggregation = exprAggregation + " AS " + column_name; + column_name = ""; + } + } + else + { + exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + } + } + } + } + ++pos; + } + + if(!bin_column.empty()) + exprGroupby = exprGroupby + " AS " + bin_column; + + if (exprGroupby.empty()) + exprColumns = exprAggregation; + else + { + if (exprAggregation.empty()) + exprColumns = exprGroupby; + else + exprColumns = exprGroupby + "," + exprAggregation; + } + Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); + IParser::Pos posColumns(tokenColumns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, 
expected)) + return false; + + if (groupby) + { + Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); + IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + return false; + } + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h new file mode 100644 index 00000000000..426ac29fe6a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSummarize : public ParserKQLBase +{ +public: + ASTPtr group_expression_list; +protected: + const char * getName() const override { return "KQL summarize"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp new file mode 100644 index 00000000000..8d450799785 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLTable :: parsePrepare(Pos & pos) +{ + if (!op_pos.empty()) + return false; + + op_pos.push_back(pos); + return true; +} + +bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::unordered_set sql_keywords + ( { + "SELECT", + "INSERT", + "CREATE", + "ALTER", + "SYSTEM", + "SHOW", + "GRANT", + "REVOKE", + "ATTACH", + "CHECK", + "DESCRIBE", + "DESC", + "DETACH", + "DROP", + "EXISTS", + "KILL", + "OPTIMIZE", + "RENAME", + "SET", + "TRUNCATE", + "USE", + "EXPLAIN" + } ); + + if (op_pos.empty()) + return false; + + auto begin = pos; + pos = op_pos.back(); + + String table_name(pos->begin,pos->end); + String table_name_upcase(table_name); + + std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); + + if 
(sql_keywords.find(table_name_upcase) != sql_keywords.end()) + return false; + + if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + return false; + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h new file mode 100644 index 00000000000..1266b6e732d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTable : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL Table"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parsePrepare(Pos &pos) override; + +}; + +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 747a13d46f7..892c0ad4718 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl() ++pos; if (pos < end && *pos == '|') return Token(TokenType::Concatenation, token_begin, ++pos); - return Token(TokenType::ErrorSinglePipeMark, token_begin, pos); + return Token(TokenType::PipeMark, token_begin, pos); } case '@': { diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index ec472fb1a36..0c439ca0677 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ M(At) /** @. Used for specifying user names and also for MySQL-style variables. 
*/ \ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5b6d49e2741..8ffc5f77f90 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,181 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, "^$" } }))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", + "throws Syntax error" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC" + }, + { + "Customers | sort by 
FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | order by LastName", + "SELECT *\nFROM Customers\nORDER BY LastName DESC" + }, + { + "Customers | order by Age desc , FirstName asc ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY Age DESC" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + "SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "throws 
Syntax error" + }, + { + "Customers |summarize count() by Occupation", + "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE hasToken(Education, 'School')" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers|summarize count() by bin(Age, 10) ", + "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" + } +}))); From cb4c45340238a148b4a942f145f66c82a9c1e7b9 Mon Sep 17 
00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 018/173] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 104 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f7422c02bca..24473118dc0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,7 +1,9 @@ #include #include -#include +//#include #include +#include +#include #include #include #include @@ -19,16 +21,21 @@ #include #include #include + namespace DB { -std::pair removeLastWord(String input) +std::pair ParserKQLSummarize::removeLastWord(String input) { - std::istringstream ss(input); - std::string token; + ReadBufferFromString in(input); + String token; std::vector temp; - while (std::getline(ss, token, ' ')) + while (!in.eof()) { + readStringUntilWhitespace(token, in); + if (in.eof()) + break; + skipWhitespaceIfAny(in); temp.push_back(token); } @@ -37,10 +44,65 @@ std::pair removeLastWord(String input) { firstPart += temp[i]; } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair("", ""); } +String ParserKQLSummarize::getBinGroupbyString(String exprBin) +{ + String column_name; + bool bracket_start = false; + bool comma_start = false; + String bin_duration; + + for (std::size_t i = 0; i < exprBin.size(); i++) + { + if (comma_start && exprBin[i] != ')') + bin_duration += exprBin[i]; + if (exprBin[i] == ',') + { + comma_start = true; + bracket_start = false; + } + if (bracket_start == true) + column_name += exprBin[i]; + if (exprBin[i] == '(') + bracket_start = true; + } + + + std::size_t len = bin_duration.size(); + char bin_type = bin_duration[len - 1]; // y, d, h, m, s + if ((bin_type != 'y') && (bin_type != 'd') && 
(bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) + { + return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; + } + bin_duration = bin_duration.substr(0, len - 1); + + switch (bin_type) + { + case 'y': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" + + bin_duration + " * (12*30*86400))) AS bin_year"; + case 'd': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration + + " * 86400)) AS bin_day"; + case 'h': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration + + " * 3600)) AS bin_hour"; + case 'm': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration + + " * 60)) AS bin_minute"; + case 's': + return "toDateTime(" + column_name + ") AS bin_sec"; + default: + return ""; + } +} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -67,7 +129,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String exprAggregation; String exprGroupby; String exprColumns; - + String exprBin; bool groupby = false; bool bin_function = false; String bin_column; @@ -83,21 +145,20 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin") + if (String(pos->begin, pos->end) == "bin" || bin_function == true) { - exprGroupby = exprGroupby + "round" + " "; bin_function = true; - } - else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; - - if (bin_function && last_string == "(") - { - bin_column = String(pos->begin, pos->end); - bin_function = false; + exprBin += String(pos->begin, pos->end); + if (String(pos->begin, pos->end) == ")") + { + exprBin = getBinGroupbyString(exprBin); + exprGroupby 
+= exprBin; + bin_function = false; + } } - last_string = String(pos->begin, pos->end); + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; } else @@ -114,13 +175,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { exprAggregation = exprAggregation + String(pos->begin, pos->end); character_passed++; - if (String(pos->begin, pos->end) == ")") // was 4 + if (String(pos->begin, pos->end) == ")") { exprAggregation = exprAggregation + " AS " + column_name; column_name = ""; } } - else + else if (!bin_function) { exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; } @@ -130,9 +191,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if(!bin_column.empty()) - exprGroupby = exprGroupby + " AS " + bin_column; - if (exprGroupby.empty()) exprColumns = exprAggregation; else diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 426ac29fe6a..1420d5ce519 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,15 +5,16 @@ namespace DB { - class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; + protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - + std::pair removeLastWord(String input); + String getBinGroupbyString(String exprBin); }; } From 766b1193d44ef0c1310fd606a1fba52661735154 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:18:49 -0700 Subject: [PATCH 019/173] added single unit test case for summarize bin() --- src/Parsers/tests/gtest_Parser.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 8ffc5f77f90..6d33ed20f33 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -430,6 +430,10 @@ 
INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers |summarize max(Age) by Occupation", "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, + { + "Customers |summarize count() by bin(Age, 10)", + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" + } { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" From 96bea2245b659b06c6c6a1f3ec9ddbc940d72969 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:29:51 -0700 Subject: [PATCH 020/173] removed unwanted comments --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 24473118dc0..0260902c937 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,6 +1,5 @@ #include #include -//#include #include #include #include @@ -111,17 +110,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName - - //summarize has syntax : - - //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] - - //right now , we only support: - - //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] - //Aggregation -> the Aggregation function on column - //GroupExpression - > columns auto begin = pos; From 61543683ecc09878e8855aabb3f36299637c0df7 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 12:06:15 -0700 Subject: [PATCH 021/173] corrected unit test --- src/Parsers/tests/gtest_Parser.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 6d33ed20f33..1ce82cab3bd 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -432,8 +432,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" - } + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + }, { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" @@ -469,9 +469,5 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" - }, - { - "Customers|summarize count() by bin(Age, 10) ", - "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From 7163b4359e506abaf0da50c1b26688b5aba2f275 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 18:49:22 -0700 Subject: [PATCH 022/173] Kusto-phase1 : Add new test cases --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1ce82cab3bd..ee1e5fa6d8c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -469,5 +469,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE 
match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); From 44bbbd8b9f64901b828eac074f0047f5d565b0c8 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 22:17:58 -0700 Subject: [PATCH 023/173] Kusto-phase1: Fixed the bug for KQL filer with multiple operations --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 ++ src/Parsers/tests/gtest_Parser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1db05d3c07a..726f28308ee 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -84,6 +84,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; } + else + --pos; if (KQLOperator.find(op) != KQLOperator.end()) opValue = KQLOperator[op]; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index ee1e5fa6d8c..cb0b49aecbb 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -408,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "throws Syntax error" + "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" }, { "Customers |summarize count() by Occupation", From 35207909e946de2fa30ab643dcadebb5286f10c2 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 024/173] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLFilter.cpp | 11 ++-- src/Parsers/Kusto/ParserKQLLimit.cpp | 25 ++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 34 +++++------ src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 56 ++++++++--------- 
src/Parsers/Kusto/ParserKQLSort.cpp | 4 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 14 ++--- src/Parsers/Kusto/ParserKQLSummarize.cpp | 76 ++++++++++++------------ src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- 9 files changed, 111 insertions(+), 115 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index ad7ad807d03..466370f5d80 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -17,17 +17,16 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLOperators convetor; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po : op_pos) { - pos = *it; if (expr.empty()) - expr = "(" + convetor.getExprFromToken(pos) +")"; + expr = "(" + convetor.getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; } - Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(tokenFilter, pos.max_depth); + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 7811ebba9ab..4f7eddd9662 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,14 +13,13 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; auto begin = pos; - Int64 minLimit = -1; + Int64 min_limit = -1; auto final_pos = pos; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po: op_pos) { - pos = *it; - auto isNumber = [&] + auto is_number = [&] { - for (auto ch = pos->begin ; ch < pos->end; ++ch) + for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) { if (!isdigit(*ch)) 
return false; @@ -28,21 +27,21 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!isNumber()) + if (!is_number()) return false; - auto limitLength = std::strtol(pos->begin,nullptr, 10); - if (-1 == minLimit) + auto limit_length = std::strtol(op_po->begin,nullptr, 10); + if (-1 == min_limit) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } else { - if (minLimit > limitLength) + if (min_limit > limit_length) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 726f28308ee..90b37ba8aea 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -10,33 +10,33 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) { - String new_expr, leftWildcards= "", rightWildcards=""; + String new_expr, left_wildcards, right_wildcards; - switch (wildcardsPos) + switch (wildcards_pos) { case WildcardsPos::none: break; case WildcardsPos::left: - leftWildcards ="%"; + left_wildcards ="%"; break; case WildcardsPos::right: - rightWildcards = "%"; + right_wildcards = "%"; break; case WildcardsPos::both: - leftWildcards ="%"; - rightWildcards = "%"; + left_wildcards ="%"; + right_wildcards = "%"; break; } - if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) - new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + if (!tokens.empty() && ((++token_pos)->type == 
TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else - throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } @@ -48,7 +48,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue opValue = KQLOperatorValue::none; + KQLOperatorValue op_value = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); @@ -88,14 +88,14 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) --pos; if (KQLOperator.find(op) != KQLOperator.end()) - opValue = KQLOperator[op]; + op_value = KQLOperator[op]; String new_expr; - if (opValue == KQLOperatorValue::none) + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { - switch (opValue) + switch (op_value) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -192,7 +192,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; @@ -232,8 +232,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) ++pos; } - for (auto it=tokens.begin(); it!=tokens.end(); ++it) - res = res + *it + " "; + for (auto & token : tokens) + res = res + token + " "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9beeeda55ef..4a9a13cf14f 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -97,7 +97,7 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - 
String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0a9fa1fc4df..55aade6b2b9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -35,12 +35,12 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto select_query = std::make_shared(); node = select_query; - ParserKQLFilter KQLfilter_p; - ParserKQLLimit KQLlimit_p; - ParserKQLProject KQLproject_p; - ParserKQLSort KQLsort_p; - ParserKQLSummarize KQLsummarize_p; - ParserKQLTable KQLtable_p; + ParserKQLFilter kql_filter_p; + ParserKQLLimit kql_limit_p; + ParserKQLProject kql_project_p; + ParserKQLSort kql_sort_p; + ParserKQLSummarize kql_summarize_p; + ParserKQLTable kql_table_p; ASTPtr select_expression_list; ASTPtr tables; @@ -49,16 +49,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr order_expression_list; ASTPtr limit_length; - std::unordered_map KQLParser = { - { "filter",&KQLfilter_p}, - { "where",&KQLfilter_p}, - { "limit",&KQLlimit_p}, - { "take",&KQLlimit_p}, - { "project",&KQLproject_p}, - { "sort",&KQLsort_p}, - { "order",&KQLsort_p}, - { "summarize",&KQLsummarize_p}, - { "table",&KQLtable_p} + std::unordered_map kql_parser = { + { "filter",&kql_filter_p}, + { "where",&kql_filter_p}, + { "limit",&kql_limit_p}, + { "take",&kql_limit_p}, + { "project",&kql_project_p}, + { "sort",&kql_sort_p}, + { "order",&kql_sort_p}, + { "summarize",&kql_summarize_p}, + { "table",&kql_table_p} }; std::vector> operation_pos; @@ -71,44 +71,44 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark) { ++pos; - String KQLoperator(pos->begin,pos->end); - 
if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + String kql_operator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(KQLoperator,pos)); + operation_pos.push_back(std::make_pair(kql_operator,pos)); } } for (auto &op_pos : operation_pos) { - auto KQLoperator = op_pos.first; + auto kql_operator = op_pos.first; auto npos = op_pos.second; if (!npos.isValid()) return false; - if (!KQLParser[KQLoperator]->parsePrepare(npos)) + if (!kql_parser[kql_operator]->parsePrepare(npos)) return false; } - if (!KQLtable_p.parse(pos, tables, expected)) + if (!kql_table_p.parse(pos, tables, expected)) return false; - if (!KQLproject_p.parse(pos, select_expression_list, expected)) + if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; - if (!KQLlimit_p.parse(pos, limit_length, expected)) + if (!kql_limit_p.parse(pos, limit_length, expected)) return false; - if (!KQLfilter_p.parse(pos, where_expression, expected)) + if (!kql_filter_p.parse(pos, where_expression, expected)) return false; - if (!KQLsort_p.parse(pos, order_expression_list, expected)) + if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; - if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else - group_expression_list = KQLsummarize_p.group_expression_list; + group_expression_list = kql_summarize_p.group_expression_list; select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 9f226c2fc82..70e3283ee3e 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ 
-48,11 +48,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } has_directions.push_back(has_dir); - for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto order_expr = order_expression_list->children[i]->as(); + auto *order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 7dea87eef25..2afbad22131 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -21,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery KQL_p; + ParserKQLWithUnionQuery kql_p; ASTPtr query; - bool parsed = KQL_p.parse(pos, query, expected); + bool parsed = kql_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +36,19 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // will support union next phase - ASTPtr KQLQuery; + ASTPtr kql_query; - if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + if (!ParserKQLQuery().parse(pos, kql_query, expected)) return false; - if (KQLQuery->as()) + if (kql_query->as()) { - node = std::move(KQLQuery); + node = std::move(kql_query); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(KQLQuery); + list_node->children.push_back(kql_query); auto select_with_union_query = std::make_shared(); node = select_with_union_query; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp 
b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 0260902c937..48544a31104 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -38,42 +38,41 @@ std::pair ParserKQLSummarize::removeLastWord(String input) temp.push_back(token); } - String firstPart; + String first_part; for (std::size_t i = 0; i < temp.size() - 1; i++) { - firstPart += temp[i]; + first_part += temp[i]; } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); } -String ParserKQLSummarize::getBinGroupbyString(String exprBin) +String ParserKQLSummarize::getBinGroupbyString(String expr_bin) { String column_name; bool bracket_start = false; bool comma_start = false; String bin_duration; - for (std::size_t i = 0; i < exprBin.size(); i++) + for (char ch : expr_bin) { - if (comma_start && exprBin[i] != ')') - bin_duration += exprBin[i]; - if (exprBin[i] == ',') + if (comma_start && ch != ')') + bin_duration += ch; + if (ch == ',') { comma_start = true; bracket_start = false; } - if (bracket_start == true) - column_name += exprBin[i]; - if (exprBin[i] == '(') + if (bracket_start) + column_name += ch; + if (ch == '(') bracket_start = true; } - std::size_t len = bin_duration.size(); char bin_type = bin_duration[len - 1]; // y, d, h, m, s if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) @@ -110,14 +109,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - auto begin = pos; pos = op_pos.back(); - String exprAggregation; - String exprGroupby; - String exprColumns; - String exprBin; + String expr_aggregation; + String expr_groupby; + String expr_columns; + String expr_bin; bool groupby = false; bool bin_function = false; String bin_column; @@ -133,45 +131,45 @@ 
bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin" || bin_function == true) + if (String(pos->begin, pos->end) == "bin" || bin_function) { bin_function = true; - exprBin += String(pos->begin, pos->end); + expr_bin += String(pos->begin, pos->end); if (String(pos->begin, pos->end) == ")") { - exprBin = getBinGroupbyString(exprBin); - exprGroupby += exprBin; + expr_bin = getBinGroupbyString(expr_bin); + expr_groupby += expr_bin; bin_function = false; } } else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; } else { if (String(pos->begin, pos->end) == "=") { - std::pair temp = removeLastWord(exprAggregation); - exprAggregation = temp.first; + std::pair temp = removeLastWord(expr_aggregation); + expr_aggregation = temp.first; column_name = temp.second; } else { if (!column_name.empty()) { - exprAggregation = exprAggregation + String(pos->begin, pos->end); + expr_aggregation = expr_aggregation + String(pos->begin, pos->end); character_passed++; if (String(pos->begin, pos->end) == ")") { - exprAggregation = exprAggregation + " AS " + column_name; + expr_aggregation = expr_aggregation + " AS " + column_name; column_name = ""; } } else if (!bin_function) { - exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } } } @@ -179,25 +177,25 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if (exprGroupby.empty()) - exprColumns = exprAggregation; + if (expr_groupby.empty()) + expr_columns = expr_aggregation; else { - if (exprAggregation.empty()) - exprColumns = exprGroupby; + if (expr_aggregation.empty()) + expr_columns = expr_groupby; else - exprColumns = exprGroupby + "," + exprAggregation; + expr_columns = expr_groupby + "," + expr_aggregation; } - Tokens 
tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); - IParser::Pos posColumns(tokenColumns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) return false; if (groupby) { - Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); - IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1420d5ce519..b243f74d08f 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -13,8 +13,8 @@ public: protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - std::pair removeLastWord(String input); - String getBinGroupbyString(String exprBin); + static std::pair removeLastWord(String input); + static String getBinGroupbyString(String expr_bin); }; } From 31781601cb459589cb21fbf60d1139d7a3fc1652 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 12 Jun 2022 20:05:51 -0700 Subject: [PATCH 025/173] Kusto-pahse1: Fixed moy style issues. 
--- src/Parsers/Kusto/ParserKQLOperators.cpp | 8 ++++---- src/Parsers/Kusto/ParserKQLOperators.h | 3 ++- src/Parsers/Kusto/ParserKQLProject.cpp | 2 -- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLQuery.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 10 +++++----- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 90b37ba8aea..260c9070d51 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!" 
) + if ( token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -134,7 +134,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -142,7 +142,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 4a9a13cf14f..a780e18d333 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -6,7 +6,8 @@ namespace DB { -class KQLOperators { +class KQLOperators +{ public: String getExprFromToken(IParser::Pos pos); protected: diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fee8cdb612b..0e25c9c4a6c 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -42,6 +42,4 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected return true; } - - } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 55aade6b2b9..1a850e77f48 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include namespace DB @@ -15,8 +14,8 @@ namespace DB bool ParserKQLBase :: parsePrepare(Pos & pos) { - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } String ParserKQLBase :: getExprFromToken(Pos pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 25aa4e6b83c..0545cd00cd9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -11,7 +11,7 @@ 
public: protected: std::vector op_pos; - std::vector expresions; + std::vector expressions; virtual String getExprFromToken(Pos pos); }; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 8d450799785..a7ae7fef579 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -9,17 +9,17 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { - if (!op_pos.empty()) + if (!op_pos.empty()) return false; - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ( { + ({ "SELECT", "INSERT", "CREATE", @@ -42,7 +42,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - } ); + }); if (op_pos.empty()) return false; From c2b3aff3d7f54731dbbe93e89ec043d7699c9523 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 13 Jun 2022 06:26:02 -0700 Subject: [PATCH 026/173] Kusto-phase1: Fixed misleading indentation --- src/Parsers/Kusto/ParserKQLOperators.cpp | 4 ++-- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 260c9070d51..60fa022f9bb 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; else throw Exception("Syntax error 
near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!") + if (token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index a7ae7fef579..f1fc13d2c48 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { if (!op_pos.empty()) - return false; + return false; op_pos.push_back(pos); return true; From 363a09018694b54844c64176f7b033f20a55e8c8 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 16 Jun 2022 01:53:27 +0000 Subject: [PATCH 027/173] Add FILTER & OVER. Refactor. Bug fixes --- src/Parsers/ExpressionListParsers.cpp | 622 +++++++++++------- .../01062_max_parser_depth.reference | 3 +- .../01196_max_parser_depth.reference | 7 +- .../0_stateless/01196_max_parser_depth.sh | 6 +- 4 files changed, 409 insertions(+), 229 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 6c0af85f040..6fcc92cb120 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -603,6 +603,23 @@ bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return elem_parser.parse(pos, node, expected); } +//// Chaining: a < b < c < d becomes (a < b) AND (b < c) AND (c < d) +// ATRPtr chain(std::vector operators, std::vector operands) +// { +// ASTPtrs res; +// res.reserve(operators.size()); +// for (size_t i = 0; i < operators.size(); i++) +// { +// res.push_back(makeASTFunction(operators[i].func_name, {operands[i], operands[i + 1]})); +// } +// return makeASTFunction("and", res); +// } + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + 
//////////////////////////////////////////////////////////////////////////////////////// // class Operator: // - defines structure of certain operator @@ -631,13 +648,6 @@ enum Action class Layer { public: - Layer(TokenType end_bracket_ = TokenType::Whitespace, String func_name_ = "", bool layer_zero_ = false) : - end_bracket(end_bracket_), - func_name(func_name_), - layer_zero(layer_zero_) - { - } - virtual ~Layer() = default; bool popOperator(Operator & op) @@ -677,76 +687,19 @@ public: result.push_back(std::move(op)); } - bool getResult(ASTPtr & op) + virtual bool getResult(ASTPtr & op) { - ASTs res; - std::swap(res, result); - - if (!func_name.empty()) + if (result.size() == 1) { - // Round brackets can mean priority operator as well as function tuple() - if (func_name == "tuple_" && res.size() == 1) - { - op = std::move(res[0]); - } - else - { - if (func_name == "tuple_") - func_name = "tuple"; - - auto func = makeASTFunction(func_name, std::move(res)); - - if (parameters) - { - func->parameters = parameters; - func->children.push_back(func->parameters); - } - - op = func; - } - - return true; - } - - if (res.size() == 1) - { - op = std::move(res[0]); + op = std::move(result[0]); return true; } return false; } - virtual bool parse(IParser::Pos & pos, Expected & expected, Action & action) + virtual bool parse(IParser::Pos & /*pos*/, Expected & /*expected*/, Action & /*action*/) { - if (isFinished()) - return true; - - // fix: layer_zero is basically end_bracket != TokenType::Whitespace - if (!layer_zero && ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - return wrapLayer(); - } - - if (end_bracket != TokenType::Whitespace && ParserToken(end_bracket).ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - // fix: move to other place, ()()() will work, aliases f(a as b)(c) - won't work - if (end_bracket == TokenType::ClosingRoundBracket && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) - { - 
parameters = std::make_shared(); - std::swap(parameters->children, result); - action = Action::OPERAND; - } - else - { - state = -1; - } - } - return true; } @@ -863,10 +816,10 @@ public: else { func = makeASTFunction(cur_op.func_name); - } - if (!lastNOperands(func->children[0]->children, cur_op.arity)) - return false; + if (!lastNOperands(func->children[0]->children, cur_op.arity)) + return false; + } pushOperand(func); } @@ -914,6 +867,28 @@ public: return true; } + bool parseBase(IParser::Pos & pos, Expected & expected, Action & action, TokenType separator, TokenType end) + { + if (ParserToken(separator).ignore(pos, expected)) + { + action = Action::OPERAND; + return wrapLayer(); + } + + if (ParserToken(end).ignore(pos, expected)) + { + action = Action::OPERATOR; + + if (!empty() || !result.empty()) + if (!wrapLayer()) + return false; + + state = -1; + } + + return true; + } + bool insertAlias(ASTPtr node) { if (!wrapLayer(false)) @@ -949,16 +924,253 @@ protected: std::vector operators; ASTs operands; ASTs result; - TokenType end_bracket; - String func_name; int state = 0; - bool layer_zero; - - ASTPtr parameters; int open_between = 0; }; +class FunctionLayer : public Layer +{ +public: + FunctionLayer(String func_name_) : func_name(func_name_) + { + } + + // bool getResult(ASTPtr & op) override + // { + // auto func = makeASTFunction(func_name, std::move(res)); + + // if (parameters) + // { + // func->parameters = parameters; + // func->children.push_back(func->parameters); + // } + + // op = func; + + // return true; + // } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + state = 1; + + auto pos_after_bracket = pos; + auto old_expected = expected; + + ParserKeyword all("ALL"); + ParserKeyword distinct("DISTINCT"); + + if (all.ignore(pos, expected)) + has_all = true; + + if (distinct.ignore(pos, expected)) + has_distinct = true; + + if (!has_all && all.ignore(pos, expected)) + has_all = true; + + if 
(has_all && has_distinct) + return false; + + if (has_all || has_distinct) + { + /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + pos = pos_after_bracket; + expected = old_expected; + has_all = false; + has_distinct = false; + } + } + + if (has_distinct) + func_name += "Distinct"; + + contents_begin = pos->begin; + } + + if (state == 1) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + return wrapLayer(); + } + + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + action = Action::OPERATOR; + + if (!empty() || !result.empty()) + if (!wrapLayer()) + return false; + + contents_end = pos->begin; + + /** Check for a common error case - often due to the complexity of quoting command-line arguments, + * an expression of the form toDate(2014-01-01) appears in the query instead of toDate('2014-01-01'). + * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number, + * and the query silently returns an unexpected result. 
+ */ + if (func_name == "toDate" + && contents_end - contents_begin == strlen("2014-01-01") + && contents_begin[0] >= '2' && contents_begin[0] <= '3' + && contents_begin[1] >= '0' && contents_begin[1] <= '9' + && contents_begin[2] >= '0' && contents_begin[2] <= '9' + && contents_begin[3] >= '0' && contents_begin[3] <= '9' + && contents_begin[4] == '-' + && contents_begin[5] >= '0' && contents_begin[5] <= '9' + && contents_begin[6] >= '0' && contents_begin[6] <= '9' + && contents_begin[7] == '-' + && contents_begin[8] >= '0' && contents_begin[8] <= '9' + && contents_begin[9] >= '0' && contents_begin[9] <= '9') + { + std::string contents_str(contents_begin, contents_end - contents_begin); + throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')" + , ErrorCodes::SYNTAX_ERROR); + } + + if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + { + parameters = std::make_shared(); + std::swap(parameters->children, result); + action = Action::OPERAND; + } + else + { + state = 2; + } + } + } + + if (state == 2) + { + auto function_node = makeASTFunction(func_name, std::move(result)); + + if (parameters) + { + function_node->parameters = parameters; + function_node->children.push_back(function_node->parameters); + } + + ParserKeyword filter("FILTER"); + ParserKeyword over("OVER"); + + if (filter.ignore(pos, expected)) + { + // We are slightly breaking the parser interface by parsing the window + // definition into an existing ASTFunction. Normally it would take a + // reference to ASTPtr and assign it the new node. We only have a pointer + // of a different type, hence this workaround with a temporary pointer. 
+ ASTPtr function_node_as_iast = function_node; + + // Recursion + ParserFilterClause filter_parser; + if (!filter_parser.parse(pos, function_node_as_iast, expected)) + return false; + } + + if (over.ignore(pos, expected)) + { + function_node->is_window_function = true; + + ASTPtr function_node_as_iast = function_node; + + // Recursion + ParserWindowReference window_reference; + if (!window_reference.parse(pos, function_node_as_iast, expected)) + return false; + } + + result = {function_node}; + state = -1; + } + + return true; + } + +private: + bool has_all = false; + bool has_distinct = false; + + const char * contents_begin; + const char * contents_end; + + String func_name; + ASTPtr parameters; +}; + + +class RoundBracketsLayer : public Layer +{ +public: + bool getResult(ASTPtr & op) override + { + // Round brackets can mean priority operator as well as function tuple() + if (!is_tuple && result.size() == 1) + op = std::move(result[0]); + else + op = makeASTFunction("tuple", std::move(result)); + + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + is_tuple = true; + if (!wrapLayer()) + return false; + } + + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + action = Action::OPERATOR; + + if (!empty()) + if (!wrapLayer()) + return false; + + state = -1; + } + + return true; + } +private: + bool is_tuple = false; +}; + +class ArrayLayer : public Layer +{ +public: + bool getResult(ASTPtr & op) override + { + op = makeASTFunction("array", std::move(result)); + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + return Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingSquareBracket); + } +}; + +// FunctionBaseLayer + +class ArrayElementLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & 
expected, Action & action) override + { + return Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingSquareBracket); + } +}; class CastLayer : public Layer { @@ -1056,6 +1268,21 @@ public: class ExtractLayer : public Layer { public: + bool getResult(ASTPtr & op) override + { + if (parsed_interval_kind) + { + if (result.size() == 0) + return false; + + op = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), result[0]); + } + else + op = makeASTFunction("extract", std::move(result)); + + return true; + } + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { if (state == 0) @@ -1065,6 +1292,7 @@ public: if (parseIntervalKind(pos, expected, interval_kind) && s_from.ignore(pos, expected)) { + parsed_interval_kind = true; state = 2; return true; } @@ -1072,14 +1300,12 @@ public: { state = 1; pos = begin; - func_name = "extract"; - end_bracket = TokenType::ClosingRoundBracket; } } if (state == 1) { - return Layer::parse(pos, expected, action); + return Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingRoundBracket); } if (state == 2) @@ -1089,7 +1315,6 @@ public: if (!wrapLayer()) return false; - result[0] = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), result[0]); state = -1; return true; } @@ -1100,6 +1325,7 @@ public: private: IntervalKind interval_kind; + bool parsed_interval_kind = false; }; class SubstringLayer : public Layer @@ -1193,7 +1419,7 @@ public: } } - if (state == 1 || 2) + if (state == 1 || state == 2) { if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) { @@ -1246,6 +1472,12 @@ public: { } + bool getResult(ASTPtr & op) override + { + op = makeASTFunction(func_name, std::move(result)); + return true; + } + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { /// Handles all possible TRIM/LTRIM/RTRIM call variants @@ -1392,6 +1624,7 @@ private: bool char_override = false; ASTPtr to_remove; + String 
func_name; }; @@ -1402,6 +1635,20 @@ public: { } + bool getResult(ASTPtr & op) override + { + if (parsed_interval_kind) + { + result[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result[0]); + op = makeASTFunction(function_name, result[1], result[0]); + } + else + op = makeASTFunction(function_name, std::move(result)); + + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { if (state == 0) @@ -1413,18 +1660,17 @@ public: action = Action::OPERAND; state = 2; + parsed_interval_kind = true; } else { - func_name = function_name; - end_bracket = TokenType::ClosingRoundBracket; state = 1; } } if (state == 1) { - return Layer::parse(pos, expected, action); + return Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingRoundBracket); } if (state == 2) @@ -1447,8 +1693,6 @@ public: if (!wrapLayer()) return false; - result[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result[0]); - result = {makeASTFunction(function_name, result[1], result[0])}; state = -1; } } @@ -1458,60 +1702,49 @@ public: private: IntervalKind interval_kind; const char * function_name; + bool parsed_interval_kind = false; }; class DateDiffLayer : public Layer { public: + bool getResult(ASTPtr & op) override + { + if (parsed_interval_kind) + { + if (result.size() == 2) + op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1]); + else if (result.size() == 3) + op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1], result[2]); + else + return false; + } + else + { + op = makeASTFunction("dateDiff", std::move(result)); + } + return true; + } + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { if (state == 0) { - if (!parseIntervalKind(pos, expected, interval_kind)) - { - func_name = "dateDiff"; - end_bracket = TokenType::ClosingRoundBracket; - state = 1; - } 
- else + if (parseIntervalKind(pos, expected, interval_kind)) { + parsed_interval_kind = true; + if (!ParserToken(TokenType::Comma).ignore(pos, expected)) return false; - - state = 2; } + + state = 1; } if (state == 1) { - return Layer::parse(pos, expected, action); - } - - if (state == 2) - { - if (ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!wrapLayer()) - return false; - } - - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - if (result.size() == 2) - result = {makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1])}; - else if (result.size() == 3) - result = {makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1], result[2])}; - else - return false; - - state = -1; - } + return Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingRoundBracket); } return true; @@ -1519,6 +1752,7 @@ public: private: IntervalKind interval_kind; + bool parsed_interval_kind = false; }; @@ -1557,12 +1791,15 @@ public: expected = init_expected; } else + { /// case: INTERVAL '1 HOUR' if (!parseIntervalKind(token_pos, token_expected, interval_kind)) return false; result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), expr)}; state = -1; + return true; + } } } } @@ -1772,11 +2009,11 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) }); static std::vector> op_table_unary({ - {"-", Operator("negate", 40, 1)}, + {"-", Operator("negate", 39, 1)}, {"NOT", Operator("not", 9, 1)} }); - ParserCompoundIdentifier identifier_parser; + ParserCompoundIdentifier identifier_parser(false, true); ParserNumber number_parser; ParserAsterisk asterisk_parser; ParserLiteral literal_parser; @@ -1794,7 +2031,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Action next = Action::OPERAND; 
std::vector> storage; - storage.push_back(std::make_unique(TokenType::Whitespace, "", true)); + storage.push_back(std::make_unique()); while (pos.isValid()) { @@ -1858,8 +2095,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) literal_parser.parse(pos, tmp, expected) || asterisk_parser.parse(pos, tmp, expected) || qualified_asterisk_parser.parse(pos, tmp, expected) || - columns_matcher_parser.parse(pos, tmp, expected) || - substitution_parser.parse(pos, tmp, expected)) + columns_matcher_parser.parse(pos, tmp, expected)) { storage.back()->pushOperand(std::move(tmp)); } @@ -1870,92 +2106,48 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ++pos; - /// Special case for function with zero arguments: f() - if (pos->type == TokenType::ClosingRoundBracket) - { - ++pos; - auto function = makeASTFunction(getIdentifierName(tmp)); - storage.back()->pushOperand(function); - } + next = Action::OPERAND; + + String function_name = getIdentifierName(tmp); + String function_name_lowercase = Poco::toLower(function_name); + + if (function_name_lowercase == "cast") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "extract") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "substring") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "position") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "exists") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "trim") + storage.push_back(std::make_unique(false, false)); + else if (function_name_lowercase == "ltrim") + storage.push_back(std::make_unique(true, false)); + else if (function_name_lowercase == "rtrim") + storage.push_back(std::make_unique(false, true)); + else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" + || function_name_lowercase == "timestampadd" || function_name_lowercase == 
"timestamp_add") + storage.push_back(std::make_unique("plus")); + else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" + || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") + storage.push_back(std::make_unique("minus")); + else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" + || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") + storage.push_back(std::make_unique()); else - { - next = Action::OPERAND; - - String function_name = getIdentifierName(tmp); - String function_name_lowercase = Poco::toLower(function_name); - - if (function_name_lowercase == "cast") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "extract") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "substring") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "position") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "exists") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "trim") - storage.push_back(std::make_unique(false, false)); - else if (function_name_lowercase == "ltrim") - storage.push_back(std::make_unique(true, false)); - else if (function_name_lowercase == "rtrim") - storage.push_back(std::make_unique(false, true)); - else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" - || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") - storage.push_back(std::make_unique("plus")); - else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" - || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") - storage.push_back(std::make_unique("minus")); - else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" - || function_name_lowercase 
== "timestampdiff" || function_name_lowercase == "timestamp_diff") - storage.push_back(std::make_unique()); - else - { - bool has_all = false; - bool has_distinct = false; - - auto pos_after_bracket = pos; - auto old_expected = expected; - - ParserKeyword all("ALL"); - ParserKeyword distinct("DISTINCT"); - - if (all.ignore(pos, expected)) - has_all = true; - - if (distinct.ignore(pos, expected)) - has_distinct = true; - - if (!has_all && all.ignore(pos, expected)) - has_all = true; - - if (has_all && has_distinct) - return false; - - if (has_all || has_distinct) - { - /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier - if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) - { - pos = pos_after_bracket; - expected = old_expected; - has_all = false; - has_distinct = false; - } - } - - if (has_distinct) - function_name += "Distinct"; - - storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, function_name)); - } - } + storage.push_back(std::make_unique(function_name)); } else { storage.back()->pushOperand(std::move(tmp)); } } + else if (substitution_parser.parse(pos, tmp, expected)) + { + storage.back()->pushOperand(std::move(tmp)); + } else if (pos->type == TokenType::OpeningRoundBracket) { if (ParserSubquery().parse(pos, tmp, expected)) @@ -1964,25 +2156,15 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) continue; } next = Action::OPERAND; - storage.push_back(std::make_unique(TokenType::ClosingRoundBracket, "tuple_")); + storage.push_back(std::make_unique()); ++pos; } else if (pos->type == TokenType::OpeningSquareBracket) { ++pos; - /// Special case for empty array: [] - if (pos->type == TokenType::ClosingSquareBracket) - { - ++pos; - auto function = makeASTFunction("array"); - storage.back()->pushOperand(function); - } - else - { - next = Action::OPERAND; - storage.push_back(std::make_unique(TokenType::ClosingSquareBracket, "array")); - 
} + next = Action::OPERAND; + storage.push_back(std::make_unique()); } else { @@ -2027,7 +2209,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage.back()->pushOperator(op); if (op.func_name == "arrayElement") - storage.push_back(std::make_unique(TokenType::ClosingSquareBracket)); + storage.push_back(std::make_unique()); // isNull & isNotNull is postfix unary operator if (op.func_name == "isNull" || op.func_name == "isNotNull") diff --git a/tests/queries/0_stateless/01062_max_parser_depth.reference b/tests/queries/0_stateless/01062_max_parser_depth.reference index 3efc4f06710..d945ae385f3 100644 --- a/tests/queries/0_stateless/01062_max_parser_depth.reference +++ b/tests/queries/0_stateless/01062_max_parser_depth.reference @@ -1,4 +1,3 @@ - -Maximum parse depth (42) exceeded. - -Maximum parse depth (20) exceeded. + diff --git a/tests/queries/0_stateless/01196_max_parser_depth.reference b/tests/queries/0_stateless/01196_max_parser_depth.reference index 072fc270acd..3a346222e4c 100644 --- a/tests/queries/0_stateless/01196_max_parser_depth.reference +++ b/tests/queries/0_stateless/01196_max_parser_depth.reference @@ -1,4 +1,3 @@ -Code: 306 -Code: 306 -Code: 306 -Code: 306 +Code: 167 +Code: 167 +Code: 167 diff --git a/tests/queries/0_stateless/01196_max_parser_depth.sh b/tests/queries/0_stateless/01196_max_parser_depth.sh index ae4851bf0c3..57283feb7f0 100755 --- a/tests/queries/0_stateless/01196_max_parser_depth.sh +++ b/tests/queries/0_stateless/01196_max_parser_depth.sh @@ -6,6 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) { printf "select "; for _ in {1..1000}; do printf "coalesce(null, "; done; printf "1"; for _ in {1..1000}; do printf ")"; done; } > "${CLICKHOUSE_TMP}"/query -cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_CLIENT 2>&1 | grep -o -F 'Code: 306' -cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_LOCAL 2>&1 | grep -o -F 'Code: 306' -cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_CURL --data-binary @- -vsS 
"$CLICKHOUSE_URL" 2>&1 | grep -o -F 'Code: 306' +cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_CLIENT 2>&1 | grep -o -F 'Code: 167' +cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_LOCAL 2>&1 | grep -o -F 'Code: 167' +cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_CURL --data-binary @- -vsS "$CLICKHOUSE_URL" 2>&1 | grep -o -F 'Code: 167' From b9b0c4c637269d9af9f979317d81403a5ac763ed Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 22 Jun 2022 21:59:20 +0000 Subject: [PATCH 028/173] Improvements --- src/Parsers/ExpressionListParsers.cpp | 241 +++++++++++------- ...1019_alter_materialized_view_consistent.sh | 6 +- .../01196_max_parser_depth.reference | 2 + 3 files changed, 158 insertions(+), 91 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 6fcc92cb120..f3614aba7a7 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -615,6 +615,55 @@ bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expe // return makeASTFunction("and", res); // } +ASTPtr makeBetweenOperator(bool negative, ASTs arguments) +{ + // subject = arguments[0], left = arguments[1], right = arguments[2] + auto f_combined_expression = std::make_shared(); + auto args_combined_expression = std::make_shared(); + + /// [NOT] BETWEEN left AND right + auto f_left_expr = std::make_shared(); + auto args_left_expr = std::make_shared(); + + auto f_right_expr = std::make_shared(); + auto args_right_expr = std::make_shared(); + + args_left_expr->children.emplace_back(arguments[0]); + args_left_expr->children.emplace_back(arguments[1]); + + args_right_expr->children.emplace_back(arguments[0]); + args_right_expr->children.emplace_back(arguments[2]); + + if (negative) + { + /// NOT BETWEEN + f_left_expr->name = "less"; + f_right_expr->name = "greater"; + f_combined_expression->name = "or"; + } + else + { + /// BETWEEN + f_left_expr->name = "greaterOrEquals"; + f_right_expr->name = 
"lessOrEquals"; + f_combined_expression->name = "and"; + } + + f_left_expr->arguments = args_left_expr; + f_left_expr->children.emplace_back(f_left_expr->arguments); + + f_right_expr->arguments = args_right_expr; + f_right_expr->children.emplace_back(f_right_expr->arguments); + + args_combined_expression->children.emplace_back(f_left_expr); + args_combined_expression->children.emplace_back(f_right_expr); + + f_combined_expression->arguments = args_combined_expression; + f_combined_expression->children.emplace_back(f_combined_expression->arguments); + + return f_combined_expression; +} + namespace ErrorCodes { extern const int SYNTAX_ERROR; @@ -767,51 +816,7 @@ public: if (!lastNOperands(arguments, 3)) return false; - // subject = arguments[0], left = arguments[1], right = arguments[2] - auto f_combined_expression = std::make_shared(); - auto args_combined_expression = std::make_shared(); - - /// [NOT] BETWEEN left AND right - auto f_left_expr = std::make_shared(); - auto args_left_expr = std::make_shared(); - - auto f_right_expr = std::make_shared(); - auto args_right_expr = std::make_shared(); - - args_left_expr->children.emplace_back(arguments[0]); - args_left_expr->children.emplace_back(arguments[1]); - - args_right_expr->children.emplace_back(arguments[0]); - args_right_expr->children.emplace_back(arguments[2]); - - if (negative) - { - /// NOT BETWEEN - f_left_expr->name = "less"; - f_right_expr->name = "greater"; - f_combined_expression->name = "or"; - } - else - { - /// BETWEEN - f_left_expr->name = "greaterOrEquals"; - f_right_expr->name = "lessOrEquals"; - f_combined_expression->name = "and"; - } - - f_left_expr->arguments = args_left_expr; - f_left_expr->children.emplace_back(f_left_expr->arguments); - - f_right_expr->arguments = args_right_expr; - f_right_expr->children.emplace_back(f_right_expr->arguments); - - args_combined_expression->children.emplace_back(f_left_expr); - args_combined_expression->children.emplace_back(f_right_expr); - - 
f_combined_expression->arguments = args_combined_expression; - f_combined_expression->children.emplace_back(f_combined_expression->arguments); - - func = f_combined_expression; + func = makeBetweenOperator(negative, arguments); } else { @@ -987,9 +992,6 @@ public: } } - if (has_distinct) - func_name += "Distinct"; - contents_begin = pos->begin; } @@ -1039,6 +1041,39 @@ public: parameters = std::make_shared(); std::swap(parameters->children, result); action = Action::OPERAND; + + /// Parametric aggregate functions cannot have DISTINCT in parameters list. + if (has_distinct) + return false; + + auto pos_after_bracket = pos; + auto old_expected = expected; + + ParserKeyword all("ALL"); + ParserKeyword distinct("DISTINCT"); + + if (all.ignore(pos, expected)) + has_all = true; + + if (distinct.ignore(pos, expected)) + has_distinct = true; + + if (!has_all && all.ignore(pos, expected)) + has_all = true; + + if (has_all && has_distinct) + return false; + + if (has_all || has_distinct) + { + /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + pos = pos_after_bracket; + expected = old_expected; + has_distinct = false; + } + } } else { @@ -1049,6 +1084,9 @@ public: if (state == 2) { + if (has_distinct) + func_name += "Distinct"; + auto function_node = makeASTFunction(func_name, std::move(result)); if (parameters) @@ -1971,46 +2009,46 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { static std::vector> op_table({ - {"+", Operator("plus", 20, 2)}, // Base arithmetic - {"-", Operator("minus", 20, 2)}, - {"*", Operator("multiply", 30, 2)}, - {"/", Operator("divide", 30, 2)}, - {"%", Operator("modulo", 30, 2)}, - {"MOD", Operator("modulo", 30, 2)}, - {"DIV", Operator("intDiv", 30, 2)}, - {"==", Operator("equals", 10, 2)}, // 
Base logic - {"!=", Operator("notEquals", 10, 2)}, - {"<>", Operator("notEquals", 10, 2)}, - {"<=", Operator("lessOrEquals", 10, 2)}, - {">=", Operator("greaterOrEquals", 10, 2)}, - {"<", Operator("less", 10, 2)}, - {">", Operator("greater", 10, 2)}, - {"=", Operator("equals", 10, 2)}, - {"AND", Operator("and", 5, 2)}, // AND OR - {"OR", Operator("or", 4, 2)}, - {"||", Operator("concat", 30, 2)}, // concat() func - {".", Operator("tupleElement", 40, 2)}, // tupleElement() func - {"IS NULL", Operator("isNull", 9, 1)}, // IS (NOT) NULL + {"+", Operator("plus", 20)}, // Base arithmetic + {"-", Operator("minus", 20)}, + {"*", Operator("multiply", 30)}, + {"/", Operator("divide", 30)}, + {"%", Operator("modulo", 30)}, + {"MOD", Operator("modulo", 30)}, + {"DIV", Operator("intDiv", 30)}, + {"==", Operator("equals", 10)}, // Base logic + {"!=", Operator("notEquals", 10)}, + {"<>", Operator("notEquals", 10)}, + {"<=", Operator("lessOrEquals", 10)}, + {">=", Operator("greaterOrEquals", 10)}, + {"<", Operator("less", 10)}, + {">", Operator("greater", 10)}, + {"=", Operator("equals", 10)}, + {"AND", Operator("and", 5)}, // AND OR + {"OR", Operator("or", 4)}, + {"||", Operator("concat", 11)}, // concat() func + {".", Operator("tupleElement", 40)}, // tupleElement() func + {"IS NULL", Operator("isNull", 9, 1)}, // IS (NOT) NULL {"IS NOT NULL", Operator("isNotNull", 9, 1)}, - {"LIKE", Operator("like", 10, 2)}, // LIKE funcs - {"ILIKE", Operator("ilike", 10, 2)}, - {"NOT LIKE", Operator("notLike", 10, 2)}, - {"NOT ILIKE", Operator("notILike", 10, 2)}, - {"IN", Operator("in", 10, 2)}, // IN funcs - {"NOT IN", Operator("notIn", 10, 2)}, - {"GLOBAL IN", Operator("globalIn", 10, 2)}, - {"GLOBAL NOT IN", Operator("globalNotIn", 10, 2)}, - {"?", Operator("if_1", 3, 0)}, - {":", Operator("if", 4, 3)}, - {"BETWEEN", Operator("between_1", 5, 0)}, - {"NOT BETWEEN", Operator("not_between_1", 5, 0)}, - {"[", Operator("arrayElement", 40, 2)}, // Layer is added in the process - {"::", 
Operator("CAST", 50, 2)} + {"LIKE", Operator("like", 10)}, // LIKE funcs + {"ILIKE", Operator("ilike", 10)}, + {"NOT LIKE", Operator("notLike", 10)}, + {"NOT ILIKE", Operator("notILike", 10)}, + {"IN", Operator("in", 10)}, // IN funcs + {"NOT IN", Operator("notIn", 10)}, + {"GLOBAL IN", Operator("globalIn", 10)}, + {"GLOBAL NOT IN", Operator("globalNotIn", 10)}, + {"?", Operator("if_1", 3, 0)}, + {":", Operator("if", 4, 3)}, + {"BETWEEN", Operator("between_1", 7, 0)}, + {"NOT BETWEEN", Operator("not_between_1", 7, 0)}, + {"[", Operator("arrayElement", 40)}, // Layer is added in the process + {"::", Operator("CAST", 40)} }); static std::vector> op_table_unary({ - {"-", Operator("negate", 39, 1)}, - {"NOT", Operator("not", 9, 1)} + {"-", Operator("negate", 39, 1)}, + {"NOT", Operator("not", 6, 1)} }); ParserCompoundIdentifier identifier_parser(false, true); @@ -2192,17 +2230,44 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (op.func_name == "and" && storage.back()->hasBetween()) { storage.back()->subBetween(); - op = Operator("between_2", 6, 0); + op = Operator("between_2", 8, 0); } while (storage.back()->previousPriority() >= op.priority) { + ASTPtr func; Operator prev_op; storage.back()->popOperator(prev_op); - auto func = makeASTFunction(prev_op.func_name); - if (!storage.back()->lastNOperands(func->children[0]->children, prev_op.arity)) - return false; + if ((op.func_name == "and" && prev_op.func_name == "and") || + (op.func_name == "or" && prev_op.func_name == "or") || + (op.func_name == "concat" && prev_op.func_name == "concat")) + { + op.arity += prev_op.arity - 1; + break; + } + + if (prev_op.func_name == "between_2") + { + Operator prev_prev_op; + if (!storage.back()->popOperator(prev_prev_op) || !(prev_prev_op.func_name == "between_1" || prev_prev_op.func_name == "not_between_1")) + return false; + + bool negative = prev_prev_op.func_name == "not_between_1"; + + ASTs arguments; + if 
(!storage.back()->lastNOperands(arguments, 3)) + return false; + + func = makeBetweenOperator(negative, arguments); + } + else + { + func = makeASTFunction(prev_op.func_name); + + if (!storage.back()->lastNOperands(func->children[0]->children, prev_op.arity)) + return false; + } storage.back()->pushOperand(func); } diff --git a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh index e90085f4e8e..b4fc336713b 100755 --- a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh +++ b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh @@ -39,7 +39,7 @@ function insert_thread() { done wait - is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(case = 1) > 0 AND countIf(case = 2) > 0 FROM mv;") + is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(\`case\` = 1) > 0 AND countIf(\`case\` = 2) > 0 FROM mv;") if [ "$is_done" -eq "1" ]; then break @@ -58,7 +58,7 @@ function alter_thread() { -q "${ALTER[$RANDOM % 2]}" sleep "0.0$RANDOM" - is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(case = 1) > 0 AND countIf(case = 2) > 0 FROM mv;") + is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(\`case\` = 1) > 0 AND countIf(\`case\` = 2) > 0 FROM mv;") if [ "$is_done" -eq "1" ]; then break @@ -75,7 +75,7 @@ timeout 120 bash -c alter_thread & wait -$CLICKHOUSE_CLIENT -q "SELECT countIf(case = 1) > 0 AND countIf(case = 2) > 0 FROM mv LIMIT 1;" +$CLICKHOUSE_CLIENT -q "SELECT countIf(\`case\` = 1) > 0 AND countIf(\`case\` = 2) > 0 FROM mv LIMIT 1;" $CLICKHOUSE_CLIENT -q "SELECT 'inconsistencies', count() FROM mv WHERE test == 0;" $CLICKHOUSE_CLIENT -q "DROP VIEW mv" diff --git a/tests/queries/0_stateless/01196_max_parser_depth.reference b/tests/queries/0_stateless/01196_max_parser_depth.reference index 3a346222e4c..d2222a8b895 100644 --- a/tests/queries/0_stateless/01196_max_parser_depth.reference +++ b/tests/queries/0_stateless/01196_max_parser_depth.reference @@ -1,3 +1,5 @@ 
Code: 167 Code: 167 Code: 167 +Code: 167 +Code: 167 From aba44f9fb8f2fb19d8e9cf8938d4a43d73039fb9 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 23 Jun 2022 08:18:14 +0000 Subject: [PATCH 029/173] Add GROUPING, ANY/ALL operators, fixes --- src/Parsers/ExpressionListParsers.cpp | 102 ++++++++++++++++-- .../01062_max_parser_depth.reference | 2 +- .../0_stateless/01062_max_parser_depth.sh | 1 + 3 files changed, 93 insertions(+), 12 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4a59664f6e8..ec5f50ddf04 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -679,13 +679,17 @@ public: { } - Operator(String func_name_, Int32 priority_, Int32 arity_ = 2) : func_name(func_name_), priority(priority_), arity(arity_) + Operator(String func_name_, + Int32 priority_, + Int32 arity_ = 2, + bool any_all_ = false) : func_name(func_name_), priority(priority_), arity(arity_), any_all(any_all_) { } String func_name; Int32 priority; Int32 arity; + bool any_all; }; enum Action @@ -710,9 +714,17 @@ public: return true; } - void pushOperator(Operator op) + void pushOperator(Operator op, bool count = true) { + if (count && op.func_name != "and" && op.func_name != "or" && op.func_name != "concat") + { + ++depth_diff; + ++depth_total; + } + operators.push_back(std::move(op)); + + // LOG_FATAL(&Poco::Logger::root(), "#push {}: diff = {}, total = {}", op.func_name, depth_diff, depth_total); } bool popOperand(ASTPtr & op) @@ -840,6 +852,13 @@ public: else pushOperand(node); + // LOG_FATAL(&Poco::Logger::root(), "#wrap-before: diff = {}, total = {}", depth_diff, depth_total); + + depth_diff -= depth_total; + depth_total = 0; + + // LOG_FATAL(&Poco::Logger::root(), "#wrap-after: diff = {}, total = {}", depth_diff, depth_total); + return res; } @@ -925,6 +944,16 @@ public: return open_between > 0; } + void syncDepth(IParser::Pos & pos) + { + // LOG_FATAL(&Poco::Logger::root(), 
"#sync: diff = {}", depth_diff); + for (; depth_diff > 0; --depth_diff) + pos.increaseDepth(); + + for (; depth_diff < 0; ++depth_diff) + pos.decreaseDepth(); + } + protected: std::vector operators; ASTs operands; @@ -932,6 +961,8 @@ protected: int state = 0; int open_between = 0; + int depth_diff = 1; + int depth_total = 1; }; class FunctionLayer : public Layer @@ -2016,14 +2047,14 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) {"%", Operator("modulo", 30)}, {"MOD", Operator("modulo", 30)}, {"DIV", Operator("intDiv", 30)}, - {"==", Operator("equals", 10)}, // Base logic - {"!=", Operator("notEquals", 10)}, - {"<>", Operator("notEquals", 10)}, - {"<=", Operator("lessOrEquals", 10)}, - {">=", Operator("greaterOrEquals", 10)}, - {"<", Operator("less", 10)}, - {">", Operator("greater", 10)}, - {"=", Operator("equals", 10)}, + {"==", Operator("equals", 10, 2, true)}, // Base logic + {"!=", Operator("notEquals", 10, 2, true)}, + {"<>", Operator("notEquals", 10, 2, true)}, + {"<=", Operator("lessOrEquals", 10, 2, true)}, + {">=", Operator("greaterOrEquals", 10, 2, true)}, + {"<", Operator("less", 10, 2, true)}, + {">", Operator("greater", 10, 2, true)}, + {"=", Operator("equals", 10, 2, true)}, {"AND", Operator("and", 5)}, // AND OR {"OR", Operator("or", 4)}, {"||", Operator("concat", 11)}, // concat() func @@ -2077,6 +2108,8 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!storage.back()->parse(pos, expected, next)) return false; + storage.back()->syncDepth(pos); + if (storage.back()->isFinished()) { next = Action::OPERATOR; @@ -2096,12 +2129,55 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr tmp; /// Special case for cast expression - if (ParseCastExpression(pos, tmp, expected)) + Operator prev_op; + bool parse_cast = true; + bool any_all = true; + if (storage.back()->popOperator(prev_op)) + { + storage.back()->pushOperator(prev_op, false); + if 
(prev_op.func_name == "tupleElement") + parse_cast = false; + any_all = prev_op.any_all; + } + if (parse_cast && ParseCastExpression(pos, tmp, expected)) { storage.back()->pushOperand(std::move(tmp)); continue; } + if (any_all) + { + auto old_pos = pos; + SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; + + if (ParserKeyword("ANY").ignore(pos, expected) && ParserSubquery().parse(pos, tmp, expected)) + subquery_function_type = SubqueryFunctionType::ANY; + else if (ParserKeyword("ALL").ignore(pos, expected) && ParserSubquery().parse(pos, tmp, expected)) + subquery_function_type = SubqueryFunctionType::ALL; + + if (subquery_function_type != SubqueryFunctionType::NONE) + { + ASTPtr function, argument; + + if (!storage.back()->popOperator(prev_op)) + return false; + if (!storage.back()->popOperand(argument)) + return false; + + function = makeASTFunction(prev_op.func_name, argument, tmp); + + if (!modifyAST(function, subquery_function_type)) + return false; + + storage.back()->pushOperand(std::move(function)); + continue; + } + else + { + pos = old_pos; + } + } + /// Try to find any unary operators auto cur_op = op_table_unary.begin(); for (; cur_op != op_table_unary.end(); ++cur_op) @@ -2174,6 +2250,8 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") storage.push_back(std::make_unique()); + else if (function_name_lowercase == "grouping") + storage.push_back(std::make_unique(function_name_lowercase)); else storage.push_back(std::make_unique(function_name)); } @@ -2327,6 +2405,8 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!storage.back()->getResult(node)) return false; + storage.back()->syncDepth(pos); + return true; } diff --git a/tests/queries/0_stateless/01062_max_parser_depth.reference 
b/tests/queries/0_stateless/01062_max_parser_depth.reference index d945ae385f3..fc9c24676eb 100644 --- a/tests/queries/0_stateless/01062_max_parser_depth.reference +++ b/tests/queries/0_stateless/01062_max_parser_depth.reference @@ -1,3 +1,3 @@ - - - +- diff --git a/tests/queries/0_stateless/01062_max_parser_depth.sh b/tests/queries/0_stateless/01062_max_parser_depth.sh index 27593272f92..baebd7becd8 100755 --- a/tests/queries/0_stateless/01062_max_parser_depth.sh +++ b/tests/queries/0_stateless/01062_max_parser_depth.sh @@ -9,3 +9,4 @@ echo - echo 'select (1+1)*(2+1)' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_parser_depth=42" -d @- 2>&1 | grep -oP "Maximum parse depth .* exceeded." echo - echo 'select 1' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_parser_depth=20" -d @- 2>&1 | grep -oP "Maximum parse depth .* exceeded." +echo - From 2a496bdef146adc68f4b721a631e50e610b710d7 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 24 Jun 2022 11:41:13 +0000 Subject: [PATCH 030/173] Small fixes --- src/Parsers/ExpressionListParsers.cpp | 65 +++++++-------------------- 1 file changed, 17 insertions(+), 48 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index ec5f50ddf04..787781de0f4 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -603,17 +603,6 @@ bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return elem_parser.parse(pos, node, expected); } -//// Chaining: a < b < c < d becomes (a < b) AND (b < c) AND (c < d) -// ATRPtr chain(std::vector operators, std::vector operands) -// { -// ASTPtrs res; -// res.reserve(operators.size()); -// for (size_t i = 0; i < operators.size(); i++) -// { -// res.push_back(makeASTFunction(operators[i].func_name, {operands[i], operands[i + 1]})); -// } -// return makeASTFunction("and", res); -// } ASTPtr makeBetweenOperator(bool negative, ASTs arguments) { @@ -675,9 +664,7 @@ namespace 
ErrorCodes class Operator { public: - Operator() - { - } + Operator() = default; Operator(String func_name_, Int32 priority_, @@ -705,7 +692,7 @@ public: bool popOperator(Operator & op) { - if (operators.size() == 0) + if (operators.empty()) return false; op = std::move(operators.back()); @@ -723,13 +710,11 @@ public: } operators.push_back(std::move(op)); - - // LOG_FATAL(&Poco::Logger::root(), "#push {}: diff = {}, total = {}", op.func_name, depth_diff, depth_total); } bool popOperand(ASTPtr & op) { - if (operands.size() == 0) + if (operands.empty()) return false; op = std::move(operands.back()); @@ -764,12 +749,12 @@ public: return true; } - bool isFinished() + bool isFinished() const { return state == -1; } - int previousPriority() + int previousPriority() const { if (operators.empty()) return 0; @@ -777,7 +762,7 @@ public: return operators.back().priority; } - int empty() + int empty() const { return operators.empty() && operands.empty(); } @@ -852,13 +837,9 @@ public: else pushOperand(node); - // LOG_FATAL(&Poco::Logger::root(), "#wrap-before: diff = {}, total = {}", depth_diff, depth_total); - depth_diff -= depth_total; depth_total = 0; - // LOG_FATAL(&Poco::Logger::root(), "#wrap-after: diff = {}, total = {}", depth_diff, depth_total); - return res; } @@ -939,14 +920,13 @@ public: --open_between; } - bool hasBetween() + bool hasBetween() const { return open_between > 0; } void syncDepth(IParser::Pos & pos) { - // LOG_FATAL(&Poco::Logger::root(), "#sync: diff = {}", depth_diff); for (; depth_diff > 0; --depth_diff) pos.increaseDepth(); @@ -972,21 +952,6 @@ public: { } - // bool getResult(ASTPtr & op) override - // { - // auto func = makeASTFunction(func_name, std::move(res)); - - // if (parameters) - // { - // func->parameters = parameters; - // func->children.push_back(func->parameters); - // } - - // op = func; - - // return true; - // } - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { if (state == 0) @@ -1341,7 +1306,7 @@ 
public: { if (parsed_interval_kind) { - if (result.size() == 0) + if (result.empty()) return false; op = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), result[0]); @@ -1700,7 +1665,7 @@ private: class DateAddLayer : public Layer { public: - DateAddLayer(const char * function_name_) : function_name(function_name_) + explicit DateAddLayer(const char * function_name_) : function_name(function_name_) { } @@ -1913,7 +1878,7 @@ public: { if (ParserKeyword("WHEN").ignore(pos, expected)) { - if ((has_case_expr || result.size() > 0) && !wrapLayer()) + if ((has_case_expr || !result.empty()) && !wrapLayer()) return false; action = Action::OPERAND; @@ -2089,6 +2054,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserTupleOfLiterals tuple_literal_parser; ParserArrayOfLiterals array_literal_parser; ParserSubstitution substitution_parser; + ParserMySQLGlobalVariable mysql_global_variable_parser; ParserKeyword filter("FILTER"); ParserKeyword over("OVER"); @@ -2104,7 +2070,6 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) while (pos.isValid()) { - // LOG_FATAL(&Poco::Logger::root(), "#pos: {}", String(pos->begin, pos->size())); if (!storage.back()->parse(pos, expected, next)) return false; @@ -2215,7 +2180,6 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (identifier_parser.parse(pos, tmp, expected)) { - /// If the next token is '(' then it is a plain function, '[' - arrayElement function if (pos->type == TokenType::OpeningRoundBracket) { ++pos; @@ -2282,6 +2246,10 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) next = Action::OPERAND; storage.push_back(std::make_unique()); } + else if (mysql_global_variable_parser.parse(pos, tmp, expected)) + { + storage.back()->pushOperand(std::move(tmp)); + } else { break; @@ -2304,7 +2272,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & 
expected) { auto op = cur_op->second; - // AND can be both boolean function and part of the BETWEEN ... AND ... operator + // 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator if (op.func_name == "and" && storage.back()->hasBetween()) { storage.back()->subBetween(); @@ -2396,6 +2364,7 @@ bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } + // Check if we only have one starting layer if (storage.size() > 1) return false; From 9b8edc808c2941fc9cc2fc1e12b333fa80cd4ace Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 24 Jun 2022 15:42:44 +0000 Subject: [PATCH 031/173] Replace ParserExpression everywhere --- src/Parsers/ExpressionListParsers.cpp | 4 ++-- src/Parsers/ExpressionListParsers.h | 7 ++----- src/Parsers/ParserSelectQuery.cpp | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 787781de0f4..c6f98275126 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -948,7 +948,7 @@ protected: class FunctionLayer : public Layer { public: - FunctionLayer(String func_name_) : func_name(func_name_) + explicit FunctionLayer(String func_name_) : func_name(func_name_) { } @@ -2002,7 +2002,7 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte return true; } -bool ParserExpression2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { static std::vector> op_table({ {"+", Operator("plus", 20)}, // Base arithmetic diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index e9ac5058b11..4aef96daec9 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -462,7 +462,7 @@ protected: }; -using ParserExpression = ParserLambdaExpression; +using ParserExpression2 = 
ParserLambdaExpression; class ParserExpressionWithOptionalAlias : public IParserBase @@ -510,11 +510,8 @@ protected: }; -class ParserExpression2 : public IParserBase +class ParserExpression : public IParserBase { -private: - ParserTernaryOperatorExpression elem_parser; - protected: const char * getName() const override { return "lambda expression"; } diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index f662cdac697..8622cc9f615 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -164,7 +164,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) // TEST ParserToken test(TokenType::DollarSign); - if (test.ignore(pos, expected)) + if (!test.ignore(pos, expected)) { if (!exp_list_for_select_clause.parse(pos, select_expression_list, expected)) return false; From 7299f81739f85f0b39e368ba5c2fa3ae558b0245 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 24 Jun 2022 22:33:20 +0000 Subject: [PATCH 032/173] Fixes --- src/Parsers/ExpressionListParsers.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index c6f98275126..63d21b0f7dc 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -703,10 +703,18 @@ public: void pushOperator(Operator op, bool count = true) { - if (count && op.func_name != "and" && op.func_name != "or" && op.func_name != "concat") + if (count) { - ++depth_diff; - ++depth_total; + if (op.func_name != "and" && op.func_name != "or" && op.func_name != "concat") + { + ++depth_diff; + ++depth_total; + } + else + { + depth_diff -= depth_total; + depth_total = 0; + } } operators.push_back(std::move(op)); @@ -2260,6 +2268,10 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) next = Action::OPERAND; ASTPtr tmp; + Expected stub; + if (ParserKeyword("IN 
PARTITION").checkWithoutMoving(pos, stub)) + break; + /// Try to find operators from 'op_table' auto cur_op = op_table.begin(); for (; cur_op != op_table.end(); ++cur_op) From 84294aefc9f51f1f651e8dd0c3276f37b1fdfc92 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 27 Jun 2022 13:49:37 +0000 Subject: [PATCH 033/173] Fix unit test --- src/Parsers/ExpressionListParsers.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 63d21b0f7dc..91c695bb457 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1179,6 +1179,12 @@ public: if (!wrapLayer()) return false; + // Special case for (('a', 'b')) -> tuple(('a', 'b')) + if (!is_tuple && result.size() == 1) + if (auto * literal = result[0]->as()) + if (literal->value.getType() == Field::Types::Tuple) + is_tuple = true; + state = -1; } From b94a5c105134b42e4c951f7459a715b44075160a Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 29 Jun 2022 21:51:58 +0000 Subject: [PATCH 034/173] Refactor --- src/Parsers/ExpressionListParsers.cpp | 237 ++++++++++++++------------ 1 file changed, 129 insertions(+), 108 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 91c695bb457..40338b709bc 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -28,6 +28,11 @@ using namespace std::literals; namespace DB { +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + const char * ParserMultiplicativeExpression::operators[] = { "*", "multiply", @@ -653,12 +658,28 @@ ASTPtr makeBetweenOperator(bool negative, ASTs arguments) return f_combined_expression; } -namespace ErrorCodes +enum Action { - extern const int SYNTAX_ERROR; -} + OPERAND, + OPERATOR +}; + +enum OperatorType +{ + None, + Comparison, + Mergeable, + ArrayElement, + TupleElement, + IsNull, + StartBetween, + 
StartNotBetween, + FinishBetween, + StartIf, + FinishIf, + Cast +}; -//////////////////////////////////////////////////////////////////////////////////////// // class Operator: // - defines structure of certain operator class Operator @@ -666,23 +687,17 @@ class Operator public: Operator() = default; - Operator(String func_name_, + Operator(String function_name_, Int32 priority_, Int32 arity_ = 2, - bool any_all_ = false) : func_name(func_name_), priority(priority_), arity(arity_), any_all(any_all_) + OperatorType type_ = OperatorType::None) : function_name(function_name_), priority(priority_), arity(arity_), type(type_) { } - String func_name; + String function_name; Int32 priority; Int32 arity; - bool any_all; -}; - -enum Action -{ - OPERAND, - OPERATOR + OperatorType type; }; class Layer @@ -705,7 +720,7 @@ public: { if (count) { - if (op.func_name != "and" && op.func_name != "or" && op.func_name != "concat") + if (op.type != OperatorType::Mergeable) { ++depth_diff; ++depth_total; @@ -770,6 +785,14 @@ public: return operators.back().priority; } + OperatorType previousType() const + { + if (operators.empty()) + return OperatorType::None; + + return operators.back().type; + } + int empty() const { return operators.empty() && operands.empty(); @@ -795,27 +818,30 @@ public: ASTPtr func; // Special case of ternary operator - if (cur_op.func_name == "if_1") + if (cur_op.type == OperatorType::StartIf) return false; - if (cur_op.func_name == "if") + if (cur_op.type == OperatorType::FinishIf) { Operator tmp; - if (!popOperator(tmp) || tmp.func_name != "if_1") + if (!popOperator(tmp) || tmp.type != OperatorType::StartIf) return false; } // Special case of a BETWEEN b AND c operator - if (cur_op.func_name == "between_1" || cur_op.func_name == "not_between_1") + if (cur_op.type == OperatorType::StartBetween || cur_op.type == OperatorType::StartNotBetween) return false; - if (cur_op.func_name == "between_2") + if (cur_op.type == OperatorType::FinishBetween) { - Operator tmp; 
- if (!popOperator(tmp) || !(tmp.func_name == "between_1" || tmp.func_name == "not_between_1")) + Operator tmp_op; + if (!popOperator(tmp_op)) return false; - bool negative = tmp.func_name == "not_between_1"; + if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) + return false; + + bool negative = tmp_op.type == OperatorType::StartNotBetween; ASTs arguments; if (!lastNOperands(arguments, 3)) @@ -825,7 +851,7 @@ public: } else { - func = makeASTFunction(cur_op.func_name); + func = makeASTFunction(cur_op.function_name); if (!lastNOperands(func->children[0]->children, cur_op.arity)) return false; @@ -956,7 +982,7 @@ protected: class FunctionLayer : public Layer { public: - explicit FunctionLayer(String func_name_) : func_name(func_name_) + explicit FunctionLayer(String function_name_) : function_name(function_name_) { } @@ -1022,7 +1048,7 @@ public: * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number, * and the query silently returns an unexpected result. 
*/ - if (func_name == "toDate" + if (function_name == "toDate" && contents_end - contents_begin == strlen("2014-01-01") && contents_begin[0] >= '2' && contents_begin[0] <= '3' && contents_begin[1] >= '0' && contents_begin[1] <= '9' @@ -1089,9 +1115,9 @@ public: if (state == 2) { if (has_distinct) - func_name += "Distinct"; + function_name += "Distinct"; - auto function_node = makeASTFunction(func_name, std::move(result)); + auto function_node = makeASTFunction(function_name, std::move(result)); if (parameters) { @@ -1142,7 +1168,7 @@ private: const char * contents_begin; const char * contents_end; - String func_name; + String function_name; ASTPtr parameters; }; @@ -1496,7 +1522,7 @@ public: { ASTPtr node; - // Recursion :'( + // Recursion if (!ParserSelectWithUnionQuery().parse(pos, node, expected)) return false; @@ -1522,7 +1548,7 @@ public: bool getResult(ASTPtr & op) override { - op = makeASTFunction(func_name, std::move(result)); + op = makeASTFunction(function_name, std::move(result)); return true; } @@ -1604,7 +1630,7 @@ public: to_remove, std::make_shared("]+$") }; - func_name = "replaceRegexpAll"; + function_name = "replaceRegexpAll"; } else { @@ -1625,7 +1651,7 @@ public: std::make_shared("]+$") }; } - func_name = "replaceRegexpOne"; + function_name = "replaceRegexpOne"; } pattern_func_node->name = "concat"; @@ -1638,18 +1664,18 @@ public: { if (trim_left && trim_right) { - func_name = "trimBoth"; + function_name = "trimBoth"; } else { if (trim_left) { - func_name = "trimLeft"; + function_name = "trimLeft"; } else { /// trim_right == false not possible - func_name = "trimRight"; + function_name = "trimRight"; } } } @@ -1672,7 +1698,7 @@ private: bool char_override = false; ASTPtr to_remove; - String func_name; + String function_name; }; @@ -2019,41 +2045,41 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { static std::vector> op_table({ - 
{"+", Operator("plus", 20)}, // Base arithmetic - {"-", Operator("minus", 20)}, - {"*", Operator("multiply", 30)}, - {"/", Operator("divide", 30)}, - {"%", Operator("modulo", 30)}, - {"MOD", Operator("modulo", 30)}, - {"DIV", Operator("intDiv", 30)}, - {"==", Operator("equals", 10, 2, true)}, // Base logic - {"!=", Operator("notEquals", 10, 2, true)}, - {"<>", Operator("notEquals", 10, 2, true)}, - {"<=", Operator("lessOrEquals", 10, 2, true)}, - {">=", Operator("greaterOrEquals", 10, 2, true)}, - {"<", Operator("less", 10, 2, true)}, - {">", Operator("greater", 10, 2, true)}, - {"=", Operator("equals", 10, 2, true)}, - {"AND", Operator("and", 5)}, // AND OR - {"OR", Operator("or", 4)}, - {"||", Operator("concat", 11)}, // concat() func - {".", Operator("tupleElement", 40)}, // tupleElement() func - {"IS NULL", Operator("isNull", 9, 1)}, // IS (NOT) NULL - {"IS NOT NULL", Operator("isNotNull", 9, 1)}, - {"LIKE", Operator("like", 10)}, // LIKE funcs - {"ILIKE", Operator("ilike", 10)}, - {"NOT LIKE", Operator("notLike", 10)}, - {"NOT ILIKE", Operator("notILike", 10)}, - {"IN", Operator("in", 10)}, // IN funcs - {"NOT IN", Operator("notIn", 10)}, - {"GLOBAL IN", Operator("globalIn", 10)}, - {"GLOBAL NOT IN", Operator("globalNotIn", 10)}, - {"?", Operator("if_1", 3, 0)}, - {":", Operator("if", 4, 3)}, - {"BETWEEN", Operator("between_1", 7, 0)}, - {"NOT BETWEEN", Operator("not_between_1", 7, 0)}, - {"[", Operator("arrayElement", 40)}, // Layer is added in the process - {"::", Operator("CAST", 40)} + {"+", Operator("plus", 20)}, + {"-", Operator("minus", 20)}, + {"*", Operator("multiply", 30)}, + {"/", Operator("divide", 30)}, + {"%", Operator("modulo", 30)}, + {"MOD", Operator("modulo", 30)}, + {"DIV", Operator("intDiv", 30)}, + {"==", Operator("equals", 10, 2, OperatorType::Comparison)}, + {"!=", Operator("notEquals", 10, 2, OperatorType::Comparison)}, + {"<>", Operator("notEquals", 10, 2, OperatorType::Comparison)}, + {"<=", Operator("lessOrEquals", 10, 2, 
OperatorType::Comparison)}, + {">=", Operator("greaterOrEquals", 10, 2, OperatorType::Comparison)}, + {"<", Operator("less", 10, 2, OperatorType::Comparison)}, + {">", Operator("greater", 10, 2, OperatorType::Comparison)}, + {"=", Operator("equals", 10, 2, OperatorType::Comparison)}, + {"AND", Operator("and", 5, 2, OperatorType::Mergeable)}, + {"OR", Operator("or", 4, 2, OperatorType::Mergeable)}, + {"||", Operator("concat", 11, 2, OperatorType::Mergeable)}, + {".", Operator("tupleElement", 40, 2, OperatorType::TupleElement)}, + {"IS NULL", Operator("isNull", 9, 1, OperatorType::IsNull)}, + {"IS NOT NULL", Operator("isNotNull", 9, 1, OperatorType::IsNull)}, + {"LIKE", Operator("like", 10)}, + {"ILIKE", Operator("ilike", 10)}, + {"NOT LIKE", Operator("notLike", 10)}, + {"NOT ILIKE", Operator("notILike", 10)}, + {"IN", Operator("in", 10)}, + {"NOT IN", Operator("notIn", 10)}, + {"GLOBAL IN", Operator("globalIn", 10)}, + {"GLOBAL NOT IN", Operator("globalNotIn", 10)}, + {"?", Operator("", 3, 0, OperatorType::StartIf)}, + {":", Operator("if", 4, 3, OperatorType::FinishIf)}, + {"BETWEEN", Operator("", 7, 0, OperatorType::StartBetween)}, + {"NOT BETWEEN", Operator("", 7, 0, OperatorType::StartNotBetween)}, + {"[", Operator("arrayElement", 40, 2, OperatorType::ArrayElement)}, + {"::", Operator("CAST", 40, 2, OperatorType::Cast)} }); static std::vector> op_table_unary({ @@ -2061,6 +2087,9 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) {"NOT", Operator("not", 6, 1)} }); + auto finish_between_operator = Operator("", 8, 0, OperatorType::FinishBetween); + auto lambda_operator = Operator("lambda", 2, 2); + ParserCompoundIdentifier identifier_parser(false, true); ParserNumber number_parser; ParserAsterisk asterisk_parser; @@ -2070,12 +2099,13 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserSubstitution substitution_parser; ParserMySQLGlobalVariable mysql_global_variable_parser; - ParserKeyword 
filter("FILTER"); - ParserKeyword over("OVER"); + ParserKeyword any_parser("ANY"); + ParserKeyword all_parser("ALL"); // Recursion ParserQualifiedAsterisk qualified_asterisk_parser; ParserColumnsMatcher columns_matcher_parser; + ParserSubquery subquery_parser; Action next = Action::OPERAND; @@ -2108,34 +2138,26 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr tmp; /// Special case for cast expression - Operator prev_op; - bool parse_cast = true; - bool any_all = true; - if (storage.back()->popOperator(prev_op)) - { - storage.back()->pushOperator(prev_op, false); - if (prev_op.func_name == "tupleElement") - parse_cast = false; - any_all = prev_op.any_all; - } - if (parse_cast && ParseCastExpression(pos, tmp, expected)) + if (storage.back()->previousType() != OperatorType::TupleElement && + ParseCastExpression(pos, tmp, expected)) { storage.back()->pushOperand(std::move(tmp)); continue; } - if (any_all) + if (storage.back()->previousType() == OperatorType::Comparison) { auto old_pos = pos; SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; - if (ParserKeyword("ANY").ignore(pos, expected) && ParserSubquery().parse(pos, tmp, expected)) + if (any_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) subquery_function_type = SubqueryFunctionType::ANY; - else if (ParserKeyword("ALL").ignore(pos, expected) && ParserSubquery().parse(pos, tmp, expected)) + else if (all_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) subquery_function_type = SubqueryFunctionType::ALL; if (subquery_function_type != SubqueryFunctionType::NONE) { + Operator prev_op; ASTPtr function, argument; if (!storage.back()->popOperator(prev_op)) @@ -2143,7 +2165,7 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!storage.back()->popOperand(argument)) return false; - function = makeASTFunction(prev_op.func_name, argument, tmp); + function = 
makeASTFunction(prev_op.function_name, argument, tmp); if (!modifyAST(function, subquery_function_type)) return false; @@ -2177,7 +2199,7 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (parseOperator(pos, "CASE", expected)) { - next = Action::OPERAND; // ??? + next = Action::OPERAND; storage.push_back(std::make_unique()); } else if (ParseDateOperatorExpression(pos, tmp, expected) || @@ -2244,7 +2266,7 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (pos->type == TokenType::OpeningRoundBracket) { - if (ParserSubquery().parse(pos, tmp, expected)) + if (subquery_parser.parse(pos, tmp, expected)) { storage.back()->pushOperand(std::move(tmp)); continue; @@ -2291,10 +2313,10 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto op = cur_op->second; // 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator - if (op.func_name == "and" && storage.back()->hasBetween()) + if (op.function_name == "and" && storage.back()->hasBetween()) { storage.back()->subBetween(); - op = Operator("between_2", 8, 0); + op = finish_between_operator; } while (storage.back()->previousPriority() >= op.priority) @@ -2303,21 +2325,22 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Operator prev_op; storage.back()->popOperator(prev_op); - if ((op.func_name == "and" && prev_op.func_name == "and") || - (op.func_name == "or" && prev_op.func_name == "or") || - (op.func_name == "concat" && prev_op.func_name == "concat")) + if (op.type == OperatorType::Mergeable && op.function_name == prev_op.function_name) { op.arity += prev_op.arity - 1; break; } - if (prev_op.func_name == "between_2") + if (op.type == OperatorType::FinishBetween) { - Operator prev_prev_op; - if (!storage.back()->popOperator(prev_prev_op) || !(prev_prev_op.func_name == "between_1" || prev_prev_op.func_name == "not_between_1")) + Operator tmp_op; + 
if (!storage.back()->popOperator(tmp_op)) return false; - bool negative = prev_prev_op.func_name == "not_between_1"; + if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) + return false; + + bool negative = tmp_op.type == OperatorType::StartNotBetween; ASTs arguments; if (!storage.back()->lastNOperands(arguments, 3)) @@ -2327,7 +2350,7 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else { - func = makeASTFunction(prev_op.func_name); + func = makeASTFunction(prev_op.function_name); if (!storage.back()->lastNOperands(func->children[0]->children, prev_op.arity)) return false; @@ -2337,17 +2360,17 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } storage.back()->pushOperator(op); - if (op.func_name == "arrayElement") + if (op.type == OperatorType::ArrayElement) storage.push_back(std::make_unique()); // isNull & isNotNull is postfix unary operator - if (op.func_name == "isNull" || op.func_name == "isNotNull") + if (op.type == OperatorType::IsNull) next = Action::OPERATOR; - if (op.func_name == "between_1" || op.func_name == "not_between_1") + if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween) storage.back()->addBetween(); - if (op.func_name == "CAST") + if (op.type == OperatorType::Cast) { next = Action::OPERATOR; @@ -2363,7 +2386,7 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!storage.back()->parseLambda()) return false; - storage.back()->pushOperator(Operator("lambda", 2, 2)); + storage.back()->pushOperator(lambda_operator); } else if (storage.size() > 1 && ParserAlias(true).parse(pos, tmp, expected)) { @@ -2397,8 +2420,6 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } -//////////////////////////////////////////////////////////////////////////////////////// - bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & 
node, Expected & expected) { if (ParserTableFunctionView().parse(pos, node, expected)) From 511bc3415788556549174e828766fc743fb054d1 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 29 Jun 2022 22:33:45 +0000 Subject: [PATCH 035/173] Final replace of ParserExpression --- src/Parsers/ExpressionListParsers.cpp | 82 +++++++++---------- src/TableFunctions/Hive/TableFunctionHive.cpp | 2 +- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 40338b709bc..17f552d47fc 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -694,10 +694,10 @@ public: { } - String function_name; + OperatorType type; Int32 priority; Int32 arity; - OperatorType type; + String function_name; }; class Layer @@ -2045,50 +2045,50 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { static std::vector> op_table({ - {"+", Operator("plus", 20)}, - {"-", Operator("minus", 20)}, - {"*", Operator("multiply", 30)}, - {"/", Operator("divide", 30)}, - {"%", Operator("modulo", 30)}, - {"MOD", Operator("modulo", 30)}, - {"DIV", Operator("intDiv", 30)}, - {"==", Operator("equals", 10, 2, OperatorType::Comparison)}, - {"!=", Operator("notEquals", 10, 2, OperatorType::Comparison)}, - {"<>", Operator("notEquals", 10, 2, OperatorType::Comparison)}, - {"<=", Operator("lessOrEquals", 10, 2, OperatorType::Comparison)}, - {">=", Operator("greaterOrEquals", 10, 2, OperatorType::Comparison)}, - {"<", Operator("less", 10, 2, OperatorType::Comparison)}, - {">", Operator("greater", 10, 2, OperatorType::Comparison)}, - {"=", Operator("equals", 10, 2, OperatorType::Comparison)}, - {"AND", Operator("and", 5, 2, OperatorType::Mergeable)}, - {"OR", Operator("or", 4, 2, OperatorType::Mergeable)}, - {"||", Operator("concat", 11, 2, OperatorType::Mergeable)}, - {".", 
Operator("tupleElement", 40, 2, OperatorType::TupleElement)}, - {"IS NULL", Operator("isNull", 9, 1, OperatorType::IsNull)}, - {"IS NOT NULL", Operator("isNotNull", 9, 1, OperatorType::IsNull)}, - {"LIKE", Operator("like", 10)}, - {"ILIKE", Operator("ilike", 10)}, - {"NOT LIKE", Operator("notLike", 10)}, - {"NOT ILIKE", Operator("notILike", 10)}, - {"IN", Operator("in", 10)}, - {"NOT IN", Operator("notIn", 10)}, - {"GLOBAL IN", Operator("globalIn", 10)}, - {"GLOBAL NOT IN", Operator("globalNotIn", 10)}, - {"?", Operator("", 3, 0, OperatorType::StartIf)}, - {":", Operator("if", 4, 3, OperatorType::FinishIf)}, - {"BETWEEN", Operator("", 7, 0, OperatorType::StartBetween)}, - {"NOT BETWEEN", Operator("", 7, 0, OperatorType::StartNotBetween)}, - {"[", Operator("arrayElement", 40, 2, OperatorType::ArrayElement)}, - {"::", Operator("CAST", 40, 2, OperatorType::Cast)} + {"+", Operator("plus", 11)}, + {"-", Operator("minus", 11)}, + {"*", Operator("multiply", 12)}, + {"/", Operator("divide", 12)}, + {"%", Operator("modulo", 12)}, + {"MOD", Operator("modulo", 12)}, + {"DIV", Operator("intDiv", 12)}, + {"==", Operator("equals", 9, 2, OperatorType::Comparison)}, + {"!=", Operator("notEquals", 9, 2, OperatorType::Comparison)}, + {"<>", Operator("notEquals", 9, 2, OperatorType::Comparison)}, + {"<=", Operator("lessOrEquals", 9, 2, OperatorType::Comparison)}, + {">=", Operator("greaterOrEquals", 9, 2, OperatorType::Comparison)}, + {"<", Operator("less", 9, 2, OperatorType::Comparison)}, + {">", Operator("greater", 9, 2, OperatorType::Comparison)}, + {"=", Operator("equals", 9, 2, OperatorType::Comparison)}, + {"AND", Operator("and", 4, 2, OperatorType::Mergeable)}, + {"OR", Operator("or", 3, 2, OperatorType::Mergeable)}, + {"||", Operator("concat", 10, 2, OperatorType::Mergeable)}, + {".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)}, + {"IS NULL", Operator("isNull", 8, 1, OperatorType::IsNull)}, + {"IS NOT NULL", Operator("isNotNull", 8, 1, 
OperatorType::IsNull)}, + {"LIKE", Operator("like", 9)}, + {"ILIKE", Operator("ilike", 9)}, + {"NOT LIKE", Operator("notLike", 9)}, + {"NOT ILIKE", Operator("notILike", 9)}, + {"IN", Operator("in", 9)}, + {"NOT IN", Operator("notIn", 9)}, + {"GLOBAL IN", Operator("globalIn", 9)}, + {"GLOBAL NOT IN", Operator("globalNotIn", 9)}, + {"?", Operator("", 2, 0, OperatorType::StartIf)}, + {":", Operator("if", 3, 3, OperatorType::FinishIf)}, + {"BETWEEN", Operator("", 6, 0, OperatorType::StartBetween)}, + {"NOT BETWEEN", Operator("", 6, 0, OperatorType::StartNotBetween)}, + {"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)}, + {"::", Operator("CAST", 14, 2, OperatorType::Cast)} }); static std::vector> op_table_unary({ - {"-", Operator("negate", 39, 1)}, - {"NOT", Operator("not", 6, 1)} + {"NOT", Operator("not", 5, 1)}, + {"-", Operator("negate", 13, 1)} }); - auto finish_between_operator = Operator("", 8, 0, OperatorType::FinishBetween); - auto lambda_operator = Operator("lambda", 2, 2); + auto lambda_operator = Operator("lambda", 1, 2); + auto finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); ParserCompoundIdentifier identifier_parser(false, true); ParserNumber number_parser; diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index 99dded030e5..c5b5087bed6 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -62,7 +62,7 @@ namespace DB ColumnsDescription /*cached_columns_*/) const { const Settings & settings = context_->getSettings(); - ParserLambdaExpression partition_by_parser; + ParserExpression partition_by_parser; ASTPtr partition_by_ast = parseQuery( partition_by_parser, "(" + partition_by_def + ")", From 369ff1807c048dcb95275c6843f281677f4df6c4 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 29 Jun 2022 22:37:24 +0000 Subject: [PATCH 036/173] Final replace of ParserExpression --- 
src/Parsers/ExpressionElementParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.h | 18 ++++++++---------- src/Parsers/ParserCreateFunctionQuery.cpp | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 045f6aad2b5..fb121de4183 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -2074,7 +2074,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr func_name; ASTPtr expr_list_args; auto opos = pos; - if (ParserLambdaExpression().parse(pos, lambda, expected)) + if (ParserExpression().parse(pos, lambda, expected)) { if (const auto * func = lambda->as(); func && func->name == "lambda") { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 17f552d47fc..e679c0c46e8 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -690,7 +690,7 @@ public: Operator(String function_name_, Int32 priority_, Int32 arity_ = 2, - OperatorType type_ = OperatorType::None) : function_name(function_name_), priority(priority_), arity(arity_), type(type_) + OperatorType type_ = OperatorType::None) : type(type_), priority(priority_), arity(arity_), function_name(function_name_) { } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 4aef96daec9..45a8b48ce26 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -448,12 +448,19 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserExpression : public IParserBase +{ +protected: + const char * getName() const override { return "lambda expression"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; // It's used to parse expressions in table function. 
class ParserTableFunctionExpression : public IParserBase { private: - ParserLambdaExpression elem_parser; + ParserExpression elem_parser; protected: const char * getName() const override { return "table function expression"; } @@ -509,15 +516,6 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; - -class ParserExpression : public IParserBase -{ -protected: - const char * getName() const override { return "lambda expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - class ParserExpressionWithOptionalAlias2 : public IParserBase { public: diff --git a/src/Parsers/ParserCreateFunctionQuery.cpp b/src/Parsers/ParserCreateFunctionQuery.cpp index 08df6d8da7a..2b3cf98a8a7 100644 --- a/src/Parsers/ParserCreateFunctionQuery.cpp +++ b/src/Parsers/ParserCreateFunctionQuery.cpp @@ -20,7 +20,7 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp ParserKeyword s_on("ON"); ParserIdentifier function_name_p; ParserKeyword s_as("AS"); - ParserLambdaExpression lambda_p; + ParserExpression lambda_p; ASTPtr function_name; ASTPtr function_core; From c7fda1352cc6aa6828ba663923e2bcceb2b0fdf9 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 30 Jun 2022 00:06:02 +0000 Subject: [PATCH 037/173] Fix --- src/Parsers/ExpressionListParsers.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index e679c0c46e8..85e26d1d62d 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2325,13 +2325,13 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Operator prev_op; storage.back()->popOperator(prev_op); - if (op.type == OperatorType::Mergeable && op.function_name == prev_op.function_name) + if (prev_op.type == OperatorType::Mergeable && op.function_name == prev_op.function_name) { op.arity += prev_op.arity - 1; 
break; } - if (op.type == OperatorType::FinishBetween) + if (prev_op.type == OperatorType::FinishBetween) { Operator tmp_op; if (!storage.back()->popOperator(tmp_op)) From 7ea9f8d6c172bb14c2bf5650c71044c6f2665725 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 4 Jul 2022 14:25:18 +0000 Subject: [PATCH 038/173] Test Tree hash of the new parser --- src/Parsers/ParserSelectQuery.cpp | 46 ++++++++++---- tests/queries/0_stateless/_02.reference | 46 -------------- tests/queries/0_stateless/_02.sh | 81 +++++++++++++++---------- 3 files changed, 84 insertions(+), 89 deletions(-) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 8622cc9f615..13e3b0adbd4 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -163,18 +163,40 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } // TEST - ParserToken test(TokenType::DollarSign); - if (!test.ignore(pos, expected)) - { - if (!exp_list_for_select_clause.parse(pos, select_expression_list, expected)) - return false; - } - else - { - ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); - if (!exp_list_for_select_clause2.parse(pos, select_expression_list, expected)) - return false; - } + auto pos_test = pos; + auto expected_test = expected; + ASTPtr select_expression_list_test; + ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); + + bool res_test = exp_list_for_select_clause2.parse(pos_test, select_expression_list_test, expected_test); + bool res = exp_list_for_select_clause.parse(pos, select_expression_list, expected); + + if (res != res_test && res) + throw Exception("PARSER TEST: old parser cannot parse this query", ErrorCodes::SYNTAX_ERROR); + + if (res != res_test && res_test) + throw Exception("PARSER TEST: new parser cannot parse this query", ErrorCodes::SYNTAX_ERROR); + + if (!res) + return false; + + if (select_expression_list->getTreeHash() != 
select_expression_list_test->getTreeHash()) + throw Exception("PARSER TEST: Tree hash differs. \n\n OLD: \n" + select_expression_list_test->dumpTree() + + "\n\n NEW: \n" + select_expression_list->dumpTree(), ErrorCodes::SYNTAX_ERROR); + + // ParserToken test(TokenType::DollarSign); + // if (!test.ignore(pos, expected)) + // { + // if (!exp_list_for_select_clause.parse(pos, select_expression_list, expected)) + // return false; + // } + // else + // { + // ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); + // ASTPtr select_expression_list; + // if (!exp_list_for_select_clause2.parse(pos, select_expression_list, expected)) + // return false; + // } } /// FROM database.table or FROM table or FROM (subquery) or FROM tableFunction(...) diff --git a/tests/queries/0_stateless/_02.reference b/tests/queries/0_stateless/_02.reference index 2f08358e4c7..e69de29bb2d 100644 --- a/tests/queries/0_stateless/_02.reference +++ b/tests/queries/0_stateless/_02.reference @@ -1,46 +0,0 @@ -equal (RES): SELECT 1 + 1 -equal (AST): SELECT 1 + 1 -equal (RES): SELECT 3 + 7 * 5 + 32 / 2 - 5 * 2 -equal (AST): SELECT 3 + 7 * 5 + 32 / 2 - 5 * 2 -equal (RES): SELECT 100 MOD 5 DIV 20 MOD 5 -equal (AST): SELECT 100 MOD 5 DIV 20 MOD 5 -equal (RES): SELECT 1 + 2 * 3 - 3 / 2 < 80 / 8 + 2 * 5 -equal (AST): SELECT 1 + 2 * 3 - 3 / 2 < 80 / 8 + 2 * 5 -equal (RES): SELECT 20 MOD 10 > 200 DIV 6 -equal (AST): SELECT 20 MOD 10 > 200 DIV 6 -equal (RES): SELECT 5 != 80 / 8 + 2 * 5 -equal (AST): SELECT 5 != 80 / 8 + 2 * 5 -equal (AST): SELECT a.5 -equal (AST): SELECT a.b.5 -equal (AST): SELECT a.b.n.v -equal (AST): SELECT 10 * a.b.5 / 3 -equal (RES): SELECT -1::Int64 -equal (AST): SELECT -1::Int64 -equal (RES): SELECT [1,2,3]::Array(Int64) -equal (AST): SELECT [1,2,3]::Array(Int64) -equal (RES): SELECT [1,2,cos(1)] -equal (AST): SELECT [1,2,cos(1)] -equal (AST): SELECT [a,b,c] -equal (AST): SELECT [a,b,c]::Array(UInt8) -equal (RES): SELECT number AS a1, number AS b2, number FROM numbers(10) 
-equal (AST): SELECT number AS a1, number AS b2, number FROM numbers(10) -equal (AST): SELECT *[n] -equal (RES): SELECT 3 + 7 * (5 + 32) / 2 - 5 * (2 - 1) -equal (AST): SELECT 3 + 7 * (5 + 32) / 2 - 5 * (2 - 1) -equal (AST): SELECT (a, b, c) * ((a, b, c) + (a, b, c)) -equal (AST): SELECT 1 + 2 * 3 < a / b mod 5 OR [a, b, c] + 1 != [c, d, e] AND n as res -equal (AST): SELECT 1 + 2 * 3 < a / b mod 5 AND [a, b, c] + 1 != [c, d, e] OR n as res -equal (AST): SELECT 'needle' LIKE 'haystack' AND NOT needle NOT ILIKE haystack -equal (AST): SELECT 'needle' LIKE 'haystack' AND (NOT needle) NOT ILIKE haystack -equal (AST): SELECT [1, 2, 3, cast(['a', 'b', c] as Array(String)), 4] -equal (AST): SELECT [1, 2, 3, cast(['a', 'b', c], Array(String)), 4] -equal (AST): SELECT [1, 2, 3, cast(['a', 'b', c] as Array(String)), 4] -equal (AST): SELECT [1, 2, 3, cast(['a', 'b', c], Array(String)), 4] -equal (RES): SELECT EXTRACT(DAY FROM toDate('2017-06-15')) -equal (AST): SELECT EXTRACT(DAY FROM toDate('2017-06-15')) -equal (RES): SELECT substring(toFixedString('hello12345', 16) from 1 for 8) -equal (AST): SELECT substring(toFixedString('hello12345', 16) from 1 for 8) -equal (RES): SELECT position('Hello, world!' IN '!') -equal (AST): SELECT position('Hello, world!' IN '!') -equal (RES): SELECT trim(TRAILING 'x' FROM 'xxfooxx') -equal (AST): SELECT trim(TRAILING 'x' FROM 'xxfooxx') diff --git a/tests/queries/0_stateless/_02.sh b/tests/queries/0_stateless/_02.sh index e59876159f7..4d95ffbed4d 100755 --- a/tests/queries/0_stateless/_02.sh +++ b/tests/queries/0_stateless/_02.sh @@ -5,6 +5,21 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh compare () { + if [ "$3" == 2 ];then + R_1=$($CLICKHOUSE_CLIENT -q "EXPLAIN AST $1") + R_2=$($CLICKHOUSE_CLIENT -q "EXPLAIN AST $2" 2>/dev/null) + + if [ "$R_1" == "$R_2" ];then + echo "equal (RES): $1"; + else + echo "============== not equal ===================" + echo "not equal (RES): $1"; + echo "# Original: $R_1"; + echo "# Ours: $R_2"; + echo "============================================" + fi + fi + if [ "$2" != 0 ];then R_1=$($CLICKHOUSE_CLIENT -q "SELECT $1") R_2=$($CLICKHOUSE_CLIENT -q "SELECT \$ $1" 2>/dev/null) @@ -34,46 +49,50 @@ compare () { fi } -compare "1 + 1" -compare "3 + 7 * 5 + 32 / 2 - 5 * 2" -compare "100 MOD 5 DIV 20 MOD 5" -compare "1 + 2 * 3 - 3 / 2 < 80 / 8 + 2 * 5" -compare "20 MOD 10 > 200 DIV 6" -compare "5 != 80 / 8 + 2 * 5" +# compare "1 + 1" +# compare "3 + 7 * 5 + 32 / 2 - 5 * 2" +# compare "100 MOD 5 DIV 20 MOD 5" +# compare "1 + 2 * 3 - 3 / 2 < 80 / 8 + 2 * 5" +# compare "20 MOD 10 > 200 DIV 6" +# compare "5 != 80 / 8 + 2 * 5" -compare "a.5" 0 -compare "a.b.5" 0 -compare "a.b.n.v" 0 -compare "10 * a.b.5 / 3" 0 +# compare "a.5" 0 +# compare "a.b.5" 0 +# compare "a.b.n.v" 0 +# compare "10 * a.b.5 / 3" 0 -compare "-1::Int64" -compare "[1,2,3]::Array(Int64)" -compare "[1,2,cos(1)]" -compare "[a,b,c]" 0 -compare "[a,b,c]::Array(UInt8)" 0 +# compare "-1::Int64" +# compare "[1,2,3]::Array(Int64)" +# compare "[1,2,cos(1)]" +# compare "[a,b,c]" 0 +# compare "[a,b,c]::Array(UInt8)" 0 -compare "number AS a1, number AS b2, number FROM numbers(10)" -compare "*[n]" 0 +# compare "number AS a1, number AS b2, number FROM numbers(10)" +# compare "*[n]" 0 -compare "3 + 7 * (5 + 32) / 2 - 5 * (2 - 1)" -compare "(a, b, c) * ((a, b, c) + (a, b, c))" 0 +# compare "3 + 7 * (5 + 32) / 2 - 5 * (2 - 1)" +# compare "(a, b, c) * ((a, b, c) + (a, b, c))" 0 -compare "1 + 2 * 3 < a / b mod 5 OR [a, b, c] + 1 != [c, d, e] AND n as res" 0 -compare "1 + 2 * 3 < a / b mod 5 AND [a, b, c] + 1 != [c, d, e] OR n as res" 0 +# compare "1 + 
2 * 3 < a / b mod 5 OR [a, b, c] + 1 != [c, d, e] AND n as res" 0 +# compare "1 + 2 * 3 < a / b mod 5 AND [a, b, c] + 1 != [c, d, e] OR n as res" 0 -compare "'needle' LIKE 'haystack' AND NOT needle NOT ILIKE haystack" 0 -compare "'needle' LIKE 'haystack' AND (NOT needle) NOT ILIKE haystack" 0 +# compare "'needle' LIKE 'haystack' AND NOT needle NOT ILIKE haystack" 0 +# compare "'needle' LIKE 'haystack' AND (NOT needle) NOT ILIKE haystack" 0 -compare "[1, 2, 3, cast(['a', 'b', c] as Array(String)), 4]" 0 -compare "[1, 2, 3, cast(['a', 'b', c], Array(String)), 4]" 0 +# compare "[1, 2, 3, cast(['a', 'b', c] as Array(String)), 4]" 0 +# compare "[1, 2, 3, cast(['a', 'b', c], Array(String)), 4]" 0 -compare "[1, 2, 3, cast(['a', 'b', c] as Array(String)), 4]" 0 -compare "[1, 2, 3, cast(['a', 'b', c], Array(String)), 4]" 0 +# compare "[1, 2, 3, cast(['a', 'b', c] as Array(String)), 4]" 0 +# compare "[1, 2, 3, cast(['a', 'b', c], Array(String)), 4]" 0 -compare "EXTRACT(DAY FROM toDate('2017-06-15'))" -compare "substring(toFixedString('hello12345', 16) from 1 for 8)" -compare "position('Hello, world!' IN '!')" +# compare "EXTRACT(DAY FROM toDate('2017-06-15'))" +# compare "substring(toFixedString('hello12345', 16) from 1 for 8)" +# compare "position('Hello, world!' 
IN '!')" -compare "trim(TRAILING 'x' FROM 'xxfooxx')" +# compare "trim(TRAILING 'x' FROM 'xxfooxx')" # compare "ltrim('') || rtrim('') || trim('')" + +# compare "WITH 2 AS \`b.c\`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, TIMESTAMP '2022-02-02 02:02:02' AS w, [] AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t +# SELECT INTERVAL CASE CASE WHEN NOT -a[b.c] * u DIV v + d IS NOT NULL AND e OR f BETWEEN g AND h THEN i ELSE j END WHEN w THEN k END || [l, (m, n)] MINUTE IS NULL OR NOT o::Array(INT) = p <> q < r > s != t AS upyachka;" "WITH 2 AS \`b.c\`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, TIMESTAMP '2022-02-02 02:02:02' AS w, [] AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t +# SELECT \$ INTERVAL CASE CASE WHEN NOT -a[b.c] * u DIV v + d IS NOT NULL AND e OR f BETWEEN g AND h THEN i ELSE j END WHEN w THEN k END || [l, (m, n)] MINUTE IS NULL OR NOT o::Array(INT) = p <> q < r > s != t AS upyachka;" 2 \ No newline at end of file From 01ee10a588fcc9940fb69b028dee1f029c94fba9 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 14 Jul 2022 12:11:14 +0000 Subject: [PATCH 039/173] Better --- src/Parsers/ExpressionListParsers.cpp | 3559 +++++++++++++------------ src/Parsers/ParserSelectQuery.cpp | 55 +- 2 files changed, 1825 insertions(+), 1789 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 85e26d1d62d..eb09411f29e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -658,1767 +658,6 @@ ASTPtr makeBetweenOperator(bool negative, ASTs arguments) return f_combined_expression; } -enum Action -{ - OPERAND, - OPERATOR -}; - -enum OperatorType -{ - None, - Comparison, - Mergeable, - ArrayElement, - TupleElement, - IsNull, - StartBetween, - StartNotBetween, - FinishBetween, - 
StartIf, - FinishIf, - Cast -}; - -// class Operator: -// - defines structure of certain operator -class Operator -{ -public: - Operator() = default; - - Operator(String function_name_, - Int32 priority_, - Int32 arity_ = 2, - OperatorType type_ = OperatorType::None) : type(type_), priority(priority_), arity(arity_), function_name(function_name_) - { - } - - OperatorType type; - Int32 priority; - Int32 arity; - String function_name; -}; - -class Layer -{ -public: - virtual ~Layer() = default; - - bool popOperator(Operator & op) - { - if (operators.empty()) - return false; - - op = std::move(operators.back()); - operators.pop_back(); - - return true; - } - - void pushOperator(Operator op, bool count = true) - { - if (count) - { - if (op.type != OperatorType::Mergeable) - { - ++depth_diff; - ++depth_total; - } - else - { - depth_diff -= depth_total; - depth_total = 0; - } - } - - operators.push_back(std::move(op)); - } - - bool popOperand(ASTPtr & op) - { - if (operands.empty()) - return false; - - op = std::move(operands.back()); - operands.pop_back(); - - return true; - } - - void pushOperand(ASTPtr op) - { - operands.push_back(std::move(op)); - } - - void pushResult(ASTPtr op) - { - result.push_back(std::move(op)); - } - - virtual bool getResult(ASTPtr & op) - { - if (result.size() == 1) - { - op = std::move(result[0]); - return true; - } - - return false; - } - - virtual bool parse(IParser::Pos & /*pos*/, Expected & /*expected*/, Action & /*action*/) - { - return true; - } - - bool isFinished() const - { - return state == -1; - } - - int previousPriority() const - { - if (operators.empty()) - return 0; - - return operators.back().priority; - } - - OperatorType previousType() const - { - if (operators.empty()) - return OperatorType::None; - - return operators.back().type; - } - - int empty() const - { - return operators.empty() && operands.empty(); - } - - bool lastNOperands(ASTs & asts, size_t n) - { - if (n > operands.size()) - return false; - - auto start = 
operands.begin() + operands.size() - n; - asts.insert(asts.end(), std::make_move_iterator(start), std::make_move_iterator(operands.end())); - operands.erase(start, operands.end()); - - return true; - } - - bool wrapLayer(bool push_to_result = true) - { - Operator cur_op; - while (popOperator(cur_op)) - { - ASTPtr func; - - // Special case of ternary operator - if (cur_op.type == OperatorType::StartIf) - return false; - - if (cur_op.type == OperatorType::FinishIf) - { - Operator tmp; - if (!popOperator(tmp) || tmp.type != OperatorType::StartIf) - return false; - } - - // Special case of a BETWEEN b AND c operator - if (cur_op.type == OperatorType::StartBetween || cur_op.type == OperatorType::StartNotBetween) - return false; - - if (cur_op.type == OperatorType::FinishBetween) - { - Operator tmp_op; - if (!popOperator(tmp_op)) - return false; - - if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) - return false; - - bool negative = tmp_op.type == OperatorType::StartNotBetween; - - ASTs arguments; - if (!lastNOperands(arguments, 3)) - return false; - - func = makeBetweenOperator(negative, arguments); - } - else - { - func = makeASTFunction(cur_op.function_name); - - if (!lastNOperands(func->children[0]->children, cur_op.arity)) - return false; - } - - pushOperand(func); - } - - ASTPtr node; - if (!popOperand(node)) - return false; - - bool res = empty(); - - if (push_to_result) - pushResult(node); - else - pushOperand(node); - - depth_diff -= depth_total; - depth_total = 0; - - return res; - } - - bool parseLambda() - { - // 0. If empty - create function tuple with 0 args - if (empty()) - { - auto func = makeASTFunction("tuple"); - pushOperand(func); - return true; - } - - if (!wrapLayer()) - return false; - - /// 1. If there is already tuple do nothing - if (tryGetFunctionName(result.back()) == "tuple") - { - pushOperand(result.back()); - result.pop_back(); - } - /// 2. 
Put all result in a single tuple - else - { - auto func = makeASTFunction("tuple", result); - result.clear(); - pushOperand(func); - } - return true; - } - - bool parseBase(IParser::Pos & pos, Expected & expected, Action & action, TokenType separator, TokenType end) - { - if (ParserToken(separator).ignore(pos, expected)) - { - action = Action::OPERAND; - return wrapLayer(); - } - - if (ParserToken(end).ignore(pos, expected)) - { - action = Action::OPERATOR; - - if (!empty() || !result.empty()) - if (!wrapLayer()) - return false; - - state = -1; - } - - return true; - } - - bool insertAlias(ASTPtr node) - { - if (!wrapLayer(false)) - return false; - - if (operands.empty()) - return false; - - if (auto * ast_with_alias = dynamic_cast(operands.back().get())) - tryGetIdentifierNameInto(node, ast_with_alias->alias); - else - return false; - - return true; - } - - void addBetween() - { - ++open_between; - } - - void subBetween() - { - --open_between; - } - - bool hasBetween() const - { - return open_between > 0; - } - - void syncDepth(IParser::Pos & pos) - { - for (; depth_diff > 0; --depth_diff) - pos.increaseDepth(); - - for (; depth_diff < 0; ++depth_diff) - pos.decreaseDepth(); - } - -protected: - std::vector operators; - ASTs operands; - ASTs result; - int state = 0; - - int open_between = 0; - int depth_diff = 1; - int depth_total = 1; -}; - -class FunctionLayer : public Layer -{ -public: - explicit FunctionLayer(String function_name_) : function_name(function_name_) - { - } - - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - if (state == 0) - { - state = 1; - - auto pos_after_bracket = pos; - auto old_expected = expected; - - ParserKeyword all("ALL"); - ParserKeyword distinct("DISTINCT"); - - if (all.ignore(pos, expected)) - has_all = true; - - if (distinct.ignore(pos, expected)) - has_distinct = true; - - if (!has_all && all.ignore(pos, expected)) - has_all = true; - - if (has_all && has_distinct) - return false; - - if 
(has_all || has_distinct) - { - /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier - if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) - { - pos = pos_after_bracket; - expected = old_expected; - has_all = false; - has_distinct = false; - } - } - - contents_begin = pos->begin; - } - - if (state == 1) - { - if (ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - return wrapLayer(); - } - - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - action = Action::OPERATOR; - - if (!empty() || !result.empty()) - if (!wrapLayer()) - return false; - - contents_end = pos->begin; - - /** Check for a common error case - often due to the complexity of quoting command-line arguments, - * an expression of the form toDate(2014-01-01) appears in the query instead of toDate('2014-01-01'). - * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number, - * and the query silently returns an unexpected result. 
- */ - if (function_name == "toDate" - && contents_end - contents_begin == strlen("2014-01-01") - && contents_begin[0] >= '2' && contents_begin[0] <= '3' - && contents_begin[1] >= '0' && contents_begin[1] <= '9' - && contents_begin[2] >= '0' && contents_begin[2] <= '9' - && contents_begin[3] >= '0' && contents_begin[3] <= '9' - && contents_begin[4] == '-' - && contents_begin[5] >= '0' && contents_begin[5] <= '9' - && contents_begin[6] >= '0' && contents_begin[6] <= '9' - && contents_begin[7] == '-' - && contents_begin[8] >= '0' && contents_begin[8] <= '9' - && contents_begin[9] >= '0' && contents_begin[9] <= '9') - { - std::string contents_str(contents_begin, contents_end - contents_begin); - throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')" - , ErrorCodes::SYNTAX_ERROR); - } - - if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) - { - parameters = std::make_shared(); - std::swap(parameters->children, result); - action = Action::OPERAND; - - /// Parametric aggregate functions cannot have DISTINCT in parameters list. 
- if (has_distinct) - return false; - - auto pos_after_bracket = pos; - auto old_expected = expected; - - ParserKeyword all("ALL"); - ParserKeyword distinct("DISTINCT"); - - if (all.ignore(pos, expected)) - has_all = true; - - if (distinct.ignore(pos, expected)) - has_distinct = true; - - if (!has_all && all.ignore(pos, expected)) - has_all = true; - - if (has_all && has_distinct) - return false; - - if (has_all || has_distinct) - { - /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier - if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) - { - pos = pos_after_bracket; - expected = old_expected; - has_distinct = false; - } - } - } - else - { - state = 2; - } - } - } - - if (state == 2) - { - if (has_distinct) - function_name += "Distinct"; - - auto function_node = makeASTFunction(function_name, std::move(result)); - - if (parameters) - { - function_node->parameters = parameters; - function_node->children.push_back(function_node->parameters); - } - - ParserKeyword filter("FILTER"); - ParserKeyword over("OVER"); - - if (filter.ignore(pos, expected)) - { - // We are slightly breaking the parser interface by parsing the window - // definition into an existing ASTFunction. Normally it would take a - // reference to ASTPtr and assign it the new node. We only have a pointer - // of a different type, hence this workaround with a temporary pointer. 
- ASTPtr function_node_as_iast = function_node; - - // Recursion - ParserFilterClause filter_parser; - if (!filter_parser.parse(pos, function_node_as_iast, expected)) - return false; - } - - if (over.ignore(pos, expected)) - { - function_node->is_window_function = true; - - ASTPtr function_node_as_iast = function_node; - - // Recursion - ParserWindowReference window_reference; - if (!window_reference.parse(pos, function_node_as_iast, expected)) - return false; - } - - result = {function_node}; - state = -1; - } - - return true; - } - -private: - bool has_all = false; - bool has_distinct = false; - - const char * contents_begin; - const char * contents_end; - - String function_name; - ASTPtr parameters; -}; - - -class RoundBracketsLayer : public Layer -{ -public: - bool getResult(ASTPtr & op) override - { - // Round brackets can mean priority operator as well as function tuple() - if (!is_tuple && result.size() == 1) - op = std::move(result[0]); - else - op = makeASTFunction("tuple", std::move(result)); - - return true; - } - - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - if (ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - is_tuple = true; - if (!wrapLayer()) - return false; - } - - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - action = Action::OPERATOR; - - if (!empty()) - if (!wrapLayer()) - return false; - - // Special case for (('a', 'b')) -> tuple(('a', 'b')) - if (!is_tuple && result.size() == 1) - if (auto * literal = result[0]->as()) - if (literal->value.getType() == Field::Types::Tuple) - is_tuple = true; - - state = -1; - } - - return true; - } -private: - bool is_tuple = false; -}; - -class ArrayLayer : public Layer -{ -public: - bool getResult(ASTPtr & op) override - { - op = makeASTFunction("array", std::move(result)); - return true; - } - - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - return 
Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingSquareBracket); - } -}; - -// FunctionBaseLayer - -class ArrayElementLayer : public Layer -{ -public: - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - return Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingSquareBracket); - } -}; - -class CastLayer : public Layer -{ -public: - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - ParserKeyword as_keyword_parser("AS"); - ASTPtr alias; - - /// expr AS type - if (state == 0) - { - ASTPtr type_node; - - if (as_keyword_parser.ignore(pos, expected)) - { - auto old_pos = pos; - - if (ParserIdentifier().parse(pos, alias, expected) && - as_keyword_parser.ignore(pos, expected) && - ParserDataType().parse(pos, type_node, expected) && - ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!insertAlias(alias)) - return false; - - if (!wrapLayer()) - return false; - - result = {createFunctionCast(result[0], type_node)}; - state = -1; - return true; - } - - pos = old_pos; - - if (ParserIdentifier().parse(pos, alias, expected) && - ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - if (!insertAlias(alias)) - return false; - - if (!wrapLayer()) - return false; - - state = 1; - return true; - } - - pos = old_pos; - - if (ParserDataType().parse(pos, type_node, expected) && - ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - result = {createFunctionCast(result[0], type_node)}; - state = -1; - return true; - } - - return false; - } - - if (ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!wrapLayer()) - return false; - - state = 1; - return true; - } - } - if (state == 1) - { - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - result = 
{makeASTFunction("CAST", result[0], result[1])}; - state = -1; - return true; - } - } - - return true; - } -}; - -class ExtractLayer : public Layer -{ -public: - bool getResult(ASTPtr & op) override - { - if (parsed_interval_kind) - { - if (result.empty()) - return false; - - op = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), result[0]); - } - else - op = makeASTFunction("extract", std::move(result)); - - return true; - } - - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - if (state == 0) - { - IParser::Pos begin = pos; - ParserKeyword s_from("FROM"); - - if (parseIntervalKind(pos, expected, interval_kind) && s_from.ignore(pos, expected)) - { - parsed_interval_kind = true; - state = 2; - return true; - } - else - { - state = 1; - pos = begin; - } - } - - if (state == 1) - { - return Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingRoundBracket); - } - - if (state == 2) - { - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - state = -1; - return true; - } - } - - return true; - } - -private: - IntervalKind interval_kind; - bool parsed_interval_kind = false; -}; - -class SubstringLayer : public Layer -{ -public: - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length) - /// The latter will be parsed normally as a function later. 
- - if (state == 0) - { - if (ParserToken(TokenType::Comma).ignore(pos, expected) || - ParserKeyword("FROM").ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!wrapLayer()) - return false; - - state = 1; - } - } - - if (state == 1) - { - if (ParserToken(TokenType::Comma).ignore(pos, expected) || - ParserKeyword("FOR").ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!wrapLayer()) - return false; - - state = 2; - } - } - - if (state == 1 || state == 2) - { - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - result = {makeASTFunction("substring", result)}; - state = -1; - return true; - } - } - - return true; - } -}; - -class PositionLayer : public Layer -{ -public: - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - if (state == 0) - { - if (ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!wrapLayer()) - return false; - - state = 1; - } - if (ParserKeyword("IN").ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!wrapLayer()) - return false; - - state = 2; - } - } - - if (state == 1) - { - if (ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!wrapLayer()) - return false; - } - } - - if (state == 1 || state == 2) - { - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - if (state == 1) - result = {makeASTFunction("position", result)}; - else - result = {makeASTFunction("position", result[1], result[0])}; - - state = -1; - return true; - } - } - - return true; - } -}; - - -class ExistsLayer : public Layer -{ -public: - bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override - { - ASTPtr node; - - // Recursion - if (!ParserSelectWithUnionQuery().parse(pos, node, expected)) - return false; - - if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, 
expected)) - return false; - - auto subquery = std::make_shared(); - subquery->children.push_back(node); - result = {makeASTFunction("exists", subquery)}; - - state = -1; - - return true; - } -}; - -class TrimLayer : public Layer -{ -public: - TrimLayer(bool trim_left_, bool trim_right_) : trim_left(trim_left_), trim_right(trim_right_) - { - } - - bool getResult(ASTPtr & op) override - { - op = makeASTFunction(function_name, std::move(result)); - return true; - } - - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - /// Handles all possible TRIM/LTRIM/RTRIM call variants - - if (state == 0) - { - if (!trim_left && !trim_right) - { - if (ParserKeyword("BOTH").ignore(pos, expected)) - { - trim_left = true; - trim_right = true; - char_override = true; - } - else if (ParserKeyword("LEADING").ignore(pos, expected)) - { - trim_left = true; - char_override = true; - } - else if (ParserKeyword("TRAILING").ignore(pos, expected)) - { - trim_right = true; - char_override = true; - } - else - { - trim_left = true; - trim_right = true; - } - - if (char_override) - state = 1; - else - state = 2; - } - else - { - state = 2; - } - } - - if (state == 1) - { - if (ParserKeyword("FROM").ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!wrapLayer()) - return false; - - to_remove = makeASTFunction("regexpQuoteMeta", result[0]); - result.clear(); - state = 2; - } - } - - if (state == 2) - { - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - ASTPtr pattern_node; - - if (char_override) - { - auto pattern_func_node = std::make_shared(); - auto pattern_list_args = std::make_shared(); - if (trim_left && trim_right) - { - pattern_list_args->children = { - std::make_shared("^["), - to_remove, - std::make_shared("]+|["), - to_remove, - std::make_shared("]+$") - }; - function_name = "replaceRegexpAll"; - } - else - { - if (trim_left) - { - pattern_list_args->children = { - 
std::make_shared("^["), - to_remove, - std::make_shared("]+") - }; - } - else - { - /// trim_right == false not possible - pattern_list_args->children = { - std::make_shared("["), - to_remove, - std::make_shared("]+$") - }; - } - function_name = "replaceRegexpOne"; - } - - pattern_func_node->name = "concat"; - pattern_func_node->arguments = std::move(pattern_list_args); - pattern_func_node->children.push_back(pattern_func_node->arguments); - - pattern_node = std::move(pattern_func_node); - } - else - { - if (trim_left && trim_right) - { - function_name = "trimBoth"; - } - else - { - if (trim_left) - { - function_name = "trimLeft"; - } - else - { - /// trim_right == false not possible - function_name = "trimRight"; - } - } - } - - if (char_override) - { - result.push_back(pattern_node); - result.push_back(std::make_shared("")); - } - - state = -1; - } - } - - return true; - } -private: - bool trim_left; - bool trim_right; - bool char_override = false; - - ASTPtr to_remove; - String function_name; -}; - - -class DateAddLayer : public Layer -{ -public: - explicit DateAddLayer(const char * function_name_) : function_name(function_name_) - { - } - - bool getResult(ASTPtr & op) override - { - if (parsed_interval_kind) - { - result[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result[0]); - op = makeASTFunction(function_name, result[1], result[0]); - } - else - op = makeASTFunction(function_name, std::move(result)); - - return true; - } - - - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - if (state == 0) - { - if (parseIntervalKind(pos, expected, interval_kind)) - { - if (!ParserToken(TokenType::Comma).ignore(pos, expected)) - return false; - - action = Action::OPERAND; - state = 2; - parsed_interval_kind = true; - } - else - { - state = 1; - } - } - - if (state == 1) - { - return Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingRoundBracket); - } - - if (state == 2) - { - if 
(ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!wrapLayer()) - return false; - - state = 3; - } - } - - if (state == 3) - { - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - state = -1; - } - } - return true; - } - -private: - IntervalKind interval_kind; - const char * function_name; - bool parsed_interval_kind = false; -}; - - -class DateDiffLayer : public Layer -{ -public: - bool getResult(ASTPtr & op) override - { - if (parsed_interval_kind) - { - if (result.size() == 2) - op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1]); - else if (result.size() == 3) - op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1], result[2]); - else - return false; - } - else - { - op = makeASTFunction("dateDiff", std::move(result)); - } - return true; - } - - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - if (state == 0) - { - if (parseIntervalKind(pos, expected, interval_kind)) - { - parsed_interval_kind = true; - - if (!ParserToken(TokenType::Comma).ignore(pos, expected)) - return false; - } - - state = 1; - } - - if (state == 1) - { - return Layer::parseBase(pos, expected, action, TokenType::Comma, TokenType::ClosingRoundBracket); - } - - return true; - } - -private: - IntervalKind interval_kind; - bool parsed_interval_kind = false; -}; - - -class IntervalLayer : public Layer -{ -public: - bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override - { - if (state == 0) - { - auto begin = pos; - auto init_expected = expected; - ASTPtr string_literal; - //// A String literal followed INTERVAL keyword, - /// the literal can be a part of an expression or - /// include Number and INTERVAL TYPE at the same time - if (ParserStringLiteral{}.parse(pos, string_literal, expected)) - { - String literal; - if 
(string_literal->as().value.tryGet(literal)) - { - Tokens tokens(literal.data(), literal.data() + literal.size()); - IParser::Pos token_pos(tokens, 0); - Expected token_expected; - ASTPtr expr; - - if (!ParserNumber{}.parse(token_pos, expr, token_expected)) - return false; - else - { - /// case: INTERVAL '1' HOUR - /// back to begin - if (!token_pos.isValid()) - { - pos = begin; - expected = init_expected; - } - else - { - /// case: INTERVAL '1 HOUR' - if (!parseIntervalKind(token_pos, token_expected, interval_kind)) - return false; - - result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), expr)}; - state = -1; - return true; - } - } - } - } - state = 1; - } - - if (state == 1) - { - if (parseIntervalKind(pos, expected, interval_kind)) - { - if (!wrapLayer()) - return false; - - result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result)}; - state = -1; - } - } - - return true; - } - -private: - IntervalKind interval_kind; -}; - - -class CaseLayer : public Layer -{ -public: - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - if (state == 0) - { - auto old_pos = pos; - has_case_expr = !ParserKeyword("WHEN").ignore(pos, expected); - pos = old_pos; - - state = 1; - } - - if (state == 1) - { - if (ParserKeyword("WHEN").ignore(pos, expected)) - { - if ((has_case_expr || !result.empty()) && !wrapLayer()) - return false; - - action = Action::OPERAND; - state = 2; - } - else if (ParserKeyword("ELSE").ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - action = Action::OPERAND; - state = 3; - } - else if (ParserKeyword("END").ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - Field field_with_null; - ASTLiteral null_literal(field_with_null); - result.push_back(std::make_shared(null_literal)); - - if (has_case_expr) - result = {makeASTFunction("caseWithExpression", result)}; - else - result = {makeASTFunction("multiIf", result)}; - state = -1; - } - } - - if 
(state == 2) - { - if (ParserKeyword("THEN").ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - action = Action::OPERAND; - state = 1; - } - } - - if (state == 3) - { - if (ParserKeyword("END").ignore(pos, expected)) - { - if (!wrapLayer()) - return false; - - if (has_case_expr) - result = {makeASTFunction("caseWithExpression", result)}; - else - result = {makeASTFunction("multiIf", result)}; - - state = -1; - } - } - - return true; - } - -private: - bool has_case_expr; -}; - - -bool ParseCastExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) -{ - IParser::Pos begin = pos; - - if (ParserCastOperator().parse(pos, node, expected)) - return true; - - pos = begin; - - /// As an exception, negative numbers should be parsed as literals, and not as an application of the operator. - if (pos->type == TokenType::Minus) - { - if (ParserLiteral().parse(pos, node, expected)) - return true; - } - return false; -} - -bool ParseDateOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) -{ - auto begin = pos; - - /// If no DATE keyword, go to the nested parser. - if (!ParserKeyword("DATE").ignore(pos, expected)) - return false; - - ASTPtr expr; - if (!ParserStringLiteral().parse(pos, expr, expected)) - { - pos = begin; - return false; - } - - node = makeASTFunction("toDate", expr); - return true; -} - -bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) -{ - auto begin = pos; - - /// If no TIMESTAMP keyword, go to the nested parser. 
- if (!ParserKeyword("TIMESTAMP").ignore(pos, expected)) - return false; - - ASTPtr expr; - if (!ParserStringLiteral().parse(pos, expr, expected)) - { - pos = begin; - return false; - } - - node = makeASTFunction("toDateTime", expr); - - return true; -} - -bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - static std::vector> op_table({ - {"+", Operator("plus", 11)}, - {"-", Operator("minus", 11)}, - {"*", Operator("multiply", 12)}, - {"/", Operator("divide", 12)}, - {"%", Operator("modulo", 12)}, - {"MOD", Operator("modulo", 12)}, - {"DIV", Operator("intDiv", 12)}, - {"==", Operator("equals", 9, 2, OperatorType::Comparison)}, - {"!=", Operator("notEquals", 9, 2, OperatorType::Comparison)}, - {"<>", Operator("notEquals", 9, 2, OperatorType::Comparison)}, - {"<=", Operator("lessOrEquals", 9, 2, OperatorType::Comparison)}, - {">=", Operator("greaterOrEquals", 9, 2, OperatorType::Comparison)}, - {"<", Operator("less", 9, 2, OperatorType::Comparison)}, - {">", Operator("greater", 9, 2, OperatorType::Comparison)}, - {"=", Operator("equals", 9, 2, OperatorType::Comparison)}, - {"AND", Operator("and", 4, 2, OperatorType::Mergeable)}, - {"OR", Operator("or", 3, 2, OperatorType::Mergeable)}, - {"||", Operator("concat", 10, 2, OperatorType::Mergeable)}, - {".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)}, - {"IS NULL", Operator("isNull", 8, 1, OperatorType::IsNull)}, - {"IS NOT NULL", Operator("isNotNull", 8, 1, OperatorType::IsNull)}, - {"LIKE", Operator("like", 9)}, - {"ILIKE", Operator("ilike", 9)}, - {"NOT LIKE", Operator("notLike", 9)}, - {"NOT ILIKE", Operator("notILike", 9)}, - {"IN", Operator("in", 9)}, - {"NOT IN", Operator("notIn", 9)}, - {"GLOBAL IN", Operator("globalIn", 9)}, - {"GLOBAL NOT IN", Operator("globalNotIn", 9)}, - {"?", Operator("", 2, 0, OperatorType::StartIf)}, - {":", Operator("if", 3, 3, OperatorType::FinishIf)}, - {"BETWEEN", Operator("", 6, 0, OperatorType::StartBetween)}, - {"NOT BETWEEN", 
Operator("", 6, 0, OperatorType::StartNotBetween)}, - {"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)}, - {"::", Operator("CAST", 14, 2, OperatorType::Cast)} - }); - - static std::vector> op_table_unary({ - {"NOT", Operator("not", 5, 1)}, - {"-", Operator("negate", 13, 1)} - }); - - auto lambda_operator = Operator("lambda", 1, 2); - auto finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); - - ParserCompoundIdentifier identifier_parser(false, true); - ParserNumber number_parser; - ParserAsterisk asterisk_parser; - ParserLiteral literal_parser; - ParserTupleOfLiterals tuple_literal_parser; - ParserArrayOfLiterals array_literal_parser; - ParserSubstitution substitution_parser; - ParserMySQLGlobalVariable mysql_global_variable_parser; - - ParserKeyword any_parser("ANY"); - ParserKeyword all_parser("ALL"); - - // Recursion - ParserQualifiedAsterisk qualified_asterisk_parser; - ParserColumnsMatcher columns_matcher_parser; - ParserSubquery subquery_parser; - - Action next = Action::OPERAND; - - std::vector> storage; - storage.push_back(std::make_unique()); - - while (pos.isValid()) - { - if (!storage.back()->parse(pos, expected, next)) - return false; - - storage.back()->syncDepth(pos); - - if (storage.back()->isFinished()) - { - next = Action::OPERATOR; - - ASTPtr res; - if (!storage.back()->getResult(res)) - return false; - - storage.pop_back(); - storage.back()->pushOperand(res); - continue; - } - - if (next == Action::OPERAND) - { - next = Action::OPERATOR; - ASTPtr tmp; - - /// Special case for cast expression - if (storage.back()->previousType() != OperatorType::TupleElement && - ParseCastExpression(pos, tmp, expected)) - { - storage.back()->pushOperand(std::move(tmp)); - continue; - } - - if (storage.back()->previousType() == OperatorType::Comparison) - { - auto old_pos = pos; - SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; - - if (any_parser.ignore(pos, expected) && subquery_parser.parse(pos, 
tmp, expected)) - subquery_function_type = SubqueryFunctionType::ANY; - else if (all_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) - subquery_function_type = SubqueryFunctionType::ALL; - - if (subquery_function_type != SubqueryFunctionType::NONE) - { - Operator prev_op; - ASTPtr function, argument; - - if (!storage.back()->popOperator(prev_op)) - return false; - if (!storage.back()->popOperand(argument)) - return false; - - function = makeASTFunction(prev_op.function_name, argument, tmp); - - if (!modifyAST(function, subquery_function_type)) - return false; - - storage.back()->pushOperand(std::move(function)); - continue; - } - else - { - pos = old_pos; - } - } - - /// Try to find any unary operators - auto cur_op = op_table_unary.begin(); - for (; cur_op != op_table_unary.end(); ++cur_op) - { - if (parseOperator(pos, cur_op->first, expected)) - break; - } - - if (cur_op != op_table_unary.end()) - { - next = Action::OPERAND; - storage.back()->pushOperator(cur_op->second); - } - else if (parseOperator(pos, "INTERVAL", expected)) - { - next = Action::OPERAND; - storage.push_back(std::make_unique()); - } - else if (parseOperator(pos, "CASE", expected)) - { - next = Action::OPERAND; - storage.push_back(std::make_unique()); - } - else if (ParseDateOperatorExpression(pos, tmp, expected) || - ParseTimestampOperatorExpression(pos, tmp, expected) || - tuple_literal_parser.parse(pos, tmp, expected) || - array_literal_parser.parse(pos, tmp, expected) || - number_parser.parse(pos, tmp, expected) || - literal_parser.parse(pos, tmp, expected) || - asterisk_parser.parse(pos, tmp, expected) || - qualified_asterisk_parser.parse(pos, tmp, expected) || - columns_matcher_parser.parse(pos, tmp, expected)) - { - storage.back()->pushOperand(std::move(tmp)); - } - else if (identifier_parser.parse(pos, tmp, expected)) - { - if (pos->type == TokenType::OpeningRoundBracket) - { - ++pos; - - next = Action::OPERAND; - - String function_name = 
getIdentifierName(tmp); - String function_name_lowercase = Poco::toLower(function_name); - - if (function_name_lowercase == "cast") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "extract") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "substring") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "position") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "exists") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "trim") - storage.push_back(std::make_unique(false, false)); - else if (function_name_lowercase == "ltrim") - storage.push_back(std::make_unique(true, false)); - else if (function_name_lowercase == "rtrim") - storage.push_back(std::make_unique(false, true)); - else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" - || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") - storage.push_back(std::make_unique("plus")); - else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" - || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") - storage.push_back(std::make_unique("minus")); - else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" - || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") - storage.push_back(std::make_unique()); - else if (function_name_lowercase == "grouping") - storage.push_back(std::make_unique(function_name_lowercase)); - else - storage.push_back(std::make_unique(function_name)); - } - else - { - storage.back()->pushOperand(std::move(tmp)); - } - } - else if (substitution_parser.parse(pos, tmp, expected)) - { - storage.back()->pushOperand(std::move(tmp)); - } - else if (pos->type == TokenType::OpeningRoundBracket) - { - if (subquery_parser.parse(pos, tmp, expected)) - { - 
storage.back()->pushOperand(std::move(tmp)); - continue; - } - next = Action::OPERAND; - storage.push_back(std::make_unique()); - ++pos; - } - else if (pos->type == TokenType::OpeningSquareBracket) - { - ++pos; - - next = Action::OPERAND; - storage.push_back(std::make_unique()); - } - else if (mysql_global_variable_parser.parse(pos, tmp, expected)) - { - storage.back()->pushOperand(std::move(tmp)); - } - else - { - break; - } - } - else - { - next = Action::OPERAND; - ASTPtr tmp; - - Expected stub; - if (ParserKeyword("IN PARTITION").checkWithoutMoving(pos, stub)) - break; - - /// Try to find operators from 'op_table' - auto cur_op = op_table.begin(); - for (; cur_op != op_table.end(); ++cur_op) - { - if (parseOperator(pos, cur_op->first, expected)) - break; - } - - if (cur_op != op_table.end()) - { - auto op = cur_op->second; - - // 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator - if (op.function_name == "and" && storage.back()->hasBetween()) - { - storage.back()->subBetween(); - op = finish_between_operator; - } - - while (storage.back()->previousPriority() >= op.priority) - { - ASTPtr func; - Operator prev_op; - storage.back()->popOperator(prev_op); - - if (prev_op.type == OperatorType::Mergeable && op.function_name == prev_op.function_name) - { - op.arity += prev_op.arity - 1; - break; - } - - if (prev_op.type == OperatorType::FinishBetween) - { - Operator tmp_op; - if (!storage.back()->popOperator(tmp_op)) - return false; - - if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) - return false; - - bool negative = tmp_op.type == OperatorType::StartNotBetween; - - ASTs arguments; - if (!storage.back()->lastNOperands(arguments, 3)) - return false; - - func = makeBetweenOperator(negative, arguments); - } - else - { - func = makeASTFunction(prev_op.function_name); - - if (!storage.back()->lastNOperands(func->children[0]->children, prev_op.arity)) - return false; - } - - 
storage.back()->pushOperand(func); - } - storage.back()->pushOperator(op); - - if (op.type == OperatorType::ArrayElement) - storage.push_back(std::make_unique()); - - // isNull & isNotNull is postfix unary operator - if (op.type == OperatorType::IsNull) - next = Action::OPERATOR; - - if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween) - storage.back()->addBetween(); - - if (op.type == OperatorType::Cast) - { - next = Action::OPERATOR; - - ASTPtr type_ast; - if (!ParserDataType().parse(pos, type_ast, expected)) - return false; - - storage.back()->pushOperand(std::make_shared(queryToString(type_ast))); - } - } - else if (parseOperator(pos, "->", expected)) - { - if (!storage.back()->parseLambda()) - return false; - - storage.back()->pushOperator(lambda_operator); - } - else if (storage.size() > 1 && ParserAlias(true).parse(pos, tmp, expected)) - { - if (!storage.back()->insertAlias(tmp)) - return false; - } - else if (pos->type == TokenType::Comma) - { - if (storage.size() == 1) - break; - } - else - { - break; - } - } - } - - // Check if we only have one starting layer - if (storage.size() > 1) - return false; - - if (!storage.back()->wrapLayer()) - return false; - - if (!storage.back()->getResult(node)) - return false; - - storage.back()->syncDepth(pos); - - return true; -} bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -2868,4 +1107,1802 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return parser.parse(pos, node, expected); } + +enum class Action +{ + OPERAND, + OPERATOR +}; + +/** Operator types are needed for special handling of certain operators. + * Operators can be grouped into some type if they have similar behaviour. + * Certain operators are unique in terms of their behaviour, so they are assigned a separate type. 
+ */ + +enum class OperatorType +{ + None, + Comparison, + Mergeable, + ArrayElement, + TupleElement, + IsNull, + StartBetween, + StartNotBetween, + FinishBetween, + StartIf, + FinishIf, + Cast +}; + +/** Operator class stores parameters of the operator: + * - function_name name of the function that operator will create + * - priority priority of the operator relative to the other operators + * - arity the amount of arguments that operator will consume + * - type type of the operator that defines its behaviour + */ +class Operator +{ +public: + Operator() = default; + + Operator(String function_name_, + Int32 priority_, + Int32 arity_ = 2, + OperatorType type_ = OperatorType::None) : type(type_), priority(priority_), arity(arity_), function_name(function_name_) + { + } + + OperatorType type; + Int32 priority; + Int32 arity; + String function_name; +}; + +/** Layer is a class that represents context for parsing certain element, + * that consists of other elements e.g. f(x1, x2, x3) + * + * - Manages operands and operators for the future elements (arguments) + * - Combines operands and operator into one element + * - Parses separators and endings + * - Combines resulting arguments into a function + */ + +class Layer +{ +public: + virtual ~Layer() = default; + + bool popOperator(Operator & op) + { + if (operators.empty()) + return false; + + op = std::move(operators.back()); + operators.pop_back(); + + return true; + } + + void pushOperator(Operator op) + { + /// Mergeable operators do not add depth compared to other operators + /// a AND b AND c => and(a, b, c) + if (op.type != OperatorType::Mergeable) + { + ++depth_diff; + ++depth_total; + } + else + { + depth_diff -= depth_total; + depth_total = 0; + } + + operators.push_back(std::move(op)); + } + + bool popOperand(ASTPtr & op) + { + if (operands.empty()) + return false; + + op = std::move(operands.back()); + operands.pop_back(); + + return true; + } + + void pushOperand(ASTPtr op) + { + 
operands.push_back(std::move(op)); + } + + void pushResult(ASTPtr op) + { + result.push_back(std::move(op)); + } + + virtual bool getResult(ASTPtr & op) + { + if (result.size() == 1) + { + op = std::move(result[0]); + return true; + } + + return false; + } + + virtual bool parse(IParser::Pos & /*pos*/, Expected & /*expected*/, Action & /*action*/) + { + return true; + } + + bool isFinished() const + { + return finished; + } + + int previousPriority() const + { + if (operators.empty()) + return 0; + + return operators.back().priority; + } + + OperatorType previousType() const + { + if (operators.empty()) + return OperatorType::None; + + return operators.back().type; + } + + int empty() const + { + return operators.empty() && operands.empty(); + } + + bool popLastNOperands(ASTs & asts, size_t n) + { + if (n > operands.size()) + return false; + + auto start = operands.begin() + operands.size() - n; + asts.insert(asts.end(), std::make_move_iterator(start), std::make_move_iterator(operands.end())); + operands.erase(start, operands.end()); + + return true; + } + + /// Merge operators and operands into a single element. + /// Operators are previously sorted in ascending order, + /// so we can just merge them with operands starting from the end. 
+ /// + bool mergeElement(bool push_to_result = true) + { + Operator cur_op; + while (popOperator(cur_op)) + { + ASTPtr function; + + // Special case of ternary operator + if (cur_op.type == OperatorType::StartIf) + return false; + + if (cur_op.type == OperatorType::FinishIf) + { + Operator tmp; + if (!popOperator(tmp) || tmp.type != OperatorType::StartIf) + return false; + } + + // Special case of a BETWEEN b AND c operator + if (cur_op.type == OperatorType::StartBetween || cur_op.type == OperatorType::StartNotBetween) + return false; + + if (cur_op.type == OperatorType::FinishBetween) + { + Operator tmp_op; + if (!popOperator(tmp_op)) + return false; + + if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) + return false; + + bool negative = tmp_op.type == OperatorType::StartNotBetween; + + ASTs arguments; + if (!popLastNOperands(arguments, 3)) + return false; + + function = makeBetweenOperator(negative, arguments); + } + else + { + function = makeASTFunction(cur_op.function_name); + + if (!popLastNOperands(function->children[0]->children, cur_op.arity)) + return false; + } + + pushOperand(function); + } + + ASTPtr node; + if (!popOperand(node)) + return false; + + bool res = empty(); + + if (push_to_result) + pushResult(node); + else + pushOperand(node); + + depth_diff -= depth_total; + depth_total = 0; + + return res; + } + + bool parseLambda() + { + // 0. If empty - create function tuple with 0 args + if (empty()) + { + auto function = makeASTFunction("tuple"); + pushOperand(function); + return true; + } + + if (!mergeElement()) + return false; + + /// 1. If there is already tuple do nothing + if (tryGetFunctionName(result.back()) == "tuple") + { + pushOperand(result.back()); + result.pop_back(); + } + /// 2. 
Put all result in a single tuple + else + { + auto function = makeASTFunction("tuple", result); + result.clear(); + pushOperand(function); + } + return true; + } + + bool insertAlias(ASTPtr node) + { + if (!mergeElement(false)) + return false; + + if (operands.empty()) + return false; + + if (auto * ast_with_alias = dynamic_cast(operands.back().get())) + tryGetIdentifierNameInto(node, ast_with_alias->alias); + else + return false; + + return true; + } + + void addBetween() + { + ++open_between; + } + + void subBetween() + { + --open_between; + } + + bool hasBetween() const + { + return open_between > 0; + } + + void syncDepth(IParser::Pos & pos) + { + for (; depth_diff > 0; --depth_diff) + pos.increaseDepth(); + + for (; depth_diff < 0; ++depth_diff) + pos.decreaseDepth(); + } + +protected: + std::vector operators; + ASTs operands; + ASTs result; + bool finished = false; + int state = 0; + + /// 'AND' in operator '... BETWEEN ... AND ...' mirrors logical operator 'AND'. + /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. + int open_between = 0; + + /// We need to count depth (at least roughly) because of the segfault in the AST destructor, if the depth is too deep. + /// We change depth in two places, in both of which we don't have access to the current IParser::Pos. + /// So we need to store the current difference of depth to later sync it in syncDepth(pos). + int depth_diff = 1; + + /// Total depth allows us to decrease depth to the previous level (before entering our layer). 
+ int depth_total = 1; +}; + +template +class BaseLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (ParserToken(separator).ignore(pos, expected)) + { + action = Action::OPERAND; + return mergeElement(); + } + + if (ParserToken(end).ignore(pos, expected)) + { + action = Action::OPERATOR; + + if (!empty() || !result.empty()) + if (!mergeElement()) + return false; + + finished = true; + } + + return true; + } +}; + +class FunctionLayer : public Layer +{ +public: + explicit FunctionLayer(String function_name_) : function_name(function_name_) + { + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + state = 1; + + auto pos_after_bracket = pos; + auto old_expected = expected; + + ParserKeyword all("ALL"); + ParserKeyword distinct("DISTINCT"); + + if (all.ignore(pos, expected)) + has_all = true; + + if (distinct.ignore(pos, expected)) + has_distinct = true; + + if (!has_all && all.ignore(pos, expected)) + has_all = true; + + if (has_all && has_distinct) + return false; + + if (has_all || has_distinct) + { + /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treated as identifiers + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + pos = pos_after_bracket; + expected = old_expected; + has_all = false; + has_distinct = false; + } + } + + contents_begin = pos->begin; + } + + if (state == 1) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + return mergeElement(); + } + + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + action = Action::OPERATOR; + + if (!empty() || !result.empty()) + if (!mergeElement()) + return false; + + contents_end = pos->begin; + + /** Check for a common error case - often due to the complexity of quoting command-line arguments, + * an expression of the form toDate(2014-01-01) 
appears in the query instead of toDate('2014-01-01'). + * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number, + * and the query silently returns an unexpected result. + */ + if (function_name == "toDate" + && contents_end - contents_begin == strlen("2014-01-01") + && contents_begin[0] >= '2' && contents_begin[0] <= '3' + && contents_begin[1] >= '0' && contents_begin[1] <= '9' + && contents_begin[2] >= '0' && contents_begin[2] <= '9' + && contents_begin[3] >= '0' && contents_begin[3] <= '9' + && contents_begin[4] == '-' + && contents_begin[5] >= '0' && contents_begin[5] <= '9' + && contents_begin[6] >= '0' && contents_begin[6] <= '9' + && contents_begin[7] == '-' + && contents_begin[8] >= '0' && contents_begin[8] <= '9' + && contents_begin[9] >= '0' && contents_begin[9] <= '9') + { + std::string contents_str(contents_begin, contents_end - contents_begin); + throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')" + , ErrorCodes::SYNTAX_ERROR); + } + + if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + { + parameters = std::make_shared(); + std::swap(parameters->children, result); + action = Action::OPERAND; + + /// Parametric aggregate functions cannot have DISTINCT in parameters list. 
+ if (has_distinct) + return false; + + auto pos_after_bracket = pos; + auto old_expected = expected; + + ParserKeyword all("ALL"); + ParserKeyword distinct("DISTINCT"); + + if (all.ignore(pos, expected)) + has_all = true; + + if (distinct.ignore(pos, expected)) + has_distinct = true; + + if (!has_all && all.ignore(pos, expected)) + has_all = true; + + if (has_all && has_distinct) + return false; + + if (has_all || has_distinct) + { + /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treated as identifiers + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + pos = pos_after_bracket; + expected = old_expected; + has_distinct = false; + } + } + } + else + { + state = 2; + } + } + } + + if (state == 2) + { + if (has_distinct) + function_name += "Distinct"; + + auto function_node = makeASTFunction(function_name, std::move(result)); + + if (parameters) + { + function_node->parameters = parameters; + function_node->children.push_back(function_node->parameters); + } + + ParserKeyword filter("FILTER"); + ParserKeyword over("OVER"); + + if (filter.ignore(pos, expected)) + { + // We are slightly breaking the parser interface by parsing the window + // definition into an existing ASTFunction. Normally it would take a + // reference to ASTPtr and assign it the new node. We only have a pointer + // of a different type, hence this workaround with a temporary pointer. 
+ ASTPtr function_node_as_iast = function_node; + + // Recursion + ParserFilterClause filter_parser; + if (!filter_parser.parse(pos, function_node_as_iast, expected)) + return false; + } + + if (over.ignore(pos, expected)) + { + function_node->is_window_function = true; + + ASTPtr function_node_as_iast = function_node; + + // Recursion + ParserWindowReference window_reference; + if (!window_reference.parse(pos, function_node_as_iast, expected)) + return false; + } + + result = {function_node}; + finished = true; + } + + return true; + } + +private: + bool has_all = false; + bool has_distinct = false; + + const char * contents_begin; + const char * contents_end; + + String function_name; + ASTPtr parameters; +}; + + +class RoundBracketsLayer : public Layer +{ +public: + bool getResult(ASTPtr & op) override + { + // Round brackets can mean priority operator as well as function tuple() + if (!is_tuple && result.size() == 1) + op = std::move(result[0]); + else + op = makeASTFunction("tuple", std::move(result)); + + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + is_tuple = true; + if (!mergeElement()) + return false; + } + + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + action = Action::OPERATOR; + + if (!empty()) + if (!mergeElement()) + return false; + + // Special case for (('a', 'b')) -> tuple(('a', 'b')) + if (!is_tuple && result.size() == 1) + if (auto * literal = result[0]->as()) + if (literal->value.getType() == Field::Types::Tuple) + is_tuple = true; + + finished = true; + } + + return true; + } +private: + bool is_tuple = false; +}; + +class ArrayLayer : public BaseLayer +{ +public: + bool getResult(ASTPtr & op) override + { + op = makeASTFunction("array", std::move(result)); + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + 
return BaseLayer::parse(pos, expected, action); + } +}; + +// FunctionBaseLayer + +class ArrayElementLayer : public BaseLayer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + return BaseLayer::parse(pos, expected, action); + } +}; + +class CastLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + ParserKeyword as_keyword_parser("AS"); + ASTPtr alias; + + /// expr AS type + if (state == 0) + { + ASTPtr type_node; + + if (as_keyword_parser.ignore(pos, expected)) + { + auto old_pos = pos; + + if (ParserIdentifier().parse(pos, alias, expected) && + as_keyword_parser.ignore(pos, expected) && + ParserDataType().parse(pos, type_node, expected) && + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!insertAlias(alias)) + return false; + + if (!mergeElement()) + return false; + + result = {createFunctionCast(result[0], type_node)}; + finished = true; + return true; + } + + pos = old_pos; + + if (ParserIdentifier().parse(pos, alias, expected) && + ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + if (!insertAlias(alias)) + return false; + + if (!mergeElement()) + return false; + + state = 1; + return true; + } + + pos = old_pos; + + if (ParserDataType().parse(pos, type_node, expected) && + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + result = {createFunctionCast(result[0], type_node)}; + finished = true; + return true; + } + + return false; + } + + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 1; + return true; + } + } + if (state == 1) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + result = {makeASTFunction("CAST", result[0], result[1])}; + finished = true; + 
return true; + } + } + + return true; + } +}; + +class ExtractLayer : public BaseLayer +{ +public: + bool getResult(ASTPtr & op) override + { + if (parsed_interval_kind) + { + if (result.empty()) + return false; + + op = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), result[0]); + } + else + op = makeASTFunction("extract", std::move(result)); + + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + IParser::Pos begin = pos; + ParserKeyword s_from("FROM"); + + if (parseIntervalKind(pos, expected, interval_kind) && s_from.ignore(pos, expected)) + { + parsed_interval_kind = true; + state = 2; + return true; + } + else + { + state = 1; + pos = begin; + } + } + + if (state == 1) + { + return BaseLayer::parse(pos, expected, action); + } + + if (state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + finished = true; + return true; + } + } + + return true; + } + +private: + IntervalKind interval_kind; + bool parsed_interval_kind = false; +}; + +class SubstringLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length) + /// The latter will be parsed normally as a function later. 
+ + if (state == 0) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected) || + ParserKeyword("FROM").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 1; + } + } + + if (state == 1) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected) || + ParserKeyword("FOR").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 2; + } + } + + if (state == 1 || state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + result = {makeASTFunction("substring", result)}; + finished = true; + return true; + } + } + + return true; + } +}; + +class PositionLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 1; + } + if (ParserKeyword("IN").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 2; + } + } + + if (state == 1) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + } + } + + if (state == 1 || state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + if (state == 1) + result = {makeASTFunction("position", result)}; + else + result = {makeASTFunction("position", result[1], result[0])}; + + finished = true; + return true; + } + } + + return true; + } +}; + + +class ExistsLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + { + ASTPtr node; + + // Recursion + if (!ParserSelectWithUnionQuery().parse(pos, node, expected)) + return false; + + if 
(!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + + auto subquery = std::make_shared(); + subquery->children.push_back(node); + result = {makeASTFunction("exists", subquery)}; + + finished = true; + + return true; + } +}; + +class TrimLayer : public Layer +{ +public: + TrimLayer(bool trim_left_, bool trim_right_) : trim_left(trim_left_), trim_right(trim_right_) + { + } + + bool getResult(ASTPtr & op) override + { + op = makeASTFunction(function_name, std::move(result)); + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + /// Handles all possible TRIM/LTRIM/RTRIM call variants + + if (state == 0) + { + if (!trim_left && !trim_right) + { + if (ParserKeyword("BOTH").ignore(pos, expected)) + { + trim_left = true; + trim_right = true; + char_override = true; + } + else if (ParserKeyword("LEADING").ignore(pos, expected)) + { + trim_left = true; + char_override = true; + } + else if (ParserKeyword("TRAILING").ignore(pos, expected)) + { + trim_right = true; + char_override = true; + } + else + { + trim_left = true; + trim_right = true; + } + + if (char_override) + state = 1; + else + state = 2; + } + else + { + state = 2; + } + } + + if (state == 1) + { + if (ParserKeyword("FROM").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + to_remove = makeASTFunction("regexpQuoteMeta", result[0]); + result.clear(); + state = 2; + } + } + + if (state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + ASTPtr pattern_node; + + if (char_override) + { + auto pattern_func_node = std::make_shared(); + auto pattern_list_args = std::make_shared(); + if (trim_left && trim_right) + { + pattern_list_args->children = { + std::make_shared("^["), + to_remove, + std::make_shared("]+|["), + to_remove, + std::make_shared("]+$") + }; + function_name = "replaceRegexpAll"; + } + else + 
{ + if (trim_left) + { + pattern_list_args->children = { + std::make_shared("^["), + to_remove, + std::make_shared("]+") + }; + } + else + { + /// trim_right == false not possible + pattern_list_args->children = { + std::make_shared("["), + to_remove, + std::make_shared("]+$") + }; + } + function_name = "replaceRegexpOne"; + } + + pattern_func_node->name = "concat"; + pattern_func_node->arguments = std::move(pattern_list_args); + pattern_func_node->children.push_back(pattern_func_node->arguments); + + pattern_node = std::move(pattern_func_node); + } + else + { + if (trim_left && trim_right) + { + function_name = "trimBoth"; + } + else + { + if (trim_left) + { + function_name = "trimLeft"; + } + else + { + /// trim_right == false not possible + function_name = "trimRight"; + } + } + } + + if (char_override) + { + result.push_back(pattern_node); + result.push_back(std::make_shared("")); + } + + finished = true; + } + } + + return true; + } +private: + bool trim_left; + bool trim_right; + bool char_override = false; + + ASTPtr to_remove; + String function_name; +}; + + +class DateAddLayer : public BaseLayer +{ +public: + explicit DateAddLayer(const char * function_name_) : function_name(function_name_) + { + } + + bool getResult(ASTPtr & op) override + { + if (parsed_interval_kind) + { + result[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result[0]); + op = makeASTFunction(function_name, result[1], result[0]); + } + else + op = makeASTFunction(function_name, std::move(result)); + + return true; + } + + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + if (parseIntervalKind(pos, expected, interval_kind)) + { + if (!ParserToken(TokenType::Comma).ignore(pos, expected)) + return false; + + action = Action::OPERAND; + state = 2; + parsed_interval_kind = true; + } + else + { + state = 1; + } + } + + if (state == 1) + { + return BaseLayer::parse(pos, expected, action); + } + + if (state == 
2) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 3; + } + } + + if (state == 3) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + finished = true; + } + } + return true; + } + +private: + IntervalKind interval_kind; + const char * function_name; + bool parsed_interval_kind = false; +}; + + +class DateDiffLayer : public BaseLayer +{ +public: + bool getResult(ASTPtr & op) override + { + if (parsed_interval_kind) + { + if (result.size() == 2) + op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1]); + else if (result.size() == 3) + op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1], result[2]); + else + return false; + } + else + { + op = makeASTFunction("dateDiff", std::move(result)); + } + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + if (parseIntervalKind(pos, expected, interval_kind)) + { + parsed_interval_kind = true; + + if (!ParserToken(TokenType::Comma).ignore(pos, expected)) + return false; + } + + state = 1; + } + + if (state == 1) + { + return BaseLayer::parse(pos, expected, action); + } + + return true; + } + +private: + IntervalKind interval_kind; + bool parsed_interval_kind = false; +}; + + +class IntervalLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + { + if (state == 0) + { + auto begin = pos; + auto init_expected = expected; + ASTPtr string_literal; + //// A String literal followed INTERVAL keyword, + /// the literal can be a part of an expression or + /// include Number and INTERVAL TYPE at the same time + if (ParserStringLiteral{}.parse(pos, string_literal, expected)) + { + String literal; + if 
(string_literal->as().value.tryGet(literal)) + { + Tokens tokens(literal.data(), literal.data() + literal.size()); + IParser::Pos token_pos(tokens, 0); + Expected token_expected; + ASTPtr expr; + + if (!ParserNumber{}.parse(token_pos, expr, token_expected)) + return false; + else + { + /// case: INTERVAL '1' HOUR + /// back to begin + if (!token_pos.isValid()) + { + pos = begin; + expected = init_expected; + } + else + { + /// case: INTERVAL '1 HOUR' + if (!parseIntervalKind(token_pos, token_expected, interval_kind)) + return false; + + result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), expr)}; + finished = true; + return true; + } + } + } + } + state = 1; + } + + if (state == 1) + { + if (parseIntervalKind(pos, expected, interval_kind)) + { + if (!mergeElement()) + return false; + + result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result)}; + finished = true; + } + } + + return true; + } + +private: + IntervalKind interval_kind; +}; + + +class CaseLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + if (state == 0) + { + auto old_pos = pos; + has_case_expr = !ParserKeyword("WHEN").ignore(pos, expected); + pos = old_pos; + + state = 1; + } + + if (state == 1) + { + if (ParserKeyword("WHEN").ignore(pos, expected)) + { + if ((has_case_expr || !result.empty()) && !mergeElement()) + return false; + + action = Action::OPERAND; + state = 2; + } + else if (ParserKeyword("ELSE").ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + action = Action::OPERAND; + state = 3; + } + else if (ParserKeyword("END").ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + Field field_with_null; + ASTLiteral null_literal(field_with_null); + result.push_back(std::make_shared(null_literal)); + + if (has_case_expr) + result = {makeASTFunction("caseWithExpression", result)}; + else + result = {makeASTFunction("multiIf", result)}; + finished 
= true; + } + } + + if (state == 2) + { + if (ParserKeyword("THEN").ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + action = Action::OPERAND; + state = 1; + } + } + + if (state == 3) + { + if (ParserKeyword("END").ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + if (has_case_expr) + result = {makeASTFunction("caseWithExpression", result)}; + else + result = {makeASTFunction("multiIf", result)}; + + finished = true; + } + } + + return true; + } + +private: + bool has_case_expr; +}; + + +bool ParseCastExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + IParser::Pos begin = pos; + + if (ParserCastOperator().parse(pos, node, expected)) + return true; + + pos = begin; + + /// As an exception, negative numbers should be parsed as literals, and not as an application of the operator. + if (pos->type == TokenType::Minus) + { + if (ParserLiteral().parse(pos, node, expected)) + return true; + } + return false; +} + +bool ParseDateOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + + /// If no DATE keyword, go to the nested parser. + if (!ParserKeyword("DATE").ignore(pos, expected)) + return false; + + ASTPtr expr; + if (!ParserStringLiteral().parse(pos, expr, expected)) + { + pos = begin; + return false; + } + + node = makeASTFunction("toDate", expr); + return true; +} + +bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + + /// If no TIMESTAMP keyword, go to the nested parser. 
+ if (!ParserKeyword("TIMESTAMP").ignore(pos, expected)) + return false; + + ASTPtr expr; + if (!ParserStringLiteral().parse(pos, expr, expected)) + { + pos = begin; + return false; + } + + node = makeASTFunction("toDateTime", expr); + + return true; +} + +bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + static std::vector> op_table({ + {"+", Operator("plus", 11)}, + {"-", Operator("minus", 11)}, + {"*", Operator("multiply", 12)}, + {"/", Operator("divide", 12)}, + {"%", Operator("modulo", 12)}, + {"MOD", Operator("modulo", 12)}, + {"DIV", Operator("intDiv", 12)}, + {"==", Operator("equals", 9, 2, OperatorType::Comparison)}, + {"!=", Operator("notEquals", 9, 2, OperatorType::Comparison)}, + {"<>", Operator("notEquals", 9, 2, OperatorType::Comparison)}, + {"<=", Operator("lessOrEquals", 9, 2, OperatorType::Comparison)}, + {">=", Operator("greaterOrEquals", 9, 2, OperatorType::Comparison)}, + {"<", Operator("less", 9, 2, OperatorType::Comparison)}, + {">", Operator("greater", 9, 2, OperatorType::Comparison)}, + {"=", Operator("equals", 9, 2, OperatorType::Comparison)}, + {"AND", Operator("and", 4, 2, OperatorType::Mergeable)}, + {"OR", Operator("or", 3, 2, OperatorType::Mergeable)}, + {"||", Operator("concat", 10, 2, OperatorType::Mergeable)}, + {".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)}, + {"IS NULL", Operator("isNull", 8, 1, OperatorType::IsNull)}, + {"IS NOT NULL", Operator("isNotNull", 8, 1, OperatorType::IsNull)}, + {"LIKE", Operator("like", 9)}, + {"ILIKE", Operator("ilike", 9)}, + {"NOT LIKE", Operator("notLike", 9)}, + {"NOT ILIKE", Operator("notILike", 9)}, + {"IN", Operator("in", 9)}, + {"NOT IN", Operator("notIn", 9)}, + {"GLOBAL IN", Operator("globalIn", 9)}, + {"GLOBAL NOT IN", Operator("globalNotIn", 9)}, + {"?", Operator("", 2, 0, OperatorType::StartIf)}, + {":", Operator("if", 3, 3, OperatorType::FinishIf)}, + {"BETWEEN", Operator("", 6, 0, OperatorType::StartBetween)}, + {"NOT BETWEEN", 
Operator("", 6, 0, OperatorType::StartNotBetween)}, + {"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)}, + {"::", Operator("CAST", 14, 2, OperatorType::Cast)} + }); + + static std::vector> op_table_unary({ + {"NOT", Operator("not", 5, 1)}, + {"-", Operator("negate", 13, 1)} + }); + + auto lambda_operator = Operator("lambda", 1, 2); + auto finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); + + ParserCompoundIdentifier identifier_parser(false, true); + ParserNumber number_parser; + ParserAsterisk asterisk_parser; + ParserLiteral literal_parser; + ParserTupleOfLiterals tuple_literal_parser; + ParserArrayOfLiterals array_literal_parser; + ParserSubstitution substitution_parser; + ParserMySQLGlobalVariable mysql_global_variable_parser; + + ParserKeyword any_parser("ANY"); + ParserKeyword all_parser("ALL"); + + // Recursion + ParserQualifiedAsterisk qualified_asterisk_parser; + ParserColumnsMatcher columns_matcher_parser; + ParserSubquery subquery_parser; + + Action next = Action::OPERAND; + + std::vector> storage; + storage.push_back(std::make_unique()); + + while (pos.isValid()) + { + if (!storage.back()->parse(pos, expected, next)) + return false; + + storage.back()->syncDepth(pos); + + if (storage.back()->isFinished()) + { + next = Action::OPERATOR; + + ASTPtr res; + if (!storage.back()->getResult(res)) + return false; + + storage.pop_back(); + storage.back()->pushOperand(res); + continue; + } + + if (next == Action::OPERAND) + { + next = Action::OPERATOR; + ASTPtr tmp; + + /// Special case for cast expression + if (storage.back()->previousType() != OperatorType::TupleElement && + ParseCastExpression(pos, tmp, expected)) + { + storage.back()->pushOperand(std::move(tmp)); + continue; + } + + if (storage.back()->previousType() == OperatorType::Comparison) + { + auto old_pos = pos; + SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; + + if (any_parser.ignore(pos, expected) && subquery_parser.parse(pos, 
tmp, expected)) + subquery_function_type = SubqueryFunctionType::ANY; + else if (all_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) + subquery_function_type = SubqueryFunctionType::ALL; + + if (subquery_function_type != SubqueryFunctionType::NONE) + { + Operator prev_op; + ASTPtr function, argument; + + if (!storage.back()->popOperator(prev_op)) + return false; + if (!storage.back()->popOperand(argument)) + return false; + + function = makeASTFunction(prev_op.function_name, argument, tmp); + + if (!modifyAST(function, subquery_function_type)) + return false; + + storage.back()->pushOperand(std::move(function)); + continue; + } + else + { + pos = old_pos; + } + } + + /// Try to find any unary operators + auto cur_op = op_table_unary.begin(); + for (; cur_op != op_table_unary.end(); ++cur_op) + { + if (parseOperator(pos, cur_op->first, expected)) + break; + } + + if (cur_op != op_table_unary.end()) + { + next = Action::OPERAND; + storage.back()->pushOperator(cur_op->second); + } + else if (parseOperator(pos, "INTERVAL", expected)) + { + next = Action::OPERAND; + storage.push_back(std::make_unique()); + } + else if (parseOperator(pos, "CASE", expected)) + { + next = Action::OPERAND; + storage.push_back(std::make_unique()); + } + else if (ParseDateOperatorExpression(pos, tmp, expected) || + ParseTimestampOperatorExpression(pos, tmp, expected) || + tuple_literal_parser.parse(pos, tmp, expected) || + array_literal_parser.parse(pos, tmp, expected) || + number_parser.parse(pos, tmp, expected) || + literal_parser.parse(pos, tmp, expected) || + asterisk_parser.parse(pos, tmp, expected) || + qualified_asterisk_parser.parse(pos, tmp, expected) || + columns_matcher_parser.parse(pos, tmp, expected)) + { + storage.back()->pushOperand(std::move(tmp)); + } + else if (identifier_parser.parse(pos, tmp, expected)) + { + if (pos->type == TokenType::OpeningRoundBracket) + { + ++pos; + + next = Action::OPERAND; + + String function_name = 
getIdentifierName(tmp); + String function_name_lowercase = Poco::toLower(function_name); + + if (function_name_lowercase == "cast") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "extract") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "substring") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "position") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "exists") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "trim") + storage.push_back(std::make_unique(false, false)); + else if (function_name_lowercase == "ltrim") + storage.push_back(std::make_unique(true, false)); + else if (function_name_lowercase == "rtrim") + storage.push_back(std::make_unique(false, true)); + else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" + || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") + storage.push_back(std::make_unique("plus")); + else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" + || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") + storage.push_back(std::make_unique("minus")); + else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" + || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") + storage.push_back(std::make_unique()); + else if (function_name_lowercase == "grouping") + storage.push_back(std::make_unique(function_name_lowercase)); + else + storage.push_back(std::make_unique(function_name)); + } + else + { + storage.back()->pushOperand(std::move(tmp)); + } + } + else if (substitution_parser.parse(pos, tmp, expected)) + { + storage.back()->pushOperand(std::move(tmp)); + } + else if (pos->type == TokenType::OpeningRoundBracket) + { + if (subquery_parser.parse(pos, tmp, expected)) + { + 
storage.back()->pushOperand(std::move(tmp)); + continue; + } + next = Action::OPERAND; + storage.push_back(std::make_unique()); + ++pos; + } + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + + next = Action::OPERAND; + storage.push_back(std::make_unique()); + } + else if (mysql_global_variable_parser.parse(pos, tmp, expected)) + { + storage.back()->pushOperand(std::move(tmp)); + } + else + { + break; + } + } + else + { + next = Action::OPERAND; + ASTPtr tmp; + + Expected stub; + if (ParserKeyword("IN PARTITION").checkWithoutMoving(pos, stub)) + break; + + /// Try to find operators from 'op_table' + auto cur_op = op_table.begin(); + for (; cur_op != op_table.end(); ++cur_op) + { + if (parseOperator(pos, cur_op->first, expected)) + break; + } + + if (cur_op != op_table.end()) + { + auto op = cur_op->second; + + // 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator + if (op.function_name == "and" && storage.back()->hasBetween()) + { + storage.back()->subBetween(); + op = finish_between_operator; + } + + while (storage.back()->previousPriority() >= op.priority) + { + ASTPtr function; + Operator prev_op; + storage.back()->popOperator(prev_op); + + if (prev_op.type == OperatorType::Mergeable && op.function_name == prev_op.function_name) + { + op.arity += prev_op.arity - 1; + break; + } + + if (prev_op.type == OperatorType::FinishBetween) + { + Operator tmp_op; + if (!storage.back()->popOperator(tmp_op)) + return false; + + if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) + return false; + + bool negative = tmp_op.type == OperatorType::StartNotBetween; + + ASTs arguments; + if (!storage.back()->popLastNOperands(arguments, 3)) + return false; + + function = makeBetweenOperator(negative, arguments); + } + else + { + function = makeASTFunction(prev_op.function_name); + + if (!storage.back()->popLastNOperands(function->children[0]->children, prev_op.arity)) + return false; + 
} + + storage.back()->pushOperand(function); + } + storage.back()->pushOperator(op); + + if (op.type == OperatorType::ArrayElement) + storage.push_back(std::make_unique()); + + // isNull & isNotNull is postfix unary operator + if (op.type == OperatorType::IsNull) + next = Action::OPERATOR; + + if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween) + storage.back()->addBetween(); + + if (op.type == OperatorType::Cast) + { + next = Action::OPERATOR; + + ASTPtr type_ast; + if (!ParserDataType().parse(pos, type_ast, expected)) + return false; + + storage.back()->pushOperand(std::make_shared(queryToString(type_ast))); + } + } + else if (parseOperator(pos, "->", expected)) + { + if (!storage.back()->parseLambda()) + return false; + + storage.back()->pushOperator(lambda_operator); + } + else if (storage.size() > 1 && ParserAlias(true).parse(pos, tmp, expected)) + { + if (!storage.back()->insertAlias(tmp)) + return false; + } + else if (pos->type == TokenType::Comma) + { + if (storage.size() == 1) + break; + } + else + { + break; + } + } + } + + // Check if we only have one starting layer + if (storage.size() > 1) + return false; + + if (!storage.back()->mergeElement()) + return false; + + if (!storage.back()->getResult(node)) + return false; + + storage.back()->syncDepth(pos); + + return true; +} + } diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 13e3b0adbd4..85915ea64f5 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -163,40 +163,39 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } // TEST - auto pos_test = pos; - auto expected_test = expected; - ASTPtr select_expression_list_test; - ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); + // auto pos_test = pos; + // auto expected_test = expected; + // ASTPtr select_expression_list_test; + // ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); - 
bool res_test = exp_list_for_select_clause2.parse(pos_test, select_expression_list_test, expected_test); - bool res = exp_list_for_select_clause.parse(pos, select_expression_list, expected); + // bool res_test = exp_list_for_select_clause2.parse(pos_test, select_expression_list_test, expected_test); + // bool res = exp_list_for_select_clause.parse(pos, select_expression_list, expected); - if (res != res_test && res) - throw Exception("PARSER TEST: old parser cannot parse this query", ErrorCodes::SYNTAX_ERROR); + // if (res != res_test && res) + // throw Exception("PARSER TEST: old parser cannot parse this query", ErrorCodes::SYNTAX_ERROR); - if (res != res_test && res_test) - throw Exception("PARSER TEST: new parser cannot parse this query", ErrorCodes::SYNTAX_ERROR); + // if (res != res_test && res_test) + // throw Exception("PARSER TEST: new parser cannot parse this query", ErrorCodes::SYNTAX_ERROR); - if (!res) - return false; + // if (!res) + // return false; - if (select_expression_list->getTreeHash() != select_expression_list_test->getTreeHash()) - throw Exception("PARSER TEST: Tree hash differs. \n\n OLD: \n" + select_expression_list_test->dumpTree() - + "\n\n NEW: \n" + select_expression_list->dumpTree(), ErrorCodes::SYNTAX_ERROR); + // if (select_expression_list->getTreeHash() != select_expression_list_test->getTreeHash()) + // throw Exception("PARSER TEST: Tree hash differs. 
\n\n OLD: \n" + select_expression_list_test->dumpTree() + // + "\n\n NEW: \n" + select_expression_list->dumpTree(), ErrorCodes::SYNTAX_ERROR); - // ParserToken test(TokenType::DollarSign); - // if (!test.ignore(pos, expected)) - // { - // if (!exp_list_for_select_clause.parse(pos, select_expression_list, expected)) - // return false; - // } - // else - // { - // ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); - // ASTPtr select_expression_list; - // if (!exp_list_for_select_clause2.parse(pos, select_expression_list, expected)) - // return false; - // } + ParserToken test(TokenType::DollarSign); + if (!test.ignore(pos, expected)) + { + if (!exp_list_for_select_clause.parse(pos, select_expression_list, expected)) + return false; + } + else + { + ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); + if (!exp_list_for_select_clause2.parse(pos, select_expression_list, expected)) + return false; + } } /// FROM database.table or FROM table or FROM (subquery) or FROM tableFunction(...) From 9abc87de4b31859bc5ad73f1699399674e66d1e6 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 4 Aug 2022 09:32:14 +0000 Subject: [PATCH 040/173] Fix any/all, case, refactor, add comments --- src/Parsers/ExpressionListParsers.cpp | 407 +++++++++++++++----------- src/Parsers/IAST.cpp | 24 +- src/Parsers/IAST.h | 4 +- 3 files changed, 254 insertions(+), 181 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index eb09411f29e..dfdba4f19b9 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1118,7 +1118,6 @@ enum class Action * Operators can be grouped into some type if they have similar behaviour. * Certain operators are unique in terms of their behaviour, so they are assigned a separate type. 
*/ - enum class OperatorType { None, @@ -1132,7 +1131,8 @@ enum class OperatorType FinishBetween, StartIf, FinishIf, - Cast + Cast, + Lambda }; /** Operator class stores parameters of the operator: @@ -1186,19 +1186,6 @@ public: void pushOperator(Operator op) { - /// Mergeable operators does not add depth compared to other operators - /// a AND b AND c => and(a, b, c) - if (op.type != OperatorType::Mergeable) - { - ++depth_diff; - ++depth_total; - } - else - { - depth_diff -= depth_total; - depth_total = 0; - } - operators.push_back(std::move(op)); } @@ -1270,6 +1257,8 @@ public: if (n > operands.size()) return false; + asts.reserve(asts.size() + n); + auto start = operands.begin() + operands.size() - n; asts.insert(asts.end(), std::make_move_iterator(start), std::make_move_iterator(operands.end())); operands.erase(start, operands.end()); @@ -1277,10 +1266,13 @@ public: return true; } - /// Merge operators and operands into a single element. - /// Operators are previously sorted in ascending order, + /// Merge operators and operands into a single element (column), then push it to 'result' vector. + /// Operators are previously sorted in ascending order of priority + /// (operator with priority 1 has higher priority than operator with priority 2), /// so we can just merge them with operands starting from the end. /// + /// If we fail here it means that the query was incorrect and we should return an error. + /// bool mergeElement(bool push_to_result = true) { Operator cur_op; @@ -1342,9 +1334,6 @@ public: else pushOperand(node); - depth_diff -= depth_total; - depth_total = 0; - return res; } @@ -1358,7 +1347,7 @@ public: return true; } - if (!mergeElement()) + if (operands.size() != 1 || !operators.empty() || !mergeElement()) return false; /// 1. 
If there is already tuple do nothing @@ -1377,6 +1366,7 @@ public: return true; } + /// Put 'node' identifier into the last operand as its alias bool insertAlias(ASTPtr node) { if (!mergeElement(false)) @@ -1408,15 +1398,6 @@ public: return open_between > 0; } - void syncDepth(IParser::Pos & pos) - { - for (; depth_diff > 0; --depth_diff) - pos.increaseDepth(); - - for (; depth_diff < 0; ++depth_diff) - pos.decreaseDepth(); - } - protected: std::vector operators; ASTs operands; @@ -1428,15 +1409,16 @@ protected: /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. int open_between = 0; - /// We need to count depth (at least кщгпрдн) because of the segfault in the AST destructor, if the depth is too deep. - /// We change depth in two places, in both of which we don't have acces to the current IParser::Pos. - /// So we need to store the current difference of depth to later sync it in syncDepth(pos). - int depth_diff = 1; - - /// Total depth allows us to decrease depth to the previous level (before entering our layer). - int depth_total = 1; + // bool allow_alias = true; + // bool allow_alias_without_as_keyword = true; }; + +/// Basic layer for a function with certain separator and end tokens: +/// 1. If we parse a separator we should merge current operands and operators +/// into one element and push it to 'result' vector. +/// 2. If we parse an ending token, we should merge everything as in (1) and +/// also set 'finished' flag. template class BaseLayer : public Layer { @@ -1464,6 +1446,7 @@ public: } }; +/// General function layer class FunctionLayer : public Layer { public: @@ -1473,6 +1456,13 @@ public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { + /// | 0 | 1 | 2 | + /// f(ALL ...)(ALL ...) FILTER ... + /// + /// 0. Parse ALL and DISTINCT qualifiers (-> 1) + /// 1. Parse all the arguments and ending token (-> 2), possibly with parameters list (-> 1) + /// 2.
Create function, possibly parse FILTER and OVER window definitions (finished) + if (state == 0) { state = 1; @@ -1657,7 +1647,7 @@ private: ASTPtr parameters; }; - +/// Layer for priority brackets and tuple function class RoundBracketsLayer : public Layer { public: @@ -1705,6 +1695,7 @@ private: bool is_tuple = false; }; +/// Layer for array square brackets operator class ArrayLayer : public BaseLayer { public: @@ -1720,8 +1711,9 @@ public: } }; -// FunctionBaseLayer - +/// Layer for arrayElement square brackets operator +/// This layer does not create a function, it is only needed to parse closing token +/// and return only one element. class ArrayElementLayer : public BaseLayer { public: @@ -1736,6 +1728,11 @@ class CastLayer : public Layer public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { + /// CAST(x [AS alias1], T [AS alias2]) or CAST(x [AS alias1] AS T) + /// + /// 0. Parse all the cases (-> 1) + /// 1. Parse closing token (finished) + ParserKeyword as_keyword_parser("AS"); ASTPtr alias; @@ -1829,7 +1826,7 @@ class ExtractLayer : public BaseLayer 2), otherwise (-> 1) + /// 1. Basic parser + /// 2. Parse closing bracket (finished) + if (state == 0) { IParser::Pos begin = pos; @@ -1851,7 +1856,6 @@ public: if (parseIntervalKind(pos, expected, interval_kind) && s_from.ignore(pos, expected)) { - parsed_interval_kind = true; state = 2; return true; } @@ -1884,16 +1888,24 @@ public: private: IntervalKind interval_kind; - bool parsed_interval_kind = false; }; class SubstringLayer : public Layer { public: + bool getResult(ASTPtr & op) override + { + op = makeASTFunction("substring", std::move(result)); + return true; + } + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { - /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length) - /// The latter will be parsed normally as a function later. 
+ /// Either SUBSTRING(expr FROM start [FOR length]) or SUBSTRING(expr, start, length) + /// + /// 0: Parse first separator: FROM or comma (-> 1) + /// 1: Parse second separator: FOR or comma (-> 2) + /// 1 or 2: Parse closing bracket (finished) if (state == 0) { @@ -1930,9 +1942,7 @@ public: if (!mergeElement()) return false; - result = {makeASTFunction("substring", result)}; finished = true; - return true; } } @@ -1943,8 +1953,23 @@ public: class PositionLayer : public Layer { public: + bool getResult(ASTPtr & op) override + { + if (state == 2) + std::swap(result[1], result[0]); + + op = makeASTFunction("position", std::move(result)); + return true; + } + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { + /// position(haystack, needle[, start_pos]) or position(needle IN haystack) + /// + /// 0: Parse separator: comma (-> 1) or IN (-> 2) + /// 1: Parse second separator: comma + /// 1 or 2: Parse closing bracket (finished) + if (state == 0) { if (ParserToken(TokenType::Comma).ignore(pos, expected)) @@ -1985,13 +2010,7 @@ public: if (!mergeElement()) return false; - if (state == 1) - result = {makeASTFunction("position", result)}; - else - result = {makeASTFunction("position", result[1], result[0])}; - finished = true; - return true; } } @@ -2040,6 +2059,12 @@ public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { /// Handles all possible TRIM/LTRIM/RTRIM call variants + /// + /// 0: If flags 'trim_left' and 'trim_right' are set (-> 2). + /// If not, try to parse 'BOTH', 'LEADING', 'TRAILING' keywords, + /// then if char_override (-> 1), else (-> 2) + /// 1. Parse 'FROM' keyword (-> 2) + /// 2. 
Parse closing token, choose name, add arguments (finished) if (state == 0) { @@ -2154,14 +2179,9 @@ public: else { if (trim_left) - { function_name = "trimLeft"; - } else - { - /// trim_right == false not possible function_name = "trimRight"; - } } } @@ -2210,6 +2230,11 @@ public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { + /// DATEADD(YEAR, 1, date) or DATEADD(INTERVAL 1 YEAR, date); + /// + /// 0. Try to parse interval_kind (-> 1) + /// 1. Basic parser + if (state == 0) { if (parseIntervalKind(pos, expected, interval_kind)) @@ -2218,13 +2243,10 @@ public: return false; action = Action::OPERAND; - state = 2; parsed_interval_kind = true; } - else - { - state = 1; - } + + state = 1; } if (state == 1) @@ -2232,29 +2254,6 @@ public: return BaseLayer::parse(pos, expected, action); } - if (state == 2) - { - if (ParserToken(TokenType::Comma).ignore(pos, expected)) - { - action = Action::OPERAND; - - if (!mergeElement()) - return false; - - state = 3; - } - } - - if (state == 3) - { - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!mergeElement()) - return false; - - finished = true; - } - } return true; } @@ -2288,6 +2287,9 @@ public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { + /// 0. Try to parse interval_kind (-> 1) + /// 1. Basic parser + if (state == 0) { if (parseIntervalKind(pos, expected, interval_kind)) @@ -2320,6 +2322,11 @@ class IntervalLayer : public Layer public: bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override { + /// INTERVAL 1 HOUR or INTERVAL expr HOUR + /// + /// 0. Try to parse interval_kind (-> 1) + /// 1. 
Basic parser + if (state == 0) { auto begin = pos; @@ -2339,7 +2346,9 @@ public: ASTPtr expr; if (!ParserNumber{}.parse(token_pos, expr, token_expected)) + { return false; + } else { /// case: INTERVAL '1' HOUR @@ -2363,6 +2372,7 @@ public: } } state = 1; + return true; } if (state == 1) @@ -2390,6 +2400,13 @@ class CaseLayer : public Layer public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { + /// CASE [x] WHEN expr THEN expr [WHEN expr THEN expr [...]] [ELSE expr] END + /// + /// 0. Check if we have case expression [x] (-> 1) + /// 1. Parse keywords: WHEN (-> 2), ELSE (-> 3), END (finished) + /// 2. Parse THEN keyword (-> 1) + /// 3. Parse END keyword (finished) + if (state == 0) { auto old_pos = pos; @@ -2564,7 +2581,8 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) {"BETWEEN", Operator("", 6, 0, OperatorType::StartBetween)}, {"NOT BETWEEN", Operator("", 6, 0, OperatorType::StartNotBetween)}, {"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)}, - {"::", Operator("CAST", 14, 2, OperatorType::Cast)} + {"::", Operator("CAST", 14, 2, OperatorType::Cast)}, + {"->", Operator("lambda", 1, 2, OperatorType::Lambda)} }); static std::vector> op_table_unary({ @@ -2572,7 +2590,6 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) {"-", Operator("negate", 13, 1)} }); - auto lambda_operator = Operator("lambda", 1, 2); auto finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); ParserCompoundIdentifier identifier_parser(false, true); @@ -2594,26 +2611,24 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Action next = Action::OPERAND; - std::vector> storage; - storage.push_back(std::make_unique()); + std::vector> layers; + layers.push_back(std::make_unique()); while (pos.isValid()) { - if (!storage.back()->parse(pos, expected, next)) + if (!layers.back()->parse(pos, expected, next)) return false; - 
storage.back()->syncDepth(pos); - - if (storage.back()->isFinished()) + if (layers.back()->isFinished()) { next = Action::OPERATOR; ASTPtr res; - if (!storage.back()->getResult(res)) + if (!layers.back()->getResult(res)) return false; - storage.pop_back(); - storage.back()->pushOperand(res); + layers.pop_back(); + layers.back()->pushOperand(res); continue; } @@ -2623,16 +2638,15 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr tmp; /// Special case for cast expression - if (storage.back()->previousType() != OperatorType::TupleElement && + if (layers.back()->previousType() != OperatorType::TupleElement && ParseCastExpression(pos, tmp, expected)) { - storage.back()->pushOperand(std::move(tmp)); + layers.back()->pushOperand(std::move(tmp)); continue; } - if (storage.back()->previousType() == OperatorType::Comparison) + if (layers.back()->previousType() == OperatorType::Comparison) { - auto old_pos = pos; SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; if (any_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) @@ -2645,9 +2659,9 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Operator prev_op; ASTPtr function, argument; - if (!storage.back()->popOperator(prev_op)) + if (!layers.back()->popOperator(prev_op)) return false; - if (!storage.back()->popOperand(argument)) + if (!layers.back()->popOperand(argument)) return false; function = makeASTFunction(prev_op.function_name, argument, tmp); @@ -2655,13 +2669,9 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!modifyAST(function, subquery_function_type)) return false; - storage.back()->pushOperand(std::move(function)); + layers.back()->pushOperand(std::move(function)); continue; } - else - { - pos = old_pos; - } } /// Try to find any unary operators @@ -2675,29 +2685,72 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (cur_op 
!= op_table_unary.end()) { next = Action::OPERAND; - storage.back()->pushOperator(cur_op->second); - } - else if (parseOperator(pos, "INTERVAL", expected)) - { - next = Action::OPERAND; - storage.push_back(std::make_unique()); + layers.back()->pushOperator(cur_op->second); + continue; } + + auto old_pos = pos; + std::unique_ptr layer; + if (parseOperator(pos, "INTERVAL", expected)) + layer = std::make_unique(); else if (parseOperator(pos, "CASE", expected)) + layer = std::make_unique(); + + /// Here we check that CASE or INTERVAL is not an identifier + /// It is needed for backwards compatibility + if (layer) { - next = Action::OPERAND; - storage.push_back(std::make_unique()); + Expected stub; + + auto stub_cur_op = op_table.begin(); + for (; stub_cur_op != op_table.end(); ++stub_cur_op) + { + if (parseOperator(pos, stub_cur_op->first, stub)) + break; + } + + auto check_pos = pos; + + if (stub_cur_op != op_table.end() || + ParserToken(TokenType::Comma).ignore(pos, stub) || + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, stub) || + ParserToken(TokenType::ClosingSquareBracket).ignore(pos, stub) || + ParserToken(TokenType::Semicolon).ignore(pos, stub) || + ParserKeyword("AS").ignore(pos, stub) || + ParserKeyword("FROM").ignore(pos, stub) || + !pos.isValid()) + { + pos = old_pos; + } + else if (ParserAlias(true).ignore(check_pos, stub) && + (ParserToken(TokenType::Comma).ignore(check_pos, stub) || + ParserToken(TokenType::ClosingRoundBracket).ignore(check_pos, stub) || + ParserToken(TokenType::ClosingSquareBracket).ignore(check_pos, stub) || + ParserToken(TokenType::Semicolon).ignore(check_pos, stub) || + ParserKeyword("FROM").ignore(check_pos, stub) || + !check_pos.isValid())) + { + pos = old_pos; + } + else + { + next = Action::OPERAND; + layers.push_back(std::move(layer)); + continue; + } } - else if (ParseDateOperatorExpression(pos, tmp, expected) || - ParseTimestampOperatorExpression(pos, tmp, expected) || - tuple_literal_parser.parse(pos, tmp, expected) 
|| - array_literal_parser.parse(pos, tmp, expected) || - number_parser.parse(pos, tmp, expected) || - literal_parser.parse(pos, tmp, expected) || - asterisk_parser.parse(pos, tmp, expected) || - qualified_asterisk_parser.parse(pos, tmp, expected) || - columns_matcher_parser.parse(pos, tmp, expected)) + + if (ParseDateOperatorExpression(pos, tmp, expected) || + ParseTimestampOperatorExpression(pos, tmp, expected) || + tuple_literal_parser.parse(pos, tmp, expected) || + array_literal_parser.parse(pos, tmp, expected) || + number_parser.parse(pos, tmp, expected) || + literal_parser.parse(pos, tmp, expected) || + asterisk_parser.parse(pos, tmp, expected) || + qualified_asterisk_parser.parse(pos, tmp, expected) || + columns_matcher_parser.parse(pos, tmp, expected)) { - storage.back()->pushOperand(std::move(tmp)); + layers.back()->pushOperand(std::move(tmp)); } else if (identifier_parser.parse(pos, tmp, expected)) { @@ -2711,53 +2764,53 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String function_name_lowercase = Poco::toLower(function_name); if (function_name_lowercase == "cast") - storage.push_back(std::make_unique()); + layers.push_back(std::make_unique()); else if (function_name_lowercase == "extract") - storage.push_back(std::make_unique()); + layers.push_back(std::make_unique()); else if (function_name_lowercase == "substring") - storage.push_back(std::make_unique()); + layers.push_back(std::make_unique()); else if (function_name_lowercase == "position") - storage.push_back(std::make_unique()); + layers.push_back(std::make_unique()); else if (function_name_lowercase == "exists") - storage.push_back(std::make_unique()); + layers.push_back(std::make_unique()); else if (function_name_lowercase == "trim") - storage.push_back(std::make_unique(false, false)); + layers.push_back(std::make_unique(false, false)); else if (function_name_lowercase == "ltrim") - storage.push_back(std::make_unique(true, false)); + 
layers.push_back(std::make_unique(true, false)); else if (function_name_lowercase == "rtrim") - storage.push_back(std::make_unique(false, true)); + layers.push_back(std::make_unique(false, true)); else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") - storage.push_back(std::make_unique("plus")); + layers.push_back(std::make_unique("plus")); else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") - storage.push_back(std::make_unique("minus")); + layers.push_back(std::make_unique("minus")); else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") - storage.push_back(std::make_unique()); + layers.push_back(std::make_unique()); else if (function_name_lowercase == "grouping") - storage.push_back(std::make_unique(function_name_lowercase)); + layers.push_back(std::make_unique(function_name_lowercase)); else - storage.push_back(std::make_unique(function_name)); + layers.push_back(std::make_unique(function_name)); } else { - storage.back()->pushOperand(std::move(tmp)); + layers.back()->pushOperand(std::move(tmp)); } } else if (substitution_parser.parse(pos, tmp, expected)) { - storage.back()->pushOperand(std::move(tmp)); + layers.back()->pushOperand(std::move(tmp)); } else if (pos->type == TokenType::OpeningRoundBracket) { if (subquery_parser.parse(pos, tmp, expected)) { - storage.back()->pushOperand(std::move(tmp)); + layers.back()->pushOperand(std::move(tmp)); continue; } next = Action::OPERAND; - storage.push_back(std::make_unique()); + layers.push_back(std::make_unique()); ++pos; } else if (pos->type == TokenType::OpeningSquareBracket) @@ -2765,11 +2818,11 @@ bool ParserExpression::parseImpl(Pos & pos, 
ASTPtr & node, Expected & expected) ++pos; next = Action::OPERAND; - storage.push_back(std::make_unique()); + layers.push_back(std::make_unique()); } else if (mysql_global_variable_parser.parse(pos, tmp, expected)) { - storage.back()->pushOperand(std::move(tmp)); + layers.back()->pushOperand(std::move(tmp)); } else { @@ -2781,6 +2834,10 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) next = Action::OPERAND; ASTPtr tmp; + /// ParserExpression can be called in this part of the query: + /// ALTER TABLE partition_all2 CLEAR INDEX [ p ] IN PARTITION ALL + /// + /// 'IN PARTITION' here is not an 'IN' operator, so we should stop parsing immediately Expected stub; if (ParserKeyword("IN PARTITION").checkWithoutMoving(pos, stub)) break; @@ -2797,19 +2854,30 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto op = cur_op->second; - // 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator - if (op.function_name == "and" && storage.back()->hasBetween()) + if (op.type == OperatorType::Lambda) { - storage.back()->subBetween(); + if (!layers.back()->parseLambda()) + return false; + + layers.back()->pushOperator(op); + continue; + } + + // 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' 
operator + if (op.function_name == "and" && layers.back()->hasBetween()) + { + layers.back()->subBetween(); op = finish_between_operator; } - while (storage.back()->previousPriority() >= op.priority) + while (layers.back()->previousPriority() >= op.priority) { ASTPtr function; Operator prev_op; - storage.back()->popOperator(prev_op); + layers.back()->popOperator(prev_op); + /// Mergeable operators are operators that are merged into one function: + /// For example: 'a OR b OR c' -> 'or(a, b, c)' and not 'or(or(a,b), c)' if (prev_op.type == OperatorType::Mergeable && op.function_name == prev_op.function_name) { op.arity += prev_op.arity - 1; @@ -2819,7 +2887,7 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (prev_op.type == OperatorType::FinishBetween) { Operator tmp_op; - if (!storage.back()->popOperator(tmp_op)) + if (!layers.back()->popOperator(tmp_op)) return false; if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) @@ -2828,7 +2896,7 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool negative = tmp_op.type == OperatorType::StartNotBetween; ASTs arguments; - if (!storage.back()->popLastNOperands(arguments, 3)) + if (!layers.back()->popLastNOperands(arguments, 3)) return false; function = makeBetweenOperator(negative, arguments); @@ -2837,23 +2905,23 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { function = makeASTFunction(prev_op.function_name); - if (!storage.back()->popLastNOperands(function->children[0]->children, prev_op.arity)) + if (!layers.back()->popLastNOperands(function->children[0]->children, prev_op.arity)) return false; } - storage.back()->pushOperand(function); + layers.back()->pushOperand(function); } - storage.back()->pushOperator(op); + layers.back()->pushOperator(op); if (op.type == OperatorType::ArrayElement) - storage.push_back(std::make_unique()); + layers.push_back(std::make_unique()); 
// isNull & isNotNull is postfix unary operator if (op.type == OperatorType::IsNull) next = Action::OPERATOR; if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween) - storage.back()->addBetween(); + layers.back()->addBetween(); if (op.type == OperatorType::Cast) { @@ -2863,24 +2931,17 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ParserDataType().parse(pos, type_ast, expected)) return false; - storage.back()->pushOperand(std::make_shared(queryToString(type_ast))); + layers.back()->pushOperand(std::make_shared(queryToString(type_ast))); } } - else if (parseOperator(pos, "->", expected)) + else if (layers.size() > 1 && ParserAlias(true).parse(pos, tmp, expected)) { - if (!storage.back()->parseLambda()) - return false; - - storage.back()->pushOperator(lambda_operator); - } - else if (storage.size() > 1 && ParserAlias(true).parse(pos, tmp, expected)) - { - if (!storage.back()->insertAlias(tmp)) + if (!layers.back()->insertAlias(tmp)) return false; } else if (pos->type == TokenType::Comma) { - if (storage.size() == 1) + if (layers.size() == 1) break; } else @@ -2890,18 +2951,16 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - // Check if we only have one starting layer - if (storage.size() > 1) + // When we exit the loop we should be on the 1st level + if (layers.size() > 1) return false; - if (!storage.back()->mergeElement()) + if (!layers.back()->mergeElement()) return false; - if (!storage.back()->getResult(node)) + if (!layers.back()->getResult(node)) return false; - storage.back()->syncDepth(pos); - return true; } diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 72fdbd924f2..6bfdfc358c1 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -107,14 +107,28 @@ void IAST::updateTreeHashImpl(SipHash & hash_state) const } -size_t IAST::checkDepthImpl(size_t max_depth, size_t level) const +size_t IAST::checkDepthImpl(size_t max_depth) 
const { - size_t res = level + 1; - for (const auto & child : children) + std::vector> stack; + stack.reserve(children.size()); + + for (const auto & i: children) + stack.push_back({i, 1}); + + size_t res = 0; + + while (!stack.empty()) { - if (level >= max_depth) + auto top = stack.back(); + stack.pop_back(); + + if (top.second >= max_depth) throw Exception("AST is too deep. Maximum: " + toString(max_depth), ErrorCodes::TOO_DEEP_AST); - res = std::max(res, child->checkDepthImpl(max_depth, level + 1)); + + res = std::max(res, top.second); + + for (const auto & i: top.first->children) + stack.push_back({i, top.second + 1}); } return res; diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 5714a829693..b9bdb18f144 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -92,7 +92,7 @@ public: */ size_t checkDepth(size_t max_depth) const { - return checkDepthImpl(max_depth, 0); + return checkDepthImpl(max_depth); } /** Get total number of tree elements @@ -273,7 +273,7 @@ public: static const char * hilite_none; private: - size_t checkDepthImpl(size_t max_depth, size_t level) const; + size_t checkDepthImpl(size_t max_depth) const; /// This deleter is used in ~IAST to avoid possible stack overflow in destructor. 
std::list * deleter = nullptr; From bde088f766187269eb7440a7d9d291f95022e60c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 4 Aug 2022 09:39:33 +0000 Subject: [PATCH 041/173] Fix test --- .../0_stateless/01019_alter_materialized_view_consistent.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh index b4fc336713b..e90085f4e8e 100755 --- a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh +++ b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh @@ -39,7 +39,7 @@ function insert_thread() { done wait - is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(\`case\` = 1) > 0 AND countIf(\`case\` = 2) > 0 FROM mv;") + is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(case = 1) > 0 AND countIf(case = 2) > 0 FROM mv;") if [ "$is_done" -eq "1" ]; then break @@ -58,7 +58,7 @@ function alter_thread() { -q "${ALTER[$RANDOM % 2]}" sleep "0.0$RANDOM" - is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(\`case\` = 1) > 0 AND countIf(\`case\` = 2) > 0 FROM mv;") + is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(case = 1) > 0 AND countIf(case = 2) > 0 FROM mv;") if [ "$is_done" -eq "1" ]; then break @@ -75,7 +75,7 @@ timeout 120 bash -c alter_thread & wait -$CLICKHOUSE_CLIENT -q "SELECT countIf(\`case\` = 1) > 0 AND countIf(\`case\` = 2) > 0 FROM mv LIMIT 1;" +$CLICKHOUSE_CLIENT -q "SELECT countIf(case = 1) > 0 AND countIf(case = 2) > 0 FROM mv LIMIT 1;" $CLICKHOUSE_CLIENT -q "SELECT 'inconsistencies', count() FROM mv WHERE test == 0;" $CLICKHOUSE_CLIENT -q "DROP VIEW mv" From 7577b78eab02c7fd00c9577114646fae8e5444be Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 4 Aug 2022 10:06:18 +0000 Subject: [PATCH 042/173] Fix style --- src/Parsers/ExpressionListParsers.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index dfdba4f19b9..e50bca6ae49 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1366,7 +1366,7 @@ public: return true; } - /// Put 'node' indentifier into the last operand as its alias + /// Put 'node' identifier into the last operand as its alias bool insertAlias(ASTPtr node) { if (!mergeElement(false)) @@ -1405,7 +1405,7 @@ protected: bool finished = false; int state = 0; - /// 'AND' in operator '... BETWEEN ... AND ...' mirrors logical operator 'AND'. + /// 'AND' in operator '... BETWEEN ... AND ...' mirrors logical operator 'AND'. /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. int open_between = 0; @@ -2325,7 +2325,7 @@ public: /// INTERVAL 1 HOUR or INTERVAL expr HOUR /// /// 0. Try to parse interval_kind (-> 1) - /// 1. Basic parser + /// 1. Basic parser if (state == 0) { From ff2e4ff156117927918f2d055183b61adb72004c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 10 Aug 2022 04:58:59 +0000 Subject: [PATCH 043/173] Fix tests, refactor --- src/Parsers/ExpressionListParsers.cpp | 212 ++++++++++-------- .../00984_parser_stack_overflow.reference | 2 - 2 files changed, 119 insertions(+), 95 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index e50bca6ae49..7c0b9d8e56a 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1165,12 +1165,17 @@ public: * - Manages operands and operators for the future elements (arguments) * - Combines operands and operator into one element * - Parsers separators and endings - * - Combines resulting arguments into a function + * - Combines resulting elements into a function */ class Layer { public: + Layer(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = true) : + allow_alias(allow_alias_), 
allow_alias_without_as_keyword(allow_alias_without_as_keyword_) + { + } + virtual ~Layer() = default; bool popOperator(Operator & op) @@ -1207,24 +1212,21 @@ public: void pushResult(ASTPtr op) { - result.push_back(std::move(op)); + elements.push_back(std::move(op)); } virtual bool getResult(ASTPtr & op) { - if (result.size() == 1) + if (elements.size() == 1) { - op = std::move(result[0]); + op = std::move(elements[0]); return true; } return false; } - virtual bool parse(IParser::Pos & /*pos*/, Expected & /*expected*/, Action & /*action*/) - { - return true; - } + virtual bool parse(IParser::Pos & /*pos*/, Expected & /*expected*/, Action & /*action*/) = 0; bool isFinished() const { @@ -1266,14 +1268,14 @@ public: return true; } - /// Merge operators and operands into a single element (column), then push it to 'result' vector. + /// Merge operators and operands into a single element (column), then push it to 'elements' vector. /// Operators are previously sorted in ascending order of priority /// (operator with priority 1 has higher priority than operator with priority 2), /// so we can just merge them with operands starting from the end. /// /// If we fail here it means that the query was incorrect and we should return an error. /// - bool mergeElement(bool push_to_result = true) + bool mergeElement(bool push_to_elements = true) { Operator cur_op; while (popOperator(cur_op)) @@ -1329,7 +1331,7 @@ public: bool res = empty(); - if (push_to_result) + if (push_to_elements) pushResult(node); else pushOperand(node); @@ -1351,16 +1353,16 @@ public: return false; /// 1. If there is already tuple do nothing - if (tryGetFunctionName(result.back()) == "tuple") + if (tryGetFunctionName(elements.back()) == "tuple") { - pushOperand(result.back()); - result.pop_back(); + pushOperand(elements.back()); + elements.pop_back(); } - /// 2. Put all result in a single tuple + /// 2. 
Put all elements in a single tuple else { - auto function = makeASTFunction("tuple", result); - result.clear(); + auto function = makeASTFunction("tuple", elements); + elements.clear(); pushOperand(function); } return true; @@ -1383,40 +1385,60 @@ public: return true; } - void addBetween() - { - ++open_between; - } +public: + /// 'AND' in operator '... BETWEEN ... AND ...' mirrors logical operator 'AND'. + /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. + int between_counter = 0; - void subBetween() - { - --open_between; - } - - bool hasBetween() const - { - return open_between > 0; - } + bool allow_alias = true; + bool allow_alias_without_as_keyword = true; protected: std::vector operators; ASTs operands; - ASTs result; + ASTs elements; bool finished = false; int state = 0; +}; - /// 'AND' in operator '... BETWEEN ... AND ...' mirrors logical operator 'AND'. - /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. - int open_between = 0; - // bool allow_alias = true; - // bool allow_alias_without_as_keyword = true; +class SingleElementLayer : public Layer +{ +public: + + SingleElementLayer() : Layer(false, false) + { + } + + bool getResult(ASTPtr & op) override + { + /// We can exit the main cycle outside the parse() function, + /// so we need to merge the element here + if (!mergeElement()) + return false; + + if (elements.size() == 1) + { + op = std::move(elements[0]); + return true; + } + + return false; + } + + bool parse(IParser::Pos & pos, Expected & /*expected*/, Action & /*action*/) override + { + if (pos->type == TokenType::Comma) + finished = true; + + return true; + } }; /// Basic layer for a function with certain separator and end tokens: /// 1. If we parse a separator we should merge current operands and operators -/// into one element and push in to 'result' vector. +/// into one element and push in to 'elements' vector. /// 2. 
If we parse an ending token, we should merge everything as in (1) and /// also set 'finished' flag. template @@ -1435,7 +1457,7 @@ public: { action = Action::OPERATOR; - if (!empty() || !result.empty()) + if (!empty() || !elements.empty()) if (!mergeElement()) return false; @@ -1512,7 +1534,7 @@ public: { action = Action::OPERATOR; - if (!empty() || !result.empty()) + if (!empty() || !elements.empty()) if (!mergeElement()) return false; @@ -1521,7 +1543,7 @@ public: /** Check for a common error case - often due to the complexity of quoting command-line arguments, * an expression of the form toDate(2014-01-01) appears in the query instead of toDate('2014-01-01'). * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number, - * and the query silently returns an unexpected result. + * and the query silently returns an unexpected elements. */ if (function_name == "toDate" && contents_end - contents_begin == strlen("2014-01-01") @@ -1544,7 +1566,7 @@ public: if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { parameters = std::make_shared(); - std::swap(parameters->children, result); + std::swap(parameters->children, elements); action = Action::OPERAND; /// Parametric aggregate functions cannot have DISTINCT in parameters list. 
@@ -1592,7 +1614,7 @@ public: if (has_distinct) function_name += "Distinct"; - auto function_node = makeASTFunction(function_name, std::move(result)); + auto function_node = makeASTFunction(function_name, std::move(elements)); if (parameters) { @@ -1629,7 +1651,7 @@ public: return false; } - result = {function_node}; + elements = {function_node}; finished = true; } @@ -1654,10 +1676,10 @@ public: bool getResult(ASTPtr & op) override { // Round brackets can mean priority operator as well as function tuple() - if (!is_tuple && result.size() == 1) - op = std::move(result[0]); + if (!is_tuple && elements.size() == 1) + op = std::move(elements[0]); else - op = makeASTFunction("tuple", std::move(result)); + op = makeASTFunction("tuple", std::move(elements)); return true; } @@ -1681,8 +1703,8 @@ public: return false; // Special case for (('a', 'b')) -> tuple(('a', 'b')) - if (!is_tuple && result.size() == 1) - if (auto * literal = result[0]->as()) + if (!is_tuple && elements.size() == 1) + if (auto * literal = elements[0]->as()) if (literal->value.getType() == Field::Types::Tuple) is_tuple = true; @@ -1701,7 +1723,7 @@ class ArrayLayer : public BaseLayer(); subquery->children.push_back(node); - result = {makeASTFunction("exists", subquery)}; + elements = {makeASTFunction("exists", subquery)}; finished = true; @@ -2052,7 +2074,7 @@ public: bool getResult(ASTPtr & op) override { - op = makeASTFunction(function_name, std::move(result)); + op = makeASTFunction(function_name, std::move(elements)); return true; } @@ -2112,8 +2134,8 @@ public: if (!mergeElement()) return false; - to_remove = makeASTFunction("regexpQuoteMeta", result[0]); - result.clear(); + to_remove = makeASTFunction("regexpQuoteMeta", elements[0]); + elements.clear(); state = 2; } } @@ -2187,8 +2209,8 @@ public: if (char_override) { - result.push_back(pattern_node); - result.push_back(std::make_shared("")); + elements.push_back(pattern_node); + elements.push_back(std::make_shared("")); } finished = true; @@ 
-2218,11 +2240,11 @@ public: { if (parsed_interval_kind) { - result[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result[0]); - op = makeASTFunction(function_name, result[1], result[0]); + elements[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), elements[0]); + op = makeASTFunction(function_name, elements[1], elements[0]); } else - op = makeASTFunction(function_name, std::move(result)); + op = makeASTFunction(function_name, std::move(elements)); return true; } @@ -2271,16 +2293,16 @@ public: { if (parsed_interval_kind) { - if (result.size() == 2) - op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1]); - else if (result.size() == 3) - op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), result[0], result[1], result[2]); + if (elements.size() == 2) + op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1]); + else if (elements.size() == 3) + op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1], elements[2]); else return false; } else { - op = makeASTFunction("dateDiff", std::move(result)); + op = makeASTFunction("dateDiff", std::move(elements)); } return true; } @@ -2364,7 +2386,7 @@ public: if (!parseIntervalKind(token_pos, token_expected, interval_kind)) return false; - result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), expr)}; + elements = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), expr)}; finished = true; return true; } @@ -2382,7 +2404,7 @@ public: if (!mergeElement()) return false; - result = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), result)}; + elements = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), elements)}; finished = true; } } @@ -2420,7 +2442,7 @@ public: { if (ParserKeyword("WHEN").ignore(pos, expected)) { - if 
((has_case_expr || !result.empty()) && !mergeElement()) + if ((has_case_expr || !elements.empty()) && !mergeElement()) return false; action = Action::OPERAND; @@ -2441,12 +2463,12 @@ public: Field field_with_null; ASTLiteral null_literal(field_with_null); - result.push_back(std::make_shared(null_literal)); + elements.push_back(std::make_shared(null_literal)); if (has_case_expr) - result = {makeASTFunction("caseWithExpression", result)}; + elements = {makeASTFunction("caseWithExpression", elements)}; else - result = {makeASTFunction("multiIf", result)}; + elements = {makeASTFunction("multiIf", elements)}; finished = true; } } @@ -2471,9 +2493,9 @@ public: return false; if (has_case_expr) - result = {makeASTFunction("caseWithExpression", result)}; + elements = {makeASTFunction("caseWithExpression", elements)}; else - result = {makeASTFunction("multiIf", result)}; + elements = {makeASTFunction("multiIf", elements)}; finished = true; } @@ -2612,7 +2634,7 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Action next = Action::OPERAND; std::vector> layers; - layers.push_back(std::make_unique()); + layers.push_back(std::make_unique()); while (pos.isValid()) { @@ -2628,6 +2650,13 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; layers.pop_back(); + + if (layers.empty()) + { + node = res; + return true; + } + layers.back()->pushOperand(res); continue; } @@ -2705,6 +2734,10 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto stub_cur_op = op_table.begin(); for (; stub_cur_op != op_table.end(); ++stub_cur_op) { + /// Minus can be unary + /// TODO: check cases 'select case - number from table' and 'select case -x when 10 then 5 else 0 end' + if (stub_cur_op->second.function_name == "minus") + continue; if (parseOperator(pos, stub_cur_op->first, stub)) break; } @@ -2864,9 +2897,9 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } 
// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator - if (op.function_name == "and" && layers.back()->hasBetween()) + if (op.function_name == "and" && layers.back()->between_counter) { - layers.back()->subBetween(); + layers.back()->between_counter--; op = finish_between_operator; } @@ -2921,7 +2954,7 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) next = Action::OPERATOR; if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween) - layers.back()->addBetween(); + layers.back()->between_counter++; if (op.type == OperatorType::Cast) { @@ -2934,16 +2967,12 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) layers.back()->pushOperand(std::make_shared(queryToString(type_ast))); } } - else if (layers.size() > 1 && ParserAlias(true).parse(pos, tmp, expected)) + else if (layers.back()->allow_alias && + ParserAlias(layers.back()->allow_alias_without_as_keyword).parse(pos, tmp, expected)) { if (!layers.back()->insertAlias(tmp)) return false; } - else if (pos->type == TokenType::Comma) - { - if (layers.size() == 1) - break; - } else { break; @@ -2955,9 +2984,6 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (layers.size() > 1) return false; - if (!layers.back()->mergeElement()) - return false; - if (!layers.back()->getResult(node)) return false; diff --git a/tests/queries/0_stateless/00984_parser_stack_overflow.reference b/tests/queries/0_stateless/00984_parser_stack_overflow.reference index a46c80e9233..0cf6a1f96df 100644 --- a/tests/queries/0_stateless/00984_parser_stack_overflow.reference +++ b/tests/queries/0_stateless/00984_parser_stack_overflow.reference @@ -1,6 +1,4 @@ exceeded exceeded -exceeded -exceeded 20002 1 From 7797f84f19799883f359c45b617b809fba931ed4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Aug 2022 19:20:17 +0000 Subject: [PATCH 044/173] Refactor a little bit. 
--- src/Parsers/ExpressionListParsers.cpp | 719 ++++++++++++++------------ 1 file changed, 383 insertions(+), 336 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 7c0b9d8e56a..cccc7a4670b 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1171,7 +1171,7 @@ public: class Layer { public: - Layer(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = true) : + explicit Layer(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = true) : allow_alias(allow_alias_), allow_alias_without_as_keyword(allow_alias_without_as_keyword_) { } @@ -1385,7 +1385,6 @@ public: return true; } -public: /// 'AND' in operator '... BETWEEN ... AND ...' mirrors logical operator 'AND'. /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. int between_counter = 0; @@ -2566,9 +2565,51 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte return true; } +struct ParserExpressionImpl +{ + static std::vector> op_table; + static std::vector> op_table_unary; + static Operator finish_between_operator; + + ParserCompoundIdentifier identifier_parser{false, true}; + ParserNumber number_parser; + ParserAsterisk asterisk_parser; + ParserLiteral literal_parser; + ParserTupleOfLiterals tuple_literal_parser; + ParserArrayOfLiterals array_literal_parser; + ParserSubstitution substitution_parser; + ParserMySQLGlobalVariable mysql_global_variable_parser; + + ParserKeyword any_parser{"ANY"}; + ParserKeyword all_parser{"ALL"}; + + // Recursion + ParserQualifiedAsterisk qualified_asterisk_parser; + ParserColumnsMatcher columns_matcher_parser; + ParserSubquery subquery_parser; + + bool parse(IParser::Pos & pos, ASTPtr & node, Expected & expected); + + enum class ParseResult + { + OPERAND, + OPERATOR, + ERROR, + END, + }; + + using Layers = std::vector>; + + ParseResult tryParseOperator(Layers & layers, IParser::Pos & pos, 
Expected & expected); + static ParseResult tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected); +}; + bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - static std::vector> op_table({ + return ParserExpressionImpl().parse(pos, node, expected); +} + +std::vector> ParserExpressionImpl::op_table({ {"+", Operator("plus", 11)}, {"-", Operator("minus", 11)}, {"*", Operator("multiply", 12)}, @@ -2607,30 +2648,15 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) {"->", Operator("lambda", 1, 2, OperatorType::Lambda)} }); - static std::vector> op_table_unary({ +std::vector> ParserExpressionImpl::op_table_unary({ {"NOT", Operator("not", 5, 1)}, {"-", Operator("negate", 13, 1)} }); - auto finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); - - ParserCompoundIdentifier identifier_parser(false, true); - ParserNumber number_parser; - ParserAsterisk asterisk_parser; - ParserLiteral literal_parser; - ParserTupleOfLiterals tuple_literal_parser; - ParserArrayOfLiterals array_literal_parser; - ParserSubstitution substitution_parser; - ParserMySQLGlobalVariable mysql_global_variable_parser; - - ParserKeyword any_parser("ANY"); - ParserKeyword all_parser("ALL"); - - // Recursion - ParserQualifiedAsterisk qualified_asterisk_parser; - ParserColumnsMatcher columns_matcher_parser; - ParserSubquery subquery_parser; +Operator ParserExpressionImpl::finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); +bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ Action next = Action::OPERAND; std::vector> layers; @@ -2661,323 +2687,21 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) continue; } + ParseResult result; + if (next == Action::OPERAND) - { - next = Action::OPERATOR; - ASTPtr tmp; - - /// Special case for cast expression - if (layers.back()->previousType() != OperatorType::TupleElement && - 
ParseCastExpression(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - continue; - } - - if (layers.back()->previousType() == OperatorType::Comparison) - { - SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; - - if (any_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) - subquery_function_type = SubqueryFunctionType::ANY; - else if (all_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) - subquery_function_type = SubqueryFunctionType::ALL; - - if (subquery_function_type != SubqueryFunctionType::NONE) - { - Operator prev_op; - ASTPtr function, argument; - - if (!layers.back()->popOperator(prev_op)) - return false; - if (!layers.back()->popOperand(argument)) - return false; - - function = makeASTFunction(prev_op.function_name, argument, tmp); - - if (!modifyAST(function, subquery_function_type)) - return false; - - layers.back()->pushOperand(std::move(function)); - continue; - } - } - - /// Try to find any unary operators - auto cur_op = op_table_unary.begin(); - for (; cur_op != op_table_unary.end(); ++cur_op) - { - if (parseOperator(pos, cur_op->first, expected)) - break; - } - - if (cur_op != op_table_unary.end()) - { - next = Action::OPERAND; - layers.back()->pushOperator(cur_op->second); - continue; - } - - auto old_pos = pos; - std::unique_ptr layer; - if (parseOperator(pos, "INTERVAL", expected)) - layer = std::make_unique(); - else if (parseOperator(pos, "CASE", expected)) - layer = std::make_unique(); - - /// Here we check that CASE or INTERVAL is not an identifier - /// It is needed for backwards compatibility - if (layer) - { - Expected stub; - - auto stub_cur_op = op_table.begin(); - for (; stub_cur_op != op_table.end(); ++stub_cur_op) - { - /// Minus can be unary - /// TODO: check cases 'select case - number from table' and 'select case -x when 10 then 5 else 0 end' - if (stub_cur_op->second.function_name == "minus") - continue; - if (parseOperator(pos, 
stub_cur_op->first, stub)) - break; - } - - auto check_pos = pos; - - if (stub_cur_op != op_table.end() || - ParserToken(TokenType::Comma).ignore(pos, stub) || - ParserToken(TokenType::ClosingRoundBracket).ignore(pos, stub) || - ParserToken(TokenType::ClosingSquareBracket).ignore(pos, stub) || - ParserToken(TokenType::Semicolon).ignore(pos, stub) || - ParserKeyword("AS").ignore(pos, stub) || - ParserKeyword("FROM").ignore(pos, stub) || - !pos.isValid()) - { - pos = old_pos; - } - else if (ParserAlias(true).ignore(check_pos, stub) && - (ParserToken(TokenType::Comma).ignore(check_pos, stub) || - ParserToken(TokenType::ClosingRoundBracket).ignore(check_pos, stub) || - ParserToken(TokenType::ClosingSquareBracket).ignore(check_pos, stub) || - ParserToken(TokenType::Semicolon).ignore(check_pos, stub) || - ParserKeyword("FROM").ignore(check_pos, stub) || - !check_pos.isValid())) - { - pos = old_pos; - } - else - { - next = Action::OPERAND; - layers.push_back(std::move(layer)); - continue; - } - } - - if (ParseDateOperatorExpression(pos, tmp, expected) || - ParseTimestampOperatorExpression(pos, tmp, expected) || - tuple_literal_parser.parse(pos, tmp, expected) || - array_literal_parser.parse(pos, tmp, expected) || - number_parser.parse(pos, tmp, expected) || - literal_parser.parse(pos, tmp, expected) || - asterisk_parser.parse(pos, tmp, expected) || - qualified_asterisk_parser.parse(pos, tmp, expected) || - columns_matcher_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else if (identifier_parser.parse(pos, tmp, expected)) - { - if (pos->type == TokenType::OpeningRoundBracket) - { - ++pos; - - next = Action::OPERAND; - - String function_name = getIdentifierName(tmp); - String function_name_lowercase = Poco::toLower(function_name); - - if (function_name_lowercase == "cast") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "extract") - layers.push_back(std::make_unique()); - else if 
(function_name_lowercase == "substring") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "position") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "exists") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "trim") - layers.push_back(std::make_unique(false, false)); - else if (function_name_lowercase == "ltrim") - layers.push_back(std::make_unique(true, false)); - else if (function_name_lowercase == "rtrim") - layers.push_back(std::make_unique(false, true)); - else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" - || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") - layers.push_back(std::make_unique("plus")); - else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" - || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") - layers.push_back(std::make_unique("minus")); - else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" - || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "grouping") - layers.push_back(std::make_unique(function_name_lowercase)); - else - layers.push_back(std::make_unique(function_name)); - } - else - { - layers.back()->pushOperand(std::move(tmp)); - } - } - else if (substitution_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else if (pos->type == TokenType::OpeningRoundBracket) - { - if (subquery_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - continue; - } - next = Action::OPERAND; - layers.push_back(std::make_unique()); - ++pos; - } - else if (pos->type == TokenType::OpeningSquareBracket) - { - ++pos; - - next = Action::OPERAND; - layers.push_back(std::make_unique()); - } - else if 
(mysql_global_variable_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else - { - break; - } - } + result = tryParseOperator(layers, pos, expected); else - { + result = tryParseOperand(layers, pos, expected); + + if (result == ParseResult::END) + break; + else if (result == ParseResult::ERROR) + return false; + else if (result == ParseResult::OPERATOR) + next = Action::OPERATOR; + else if (result == ParseResult::OPERAND) next = Action::OPERAND; - ASTPtr tmp; - - /// ParserExpression can be called in this part of the query: - /// ALTER TABLE partition_all2 CLEAR INDEX [ p ] IN PARTITION ALL - /// - /// 'IN PARTITION' here is not an 'IN' operator, so we should stop parsing immediately - Expected stub; - if (ParserKeyword("IN PARTITION").checkWithoutMoving(pos, stub)) - break; - - /// Try to find operators from 'op_table' - auto cur_op = op_table.begin(); - for (; cur_op != op_table.end(); ++cur_op) - { - if (parseOperator(pos, cur_op->first, expected)) - break; - } - - if (cur_op != op_table.end()) - { - auto op = cur_op->second; - - if (op.type == OperatorType::Lambda) - { - if (!layers.back()->parseLambda()) - return false; - - layers.back()->pushOperator(op); - continue; - } - - // 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' 
operator - if (op.function_name == "and" && layers.back()->between_counter) - { - layers.back()->between_counter--; - op = finish_between_operator; - } - - while (layers.back()->previousPriority() >= op.priority) - { - ASTPtr function; - Operator prev_op; - layers.back()->popOperator(prev_op); - - /// Mergeable operators are operators that are merged into one function: - /// For example: 'a OR b OR c' -> 'or(a, b, c)' and not 'or(or(a,b), c)' - if (prev_op.type == OperatorType::Mergeable && op.function_name == prev_op.function_name) - { - op.arity += prev_op.arity - 1; - break; - } - - if (prev_op.type == OperatorType::FinishBetween) - { - Operator tmp_op; - if (!layers.back()->popOperator(tmp_op)) - return false; - - if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) - return false; - - bool negative = tmp_op.type == OperatorType::StartNotBetween; - - ASTs arguments; - if (!layers.back()->popLastNOperands(arguments, 3)) - return false; - - function = makeBetweenOperator(negative, arguments); - } - else - { - function = makeASTFunction(prev_op.function_name); - - if (!layers.back()->popLastNOperands(function->children[0]->children, prev_op.arity)) - return false; - } - - layers.back()->pushOperand(function); - } - layers.back()->pushOperator(op); - - if (op.type == OperatorType::ArrayElement) - layers.push_back(std::make_unique()); - - // isNull & isNotNull is postfix unary operator - if (op.type == OperatorType::IsNull) - next = Action::OPERATOR; - - if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween) - layers.back()->between_counter++; - - if (op.type == OperatorType::Cast) - { - next = Action::OPERATOR; - - ASTPtr type_ast; - if (!ParserDataType().parse(pos, type_ast, expected)) - return false; - - layers.back()->pushOperand(std::make_shared(queryToString(type_ast))); - } - } - else if (layers.back()->allow_alias && - 
ParserAlias(layers.back()->allow_alias_without_as_keyword).parse(pos, tmp, expected)) - { - if (!layers.back()->insertAlias(tmp)) - return false; - } - else - { - break; - } - } } // When we exit the loop we should be on the 1st level @@ -2990,4 +2714,327 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected) +{ + ASTPtr tmp; + + /// Special case for cast expression + if (layers.back()->previousType() != OperatorType::TupleElement && + ParseCastExpression(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + return ParseResult::OPERATOR; + } + + if (layers.back()->previousType() == OperatorType::Comparison) + { + SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; + + if (any_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) + subquery_function_type = SubqueryFunctionType::ANY; + else if (all_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) + subquery_function_type = SubqueryFunctionType::ALL; + + if (subquery_function_type != SubqueryFunctionType::NONE) + { + Operator prev_op; + ASTPtr function, argument; + + if (!layers.back()->popOperator(prev_op)) + return ParseResult::ERROR; + if (!layers.back()->popOperand(argument)) + return ParseResult::ERROR; + + function = makeASTFunction(prev_op.function_name, argument, tmp); + + if (!modifyAST(function, subquery_function_type)) + return ParseResult::ERROR; + + layers.back()->pushOperand(std::move(function)); + return ParseResult::OPERATOR; + } + } + + /// Try to find any unary operators + auto cur_op = op_table_unary.begin(); + for (; cur_op != op_table_unary.end(); ++cur_op) + { + if (parseOperator(pos, cur_op->first, expected)) + break; + } + + if (cur_op != op_table_unary.end()) + { + layers.back()->pushOperator(cur_op->second); + return 
ParseResult::OPERAND; + } + + auto old_pos = pos; + std::unique_ptr layer; + if (parseOperator(pos, "INTERVAL", expected)) + layer = std::make_unique(); + else if (parseOperator(pos, "CASE", expected)) + layer = std::make_unique(); + + /// Here we check that CASE or INTERVAL is not an identifier + /// It is needed for backwards compatibility + if (layer) + { + Expected stub; + + auto stub_cur_op = op_table.begin(); + for (; stub_cur_op != op_table.end(); ++stub_cur_op) + { + /// Minus can be unary + /// TODO: check cases 'select case - number from table' and 'select case -x when 10 then 5 else 0 end' + if (stub_cur_op->second.function_name == "minus") + continue; + if (parseOperator(pos, stub_cur_op->first, stub)) + break; + } + + auto check_pos = pos; + + if (stub_cur_op != op_table.end() || + ParserToken(TokenType::Comma).ignore(pos, stub) || + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, stub) || + ParserToken(TokenType::ClosingSquareBracket).ignore(pos, stub) || + ParserToken(TokenType::Semicolon).ignore(pos, stub) || + ParserKeyword("AS").ignore(pos, stub) || + ParserKeyword("FROM").ignore(pos, stub) || + !pos.isValid()) + { + pos = old_pos; + } + else if (ParserAlias(true).ignore(check_pos, stub) && + (ParserToken(TokenType::Comma).ignore(check_pos, stub) || + ParserToken(TokenType::ClosingRoundBracket).ignore(check_pos, stub) || + ParserToken(TokenType::ClosingSquareBracket).ignore(check_pos, stub) || + ParserToken(TokenType::Semicolon).ignore(check_pos, stub) || + ParserKeyword("FROM").ignore(check_pos, stub) || + !check_pos.isValid())) + { + pos = old_pos; + } + else + { + layers.push_back(std::move(layer)); + return ParseResult::OPERAND; + } + } + + if (ParseDateOperatorExpression(pos, tmp, expected) || + ParseTimestampOperatorExpression(pos, tmp, expected) || + tuple_literal_parser.parse(pos, tmp, expected) || + array_literal_parser.parse(pos, tmp, expected) || + number_parser.parse(pos, tmp, expected) || + literal_parser.parse(pos, tmp, 
expected) || + asterisk_parser.parse(pos, tmp, expected) || + qualified_asterisk_parser.parse(pos, tmp, expected) || + columns_matcher_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else if (identifier_parser.parse(pos, tmp, expected)) + { + if (pos->type == TokenType::OpeningRoundBracket) + { + ++pos; + + String function_name = getIdentifierName(tmp); + String function_name_lowercase = Poco::toLower(function_name); + + if (function_name_lowercase == "cast") + layers.push_back(std::make_unique()); + else if (function_name_lowercase == "extract") + layers.push_back(std::make_unique()); + else if (function_name_lowercase == "substring") + layers.push_back(std::make_unique()); + else if (function_name_lowercase == "position") + layers.push_back(std::make_unique()); + else if (function_name_lowercase == "exists") + layers.push_back(std::make_unique()); + else if (function_name_lowercase == "trim") + layers.push_back(std::make_unique(false, false)); + else if (function_name_lowercase == "ltrim") + layers.push_back(std::make_unique(true, false)); + else if (function_name_lowercase == "rtrim") + layers.push_back(std::make_unique(false, true)); + else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" + || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") + layers.push_back(std::make_unique("plus")); + else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" + || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") + layers.push_back(std::make_unique("minus")); + else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" + || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") + layers.push_back(std::make_unique()); + else if (function_name_lowercase == "grouping") + layers.push_back(std::make_unique(function_name_lowercase)); + 
else + layers.push_back(std::make_unique(function_name)); + + return ParseResult::OPERAND; + } + else + { + layers.back()->pushOperand(std::move(tmp)); + } + } + else if (substitution_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else if (pos->type == TokenType::OpeningRoundBracket) + { + if (subquery_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + return ParseResult::OPERATOR; + } + + ++pos; + layers.push_back(std::make_unique()); + return ParseResult::OPERAND; + } + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + layers.push_back(std::make_unique()); + return ParseResult::OPERAND; + } + else if (mysql_global_variable_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else + { + return ParseResult::END; + } + + return ParseResult::OPERATOR; +} + + +ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected) +{ + ASTPtr tmp; + + /// ParserExpression can be called in this part of the query: + /// ALTER TABLE partition_all2 CLEAR INDEX [ p ] IN PARTITION ALL + /// + /// 'IN PARTITION' here is not an 'IN' operator, so we should stop parsing immediately + Expected stub; + if (ParserKeyword("IN PARTITION").checkWithoutMoving(pos, stub)) + return ParseResult::END; + + /// Try to find operators from 'op_table' + auto cur_op = op_table.begin(); + for (; cur_op != op_table.end(); ++cur_op) + { + if (parseOperator(pos, cur_op->first, expected)) + break; + } + + if (cur_op == op_table.end()) + { + if (layers.back()->allow_alias && ParserAlias(layers.back()->allow_alias_without_as_keyword).parse(pos, tmp, expected)) + { + if (!layers.back()->insertAlias(tmp)) + return ParseResult::ERROR; + + return ParseResult::OPERAND; + } + + return ParseResult::END; + } + + auto op = cur_op->second; + + if (op.type == OperatorType::Lambda) + { + if (!layers.back()->parseLambda()) + 
return ParseResult::ERROR; + + layers.back()->pushOperator(op); + return ParseResult::OPERAND; + } + + // 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator + if (op.function_name == "and" && layers.back()->between_counter) + { + layers.back()->between_counter--; + op = finish_between_operator; + } + + while (layers.back()->previousPriority() >= op.priority) + { + ASTPtr function; + Operator prev_op; + layers.back()->popOperator(prev_op); + + /// Mergeable operators are operators that are merged into one function: + /// For example: 'a OR b OR c' -> 'or(a, b, c)' and not 'or(or(a,b), c)' + if (prev_op.type == OperatorType::Mergeable && op.function_name == prev_op.function_name) + { + op.arity += prev_op.arity - 1; + break; + } + + if (prev_op.type == OperatorType::FinishBetween) + { + Operator tmp_op; + if (!layers.back()->popOperator(tmp_op)) + return ParseResult::ERROR; + + if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) + return ParseResult::ERROR; + + bool negative = tmp_op.type == OperatorType::StartNotBetween; + + ASTs arguments; + if (!layers.back()->popLastNOperands(arguments, 3)) + return ParseResult::ERROR; + + function = makeBetweenOperator(negative, arguments); + } + else + { + function = makeASTFunction(prev_op.function_name); + + if (!layers.back()->popLastNOperands(function->children[0]->children, prev_op.arity)) + return ParseResult::ERROR; + } + + layers.back()->pushOperand(function); + } + layers.back()->pushOperator(op); + + if (op.type == OperatorType::ArrayElement) + layers.push_back(std::make_unique()); + + + ParseResult next = ParseResult::OPERAND; + + // isNull & isNotNull is postfix unary operator + if (op.type == OperatorType::IsNull) + next = ParseResult::OPERATOR; + + if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween) + layers.back()->between_counter++; + + if (op.type == OperatorType::Cast) + { + next = 
ParseResult::OPERATOR; + + ASTPtr type_ast; + if (!ParserDataType().parse(pos, type_ast, expected)) + return ParseResult::ERROR; + + layers.back()->pushOperand(std::make_shared(queryToString(type_ast))); + } + + return next; +} + } From 87182ccd51c1249f5b7d36f14ad8912aa3c43b30 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 045/173] Kusto-phase1: Add Support to Kusto Query Language This is the initial implementation of the Kusto Query Language. In this commit, we support the following features as an MVP: Tabular expression statements Limit returned results Select Column (basic project) sort, order Perform string equality operations Filter using a list of elements Filter using common string operations Some string operators Aggregate by columns Base aggregate functions only support avg, count, min, max, sum Aggregate by time intervals --- src/Client/ClientBase.cpp | 15 +- src/Core/Settings.h | 1 + src/Interpreters/executeQuery.cpp | 19 +- src/Parsers/CMakeLists.txt | 1 + src/Parsers/Kusto/ParserKQLFilter.cpp | 39 ++++ src/Parsers/Kusto/ParserKQLFilter.h | 16 ++ src/Parsers/Kusto/ParserKQLLimit.cpp | 58 ++++++ src/Parsers/Kusto/ParserKQLLimit.h | 17 ++ src/Parsers/Kusto/ParserKQLOperators.cpp | 239 +++++++++++++++++++++++ src/Parsers/Kusto/ParserKQLOperators.h | 103 ++++++++++ src/Parsers/Kusto/ParserKQLProject.cpp | 47 +++++ src/Parsers/Kusto/ParserKQLProject.h | 22 +++ src/Parsers/Kusto/ParserKQLQuery.cpp | 123 ++++++++++++ src/Parsers/Kusto/ParserKQLQuery.h | 25 +++ src/Parsers/Kusto/ParserKQLSort.cpp | 71 +++++++ src/Parsers/Kusto/ParserKQLSort.h | 16 ++ src/Parsers/Kusto/ParserKQLStatement.cpp | 61 ++++++ src/Parsers/Kusto/ParserKQLStatement.h | 45 +++++ src/Parsers/Kusto/ParserKQLSummarize.cpp | 162 +++++++++++++++ src/Parsers/Kusto/ParserKQLSummarize.h | 19 ++ src/Parsers/Kusto/ParserKQLTable.cpp | 68 +++++++ src/Parsers/Kusto/ParserKQLTable.h | 18 ++ src/Parsers/Lexer.cpp | 2 +- src/Parsers/Lexer.h | 1 + 
src/Parsers/tests/gtest_Parser.cpp | 179 +++++++++++++++++ 25 files changed, 1359 insertions(+), 8 deletions(-) create mode 100644 src/Parsers/Kusto/ParserKQLFilter.cpp create mode 100644 src/Parsers/Kusto/ParserKQLFilter.h create mode 100644 src/Parsers/Kusto/ParserKQLLimit.cpp create mode 100644 src/Parsers/Kusto/ParserKQLLimit.h create mode 100644 src/Parsers/Kusto/ParserKQLOperators.cpp create mode 100644 src/Parsers/Kusto/ParserKQLOperators.h create mode 100644 src/Parsers/Kusto/ParserKQLProject.cpp create mode 100644 src/Parsers/Kusto/ParserKQLProject.h create mode 100644 src/Parsers/Kusto/ParserKQLQuery.cpp create mode 100644 src/Parsers/Kusto/ParserKQLQuery.h create mode 100644 src/Parsers/Kusto/ParserKQLSort.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSort.h create mode 100644 src/Parsers/Kusto/ParserKQLStatement.cpp create mode 100644 src/Parsers/Kusto/ParserKQLStatement.h create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.h create mode 100644 src/Parsers/Kusto/ParserKQLTable.cpp create mode 100644 src/Parsers/Kusto/ParserKQLTable.h diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index c399f01c565..1407395bf89 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -70,7 +70,7 @@ #include #include #include - +#include namespace fs = std::filesystem; using namespace std::literals; @@ -299,7 +299,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); + std::shared_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -308,10 +308,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; + const String & sql_dialect = settings.sql_dialect; + + 
if (sql_dialect == "kusto") + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + else + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + if (is_interactive || ignore_error) { String message; - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { @@ -321,7 +328,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 08a3df0a3e3..9d5535aa923 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -42,6 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ + M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index cdddd28adeb..20f4fa559f9 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -72,6 +72,7 @@ #include #include +#include namespace 
ProfileEvents { @@ -396,10 +397,22 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + const String & sql_dialect = settings.sql_dialect; + assert(sql_dialect == "clickhouse" || sql_dialect == "kusto"); - /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + if (sql_dialect == "kusto" && !internal) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else + { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } if (auto txn = context->getCurrentTransaction()) { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73f300fd5f6..73d46593e04 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) +add_headers_and_sources(clickhouse_parsers ./Kusto) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp new file mode 100644 index 00000000000..ad7ad807d03 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + Pos begin = pos; + String expr; + + KQLOperators convetor; + + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + if (expr.empty()) + expr = "(" + convetor.getExprFromToken(pos) +")"; + else + expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + } + + Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(tokenFilter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLFilter.h b/src/Parsers/Kusto/ParserKQLFilter.h new file mode 100644 index 00000000000..19bb38a7fda --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLFilter : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL where"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp new file mode 100644 index 00000000000..7811ebba9ab --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp 
@@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + Int64 minLimit = -1; + auto final_pos = pos; + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + auto isNumber = [&] + { + for (auto ch = pos->begin ; ch < pos->end; ++ch) + { + if (!isdigit(*ch)) + return false; + } + return true; + }; + + if (!isNumber()) + return false; + + auto limitLength = std::strtol(pos->begin,nullptr, 10); + if (-1 == minLimit) + { + minLimit = limitLength; + final_pos = pos; + } + else + { + if (minLimit > limitLength) + { + minLimit = limitLength; + final_pos = pos; + } + } + } + + if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h new file mode 100644 index 00000000000..d425659499d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLLimit : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL limit"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp new file mode 100644 index 00000000000..1db05d3c07a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -0,0 +1,239 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +{ + String new_expr, leftWildcards= "", rightWildcards=""; + + switch (wildcardsPos) + { + case WildcardsPos::none: + break; + + 
case WildcardsPos::left: + leftWildcards ="%"; + break; + + case WildcardsPos::right: + rightWildcards = "%"; + break; + + case WildcardsPos::both: + leftWildcards ="%"; + rightWildcards = "%"; + break; + } + + if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) + new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + else + throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::getExprFromToken(IParser::Pos pos) +{ + String res; + std::vector tokens; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + KQLOperatorValue opValue = KQLOperatorValue::none; + + auto token = String(pos->begin,pos->end); + + String op = token; + if ( token == "!" ) + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + op ="!"+String(pos->begin,pos->end); + } + else if (token == "matches") + { + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "regex") + op +=" regex"; + else + --pos; + } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "~") + op +="~"; + else + --pos; + } + + if (KQLOperator.find(op) != KQLOperator.end()) + opValue = KQLOperator[op]; + + String new_expr; + if (opValue == KQLOperatorValue::none) + tokens.push_back(op); + else + { + switch (opValue) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains: + new_expr = 
genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + break; + + case KQLOperatorValue::not_equal: + break; + + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + + case KQLOperatorValue::has: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + break; + + case KQLOperatorValue::has_any: + break; + + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + break; + + case KQLOperatorValue::not_hasprefix: + break; + + case KQLOperatorValue::hasprefix_cs: + break; + + case KQLOperatorValue::not_hasprefix_cs: + break; + + case KQLOperatorValue::hassuffix: + break; + + case 
KQLOperatorValue::not_hassuffix: + break; + + case KQLOperatorValue::hassuffix_cs: + break; + + case KQLOperatorValue::not_hassuffix_cs: + break; + + case KQLOperatorValue::in_cs: + new_expr = "in"; + break; + + case KQLOperatorValue::not_in_cs: + new_expr = "not in"; + break; + + case KQLOperatorValue::in: + break; + + case KQLOperatorValue::not_in: + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + break; + + default: + break; + } + + tokens.push_back(new_expr); + } + ++pos; + } + + for (auto it=tokens.begin(); it!=tokens.end(); ++it) + res = res + *it + " "; + + return res; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h new file mode 100644 index 00000000000..9beeeda55ef --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class KQLOperators { +public: + String getExprFromToken(IParser::Pos pos); +protected: + + enum class WildcardsPos:uint8_t + { + none, + left, + right, + both + }; + + enum class KQLOperatorValue : uint16_t + { + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal,//!~ + equal_cs, //= + not_equal_cs,//!= + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + 
hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in ,//!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, + }; + + std::unordered_map KQLOperator = + { + {"contains" , KQLOperatorValue::contains}, + {"!contains" , KQLOperatorValue::not_contains}, + {"contains_cs" , KQLOperatorValue::contains_cs}, + {"!contains_cs" , KQLOperatorValue::not_contains_cs}, + {"endswith" , KQLOperatorValue::endswith}, + {"!endswith" , KQLOperatorValue::not_endswith}, + {"endswith_cs" , KQLOperatorValue::endswith_cs}, + {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, + {"=~" , KQLOperatorValue::equal}, + {"!~" , KQLOperatorValue::not_equal}, + {"==" , KQLOperatorValue::equal_cs}, + {"!=" , KQLOperatorValue::not_equal_cs}, + {"has" , KQLOperatorValue::has}, + {"!has" , KQLOperatorValue::not_has}, + {"has_all" , KQLOperatorValue::has_all}, + {"has_any" , KQLOperatorValue::has_any}, + {"has_cs" , KQLOperatorValue::has_cs}, + {"!has_cs" , KQLOperatorValue::not_has_cs}, + {"hasprefix" , KQLOperatorValue::hasprefix}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix" , KQLOperatorValue::hassuffix}, + {"!hassuffix" , KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, + {"in" , KQLOperatorValue::in_cs}, + {"!in" , KQLOperatorValue::not_in_cs}, + {"in~" , KQLOperatorValue::in}, + {"!in~" , KQLOperatorValue::not_in}, + {"matches regex" , KQLOperatorValue::matches_regex}, + {"startswith" , KQLOperatorValue::startswith}, + {"!startswith" , KQLOperatorValue::not_startswith}, + {"startswith_cs" , KQLOperatorValue::startswith_cs}, + {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, + }; + String genHaystackOpExpr(std::vector 
&tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp new file mode 100644 index 00000000000..fee8cdb612b --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + String expr; + if (op_pos.empty()) + expr = "*"; + else + { + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it ; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + if (pos->type == TokenType::BareWord) + { + String tmp(pos->begin,pos->end); + + if (it != op_pos.begin() && columns.find(tmp) == columns.end()) + return false; + columns.insert(tmp); + } + ++pos; + } + } + expr = getExprFromToken(op_pos.back()); + } + + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + + + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h new file mode 100644 index 00000000000..3ab3c82f1be --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProject : public ParserKQLBase +{ +public: + void addColumn(String column) {columns.insert(column);} + +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::unordered_set columns; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp new file mode 100644 index 00000000000..0a9fa1fc4df --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -0,0 +1,123 @@ +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLBase :: parsePrepare(Pos & pos) +{ + op_pos.push_back(pos); + return true; +} + +String ParserKQLBase :: getExprFromToken(Pos pos) +{ + String res; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + res = res + String(pos->begin,pos->end) +" "; + ++pos; + } + return res; +} + +bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto select_query = std::make_shared(); + node = select_query; + + ParserKQLFilter KQLfilter_p; + ParserKQLLimit KQLlimit_p; + ParserKQLProject KQLproject_p; + ParserKQLSort KQLsort_p; + ParserKQLSummarize KQLsummarize_p; + ParserKQLTable KQLtable_p; + + ASTPtr select_expression_list; + ASTPtr tables; + ASTPtr where_expression; + ASTPtr group_expression_list; + ASTPtr order_expression_list; + ASTPtr limit_length; + + std::unordered_map KQLParser = { + { "filter",&KQLfilter_p}, + { "where",&KQLfilter_p}, + { "limit",&KQLlimit_p}, + { "take",&KQLlimit_p}, + { "project",&KQLproject_p}, + { "sort",&KQLsort_p}, + { "order",&KQLsort_p}, + { "summarize",&KQLsummarize_p}, + { "table",&KQLtable_p} + }; + + std::vector> operation_pos; + + operation_pos.push_back(std::make_pair("table",pos)); + + while (!pos->isEnd()) + { + ++pos; + if (pos->type == TokenType::PipeMark) + { + ++pos; + String KQLoperator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + return false; + ++pos; + operation_pos.push_back(std::make_pair(KQLoperator,pos)); + } + } + + for (auto &op_pos : operation_pos) + { + auto KQLoperator = op_pos.first; + auto npos = op_pos.second; + if (!npos.isValid()) + return false; + + if (!KQLParser[KQLoperator]->parsePrepare(npos)) + return false; + } + + if (!KQLtable_p.parse(pos, tables, expected)) + return false; + + if (!KQLproject_p.parse(pos, select_expression_list, expected)) + return false; + + if 
(!KQLlimit_p.parse(pos, limit_length, expected)) + return false; + + if (!KQLfilter_p.parse(pos, where_expression, expected)) + return false; + + if (!KQLsort_p.parse(pos, order_expression_list, expected)) + return false; + + if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + return false; + else + group_expression_list = KQLsummarize_p.group_expression_list; + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h new file mode 100644 index 00000000000..25aa4e6b83c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ +class ParserKQLBase : public IParserBase +{ +public: + virtual bool parsePrepare(Pos & pos) ; + +protected: + std::vector op_pos; + std::vector expresions; + virtual String getExprFromToken(Pos pos); +}; + +class ParserKQLQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp new file mode 100644 index 00000000000..9f226c2fc82 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSort :: parseImpl(Pos & pos, 
ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + bool has_dir = false; + std::vector has_directions; + ParserOrderByExpressionList order_list; + ASTPtr order_expression_list; + + ParserKeyword by("by"); + + pos = op_pos.back(); // sort only affected by last one + + if (!by.ignore(pos, expected)) + return false; + + if (!order_list.parse(pos,order_expression_list,expected)) + return false; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + return false; + + pos = op_pos.back(); + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + String tmp(pos->begin,pos->end); + if (tmp == "desc" or tmp == "asc") + has_dir = true; + + if (pos->type == TokenType::Comma) + { + has_directions.push_back(has_dir); + has_dir = false; + } + + ++pos; + } + has_directions.push_back(has_dir); + + for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + { + if (!has_directions[i]) + { + auto order_expr = order_expression_list->children[i]->as(); + order_expr->direction = -1; // default desc + if (!order_expr->nulls_direction_was_explicitly_specified) + order_expr->nulls_direction = -1; + else + order_expr->nulls_direction = order_expr->nulls_direction == 1 ? 
-1 : 1; + + } + } + + node = order_expression_list; + + pos =begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.h b/src/Parsers/Kusto/ParserKQLSort.h new file mode 100644 index 00000000000..d9afefc196c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSort : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL order by"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp new file mode 100644 index 00000000000..7dea87eef25 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserSetQuery set_p; + + bool res = query_with_output_p.parse(pos, node, expected) + || set_p.parse(pos, node, expected); + + return res; +} + +bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery KQL_p; + + ASTPtr query; + bool parsed = KQL_p.parse(pos, query, expected); + + if (!parsed) + return false; + + node = std::move(query); + return true; +} + +bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + // will support union next phase + ASTPtr KQLQuery; + + if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + return false; + + if (KQLQuery->as()) + { + node = std::move(KQLQuery); + return true; + } + + auto list_node = std::make_shared(); + list_node->children.push_back(KQLQuery); + + auto select_with_union_query = std::make_shared(); + node = select_with_union_query; + select_with_union_query->list_of_selects = list_node; + 
select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h new file mode 100644 index 00000000000..1eed2d00845 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLStatement : public IParserBase +{ +private: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL Statement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + + +class ParserKQLWithOutput : public IParserBase +{ +protected: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL with output"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + +class ParserKQLWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp new file mode 100644 index 00000000000..f7422c02bca --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -0,0 +1,162 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +namespace DB +{ +std::pair removeLastWord(String input) +{ + std::istringstream ss(input); + std::string token; + std::vector temp; + + while (std::getline(ss, token, ' ')) + { + temp.push_back(token); + } + + String firstPart; + for (std::size_t i = 0; i < temp.size() - 1; i++) + { + firstPart += temp[i]; + } + + return std::make_pair(firstPart, temp[temp.size() - 1]); +} + + +bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + if (op_pos.size() != 1) // now only support one summarize + return false; + + //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName + + //summarize has syntax : + + //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] + + //right now , we only support: + + //T | summarize Aggregation [, ...] [by GroupExpression [, ...]] + //Aggregation -> the Aggregation function on column + //GroupExpression - > columns + + auto begin = pos; + + pos = op_pos.back(); + String exprAggregation; + String exprGroupby; + String exprColumns; + + bool groupby = false; + bool bin_function = false; + String bin_column; + String last_string; + String column_name; + int character_passed = 0; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + groupby = true; + else + { + if (groupby) + { + if (String(pos->begin, pos->end) == "bin") + { + exprGroupby = exprGroupby + "round" + " "; + bin_function = true; + } + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + + if (bin_function && last_string == "(") + { + bin_column = String(pos->begin, pos->end); + bin_function = false; + } + + last_string = String(pos->begin, pos->end); + } + + else + { + if (String(pos->begin, pos->end) == "=") + { + std::pair temp = removeLastWord(exprAggregation); + 
exprAggregation = temp.first; + column_name = temp.second; + } + else + { + if (!column_name.empty()) + { + exprAggregation = exprAggregation + String(pos->begin, pos->end); + character_passed++; + if (String(pos->begin, pos->end) == ")") // was 4 + { + exprAggregation = exprAggregation + " AS " + column_name; + column_name = ""; + } + } + else + { + exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + } + } + } + } + ++pos; + } + + if(!bin_column.empty()) + exprGroupby = exprGroupby + " AS " + bin_column; + + if (exprGroupby.empty()) + exprColumns = exprAggregation; + else + { + if (exprAggregation.empty()) + exprColumns = exprGroupby; + else + exprColumns = exprGroupby + "," + exprAggregation; + } + Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); + IParser::Pos posColumns(tokenColumns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + return false; + + if (groupby) + { + Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); + IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + return false; + } + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h new file mode 100644 index 00000000000..426ac29fe6a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSummarize : public ParserKQLBase +{ +public: + ASTPtr group_expression_list; +protected: + const char * getName() const override { return "KQL summarize"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp new file mode 100644 index 00000000000..8d450799785 --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLTable :: parsePrepare(Pos & pos) +{ + if (!op_pos.empty()) + return false; + + op_pos.push_back(pos); + return true; +} + +bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::unordered_set sql_keywords + ( { + "SELECT", + "INSERT", + "CREATE", + "ALTER", + "SYSTEM", + "SHOW", + "GRANT", + "REVOKE", + "ATTACH", + "CHECK", + "DESCRIBE", + "DESC", + "DETACH", + "DROP", + "EXISTS", + "KILL", + "OPTIMIZE", + "RENAME", + "SET", + "TRUNCATE", + "USE", + "EXPLAIN" + } ); + + if (op_pos.empty()) + return false; + + auto begin = pos; + pos = op_pos.back(); + + String table_name(pos->begin,pos->end); + String table_name_upcase(table_name); + + std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); + + if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) + return false; + + if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + return false; + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h new file mode 100644 index 00000000000..1266b6e732d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTable : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL Table"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parsePrepare(Pos &pos) override; + +}; + +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 747a13d46f7..892c0ad4718 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl() ++pos; if (pos < end && *pos == '|') return Token(TokenType::Concatenation, token_begin, ++pos); - return Token(TokenType::ErrorSinglePipeMark, 
token_begin, pos); + return Token(TokenType::PipeMark, token_begin, pos); } case '@': { diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index ec472fb1a36..0c439ca0677 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ M(At) /** @. Used for specifying user names and also for MySQL-style variables. */ \ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5b6d49e2741..8ffc5f77f90 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,181 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, "^$" } }))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + }, + { + "Customers | 
project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", + "throws Syntax error" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC" + }, + { + "Customers | sort by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | order by LastName", + "SELECT *\nFROM Customers\nORDER BY LastName DESC" + }, + { + "Customers | order by Age desc , FirstName asc ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY Age DESC" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + 
"SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "throws Syntax error" + }, + { + "Customers |summarize count() by Occupation", + "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE 
hasToken(Education, 'School')" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers|summarize count() by bin(Age, 10) ", + "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" + } +}))); From 10f87612ebf599016e3b1ea47083f67363132ef8 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 046/173] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 104 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f7422c02bca..24473118dc0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,7 +1,9 @@ #include #include -#include +//#include #include +#include +#include #include #include #include @@ -19,16 +21,21 @@ #include #include #include + namespace DB { -std::pair removeLastWord(String input) +std::pair ParserKQLSummarize::removeLastWord(String input) { - std::istringstream ss(input); - std::string token; + ReadBufferFromString in(input); + String token; std::vector temp; - while (std::getline(ss, token, ' ')) + while (!in.eof()) { + readStringUntilWhitespace(token, in); + if (in.eof()) + break; + skipWhitespaceIfAny(in); temp.push_back(token); } @@ -37,10 +44,65 @@ std::pair removeLastWord(String input) { firstPart += temp[i]; } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair("", ""); } +String ParserKQLSummarize::getBinGroupbyString(String exprBin) +{ + String column_name; + bool bracket_start = false; + bool comma_start = false; + String 
bin_duration; + + for (std::size_t i = 0; i < exprBin.size(); i++) + { + if (comma_start && exprBin[i] != ')') + bin_duration += exprBin[i]; + if (exprBin[i] == ',') + { + comma_start = true; + bracket_start = false; + } + if (bracket_start == true) + column_name += exprBin[i]; + if (exprBin[i] == '(') + bracket_start = true; + } + + + std::size_t len = bin_duration.size(); + char bin_type = bin_duration[len - 1]; // y, d, h, m, s + if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) + { + return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; + } + bin_duration = bin_duration.substr(0, len - 1); + + switch (bin_type) + { + case 'y': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" + + bin_duration + " * (12*30*86400))) AS bin_year"; + case 'd': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration + + " * 86400)) AS bin_day"; + case 'h': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration + + " * 3600)) AS bin_hour"; + case 'm': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration + + " * 60)) AS bin_minute"; + case 's': + return "toDateTime(" + column_name + ") AS bin_sec"; + default: + return ""; + } +} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -67,7 +129,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String exprAggregation; String exprGroupby; String exprColumns; - + String exprBin; bool groupby = false; bool bin_function = false; String bin_column; @@ -83,21 +145,20 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == 
"bin") + if (String(pos->begin, pos->end) == "bin" || bin_function == true) { - exprGroupby = exprGroupby + "round" + " "; bin_function = true; - } - else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; - - if (bin_function && last_string == "(") - { - bin_column = String(pos->begin, pos->end); - bin_function = false; + exprBin += String(pos->begin, pos->end); + if (String(pos->begin, pos->end) == ")") + { + exprBin = getBinGroupbyString(exprBin); + exprGroupby += exprBin; + bin_function = false; + } } - last_string = String(pos->begin, pos->end); + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; } else @@ -114,13 +175,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { exprAggregation = exprAggregation + String(pos->begin, pos->end); character_passed++; - if (String(pos->begin, pos->end) == ")") // was 4 + if (String(pos->begin, pos->end) == ")") { exprAggregation = exprAggregation + " AS " + column_name; column_name = ""; } } - else + else if (!bin_function) { exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; } @@ -130,9 +191,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if(!bin_column.empty()) - exprGroupby = exprGroupby + " AS " + bin_column; - if (exprGroupby.empty()) exprColumns = exprAggregation; else diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 426ac29fe6a..1420d5ce519 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,15 +5,16 @@ namespace DB { - class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; + protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - + std::pair removeLastWord(String input); + String getBinGroupbyString(String exprBin); }; } From 
45e8d29542f3a373d0b436f82b40a0cd2d608403 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:18:49 -0700 Subject: [PATCH 047/173] added single unit test case for summarize bin() --- src/Parsers/tests/gtest_Parser.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 8ffc5f77f90..6d33ed20f33 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -430,6 +430,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers |summarize max(Age) by Occupation", "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, + { + "Customers |summarize count() by bin(Age, 10)", + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" + } { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" From e20b2ed6eb19c3f471e94a6d7cbdaecd4eeb7a66 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:29:51 -0700 Subject: [PATCH 048/173] removed unwanted comments --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 24473118dc0..0260902c937 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,6 +1,5 @@ #include #include -//#include #include #include #include @@ -111,17 +110,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName - - //summarize has syntax : - - //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] - - //right now , we only support: - - //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] - //Aggregation -> the Aggregation function on column - //GroupExpression - > columns auto begin = pos; From 844bd7c3d7975a571c6c28a6de77390aef16eb69 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 12:06:15 -0700 Subject: [PATCH 049/173] corrected unit test --- src/Parsers/tests/gtest_Parser.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 6d33ed20f33..1ce82cab3bd 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -432,8 +432,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" - } + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + }, { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" @@ -469,9 +469,5 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" - }, - { - "Customers|summarize count() by bin(Age, 10) ", - "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From fdaffac96b20c49c6ebed4c3babac2aa64e9fd9c Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 18:49:22 -0700 Subject: [PATCH 050/173] Kusto-phase1 : Add new test cases --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1ce82cab3bd..ee1e5fa6d8c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -469,5 +469,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE 
match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); From 20758da3947550dc41445dea09eb6c9d91ddd1a3 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 22:17:58 -0700 Subject: [PATCH 051/173] Kusto-phase1: Fixed the bug for KQL filer with multiple operations --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 ++ src/Parsers/tests/gtest_Parser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1db05d3c07a..726f28308ee 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -84,6 +84,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; } + else + --pos; if (KQLOperator.find(op) != KQLOperator.end()) opValue = KQLOperator[op]; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index ee1e5fa6d8c..cb0b49aecbb 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -408,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "throws Syntax error" + "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" }, { "Customers |summarize count() by Occupation", From 08022a818925c708807341c5631c6482bd17ef6e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 052/173] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLFilter.cpp | 11 ++-- src/Parsers/Kusto/ParserKQLLimit.cpp | 25 ++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 34 +++++------ src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 56 ++++++++--------- 
src/Parsers/Kusto/ParserKQLSort.cpp | 4 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 14 ++--- src/Parsers/Kusto/ParserKQLSummarize.cpp | 76 ++++++++++++------------ src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- 9 files changed, 111 insertions(+), 115 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index ad7ad807d03..466370f5d80 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -17,17 +17,16 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLOperators convetor; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po : op_pos) { - pos = *it; if (expr.empty()) - expr = "(" + convetor.getExprFromToken(pos) +")"; + expr = "(" + convetor.getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; } - Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(tokenFilter, pos.max_depth); + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 7811ebba9ab..4f7eddd9662 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,14 +13,13 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; auto begin = pos; - Int64 minLimit = -1; + Int64 min_limit = -1; auto final_pos = pos; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po: op_pos) { - pos = *it; - auto isNumber = [&] + auto is_number = [&] { - for (auto ch = pos->begin ; ch < pos->end; ++ch) + for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) { if (!isdigit(*ch)) 
return false; @@ -28,21 +27,21 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!isNumber()) + if (!is_number()) return false; - auto limitLength = std::strtol(pos->begin,nullptr, 10); - if (-1 == minLimit) + auto limit_length = std::strtol(op_po->begin,nullptr, 10); + if (-1 == min_limit) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } else { - if (minLimit > limitLength) + if (min_limit > limit_length) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 726f28308ee..90b37ba8aea 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -10,33 +10,33 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) { - String new_expr, leftWildcards= "", rightWildcards=""; + String new_expr, left_wildcards, right_wildcards; - switch (wildcardsPos) + switch (wildcards_pos) { case WildcardsPos::none: break; case WildcardsPos::left: - leftWildcards ="%"; + left_wildcards ="%"; break; case WildcardsPos::right: - rightWildcards = "%"; + right_wildcards = "%"; break; case WildcardsPos::both: - leftWildcards ="%"; - rightWildcards = "%"; + left_wildcards ="%"; + right_wildcards = "%"; break; } - if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) - new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + if (!tokens.empty() && ((++token_pos)->type == 
TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else - throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } @@ -48,7 +48,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue opValue = KQLOperatorValue::none; + KQLOperatorValue op_value = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); @@ -88,14 +88,14 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) --pos; if (KQLOperator.find(op) != KQLOperator.end()) - opValue = KQLOperator[op]; + op_value = KQLOperator[op]; String new_expr; - if (opValue == KQLOperatorValue::none) + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { - switch (opValue) + switch (op_value) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -192,7 +192,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; @@ -232,8 +232,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) ++pos; } - for (auto it=tokens.begin(); it!=tokens.end(); ++it) - res = res + *it + " "; + for (auto & token : tokens) + res = res + token + " "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9beeeda55ef..4a9a13cf14f 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -97,7 +97,7 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - 
String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0a9fa1fc4df..55aade6b2b9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -35,12 +35,12 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto select_query = std::make_shared(); node = select_query; - ParserKQLFilter KQLfilter_p; - ParserKQLLimit KQLlimit_p; - ParserKQLProject KQLproject_p; - ParserKQLSort KQLsort_p; - ParserKQLSummarize KQLsummarize_p; - ParserKQLTable KQLtable_p; + ParserKQLFilter kql_filter_p; + ParserKQLLimit kql_limit_p; + ParserKQLProject kql_project_p; + ParserKQLSort kql_sort_p; + ParserKQLSummarize kql_summarize_p; + ParserKQLTable kql_table_p; ASTPtr select_expression_list; ASTPtr tables; @@ -49,16 +49,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr order_expression_list; ASTPtr limit_length; - std::unordered_map KQLParser = { - { "filter",&KQLfilter_p}, - { "where",&KQLfilter_p}, - { "limit",&KQLlimit_p}, - { "take",&KQLlimit_p}, - { "project",&KQLproject_p}, - { "sort",&KQLsort_p}, - { "order",&KQLsort_p}, - { "summarize",&KQLsummarize_p}, - { "table",&KQLtable_p} + std::unordered_map kql_parser = { + { "filter",&kql_filter_p}, + { "where",&kql_filter_p}, + { "limit",&kql_limit_p}, + { "take",&kql_limit_p}, + { "project",&kql_project_p}, + { "sort",&kql_sort_p}, + { "order",&kql_sort_p}, + { "summarize",&kql_summarize_p}, + { "table",&kql_table_p} }; std::vector> operation_pos; @@ -71,44 +71,44 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark) { ++pos; - String KQLoperator(pos->begin,pos->end); - 
if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + String kql_operator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(KQLoperator,pos)); + operation_pos.push_back(std::make_pair(kql_operator,pos)); } } for (auto &op_pos : operation_pos) { - auto KQLoperator = op_pos.first; + auto kql_operator = op_pos.first; auto npos = op_pos.second; if (!npos.isValid()) return false; - if (!KQLParser[KQLoperator]->parsePrepare(npos)) + if (!kql_parser[kql_operator]->parsePrepare(npos)) return false; } - if (!KQLtable_p.parse(pos, tables, expected)) + if (!kql_table_p.parse(pos, tables, expected)) return false; - if (!KQLproject_p.parse(pos, select_expression_list, expected)) + if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; - if (!KQLlimit_p.parse(pos, limit_length, expected)) + if (!kql_limit_p.parse(pos, limit_length, expected)) return false; - if (!KQLfilter_p.parse(pos, where_expression, expected)) + if (!kql_filter_p.parse(pos, where_expression, expected)) return false; - if (!KQLsort_p.parse(pos, order_expression_list, expected)) + if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; - if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else - group_expression_list = KQLsummarize_p.group_expression_list; + group_expression_list = kql_summarize_p.group_expression_list; select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 9f226c2fc82..70e3283ee3e 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ 
-48,11 +48,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } has_directions.push_back(has_dir); - for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto order_expr = order_expression_list->children[i]->as(); + auto *order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 7dea87eef25..2afbad22131 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -21,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery KQL_p; + ParserKQLWithUnionQuery kql_p; ASTPtr query; - bool parsed = KQL_p.parse(pos, query, expected); + bool parsed = kql_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +36,19 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // will support union next phase - ASTPtr KQLQuery; + ASTPtr kql_query; - if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + if (!ParserKQLQuery().parse(pos, kql_query, expected)) return false; - if (KQLQuery->as()) + if (kql_query->as()) { - node = std::move(KQLQuery); + node = std::move(kql_query); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(KQLQuery); + list_node->children.push_back(kql_query); auto select_with_union_query = std::make_shared(); node = select_with_union_query; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp 
b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 0260902c937..48544a31104 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -38,42 +38,41 @@ std::pair ParserKQLSummarize::removeLastWord(String input) temp.push_back(token); } - String firstPart; + String first_part; for (std::size_t i = 0; i < temp.size() - 1; i++) { - firstPart += temp[i]; + first_part += temp[i]; } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); } -String ParserKQLSummarize::getBinGroupbyString(String exprBin) +String ParserKQLSummarize::getBinGroupbyString(String expr_bin) { String column_name; bool bracket_start = false; bool comma_start = false; String bin_duration; - for (std::size_t i = 0; i < exprBin.size(); i++) + for (char ch : expr_bin) { - if (comma_start && exprBin[i] != ')') - bin_duration += exprBin[i]; - if (exprBin[i] == ',') + if (comma_start && ch != ')') + bin_duration += ch; + if (ch == ',') { comma_start = true; bracket_start = false; } - if (bracket_start == true) - column_name += exprBin[i]; - if (exprBin[i] == '(') + if (bracket_start) + column_name += ch; + if (ch == '(') bracket_start = true; } - std::size_t len = bin_duration.size(); char bin_type = bin_duration[len - 1]; // y, d, h, m, s if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) @@ -110,14 +109,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - auto begin = pos; pos = op_pos.back(); - String exprAggregation; - String exprGroupby; - String exprColumns; - String exprBin; + String expr_aggregation; + String expr_groupby; + String expr_columns; + String expr_bin; bool groupby = false; bool bin_function = false; String bin_column; @@ -133,45 +131,45 @@ 
bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin" || bin_function == true) + if (String(pos->begin, pos->end) == "bin" || bin_function) { bin_function = true; - exprBin += String(pos->begin, pos->end); + expr_bin += String(pos->begin, pos->end); if (String(pos->begin, pos->end) == ")") { - exprBin = getBinGroupbyString(exprBin); - exprGroupby += exprBin; + expr_bin = getBinGroupbyString(expr_bin); + expr_groupby += expr_bin; bin_function = false; } } else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; } else { if (String(pos->begin, pos->end) == "=") { - std::pair temp = removeLastWord(exprAggregation); - exprAggregation = temp.first; + std::pair temp = removeLastWord(expr_aggregation); + expr_aggregation = temp.first; column_name = temp.second; } else { if (!column_name.empty()) { - exprAggregation = exprAggregation + String(pos->begin, pos->end); + expr_aggregation = expr_aggregation + String(pos->begin, pos->end); character_passed++; if (String(pos->begin, pos->end) == ")") { - exprAggregation = exprAggregation + " AS " + column_name; + expr_aggregation = expr_aggregation + " AS " + column_name; column_name = ""; } } else if (!bin_function) { - exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } } } @@ -179,25 +177,25 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if (exprGroupby.empty()) - exprColumns = exprAggregation; + if (expr_groupby.empty()) + expr_columns = expr_aggregation; else { - if (exprAggregation.empty()) - exprColumns = exprGroupby; + if (expr_aggregation.empty()) + expr_columns = expr_groupby; else - exprColumns = exprGroupby + "," + exprAggregation; + expr_columns = expr_groupby + "," + expr_aggregation; } - Tokens 
tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); - IParser::Pos posColumns(tokenColumns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) return false; if (groupby) { - Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); - IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1420d5ce519..b243f74d08f 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -13,8 +13,8 @@ public: protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - std::pair removeLastWord(String input); - String getBinGroupbyString(String exprBin); + static std::pair removeLastWord(String input); + static String getBinGroupbyString(String expr_bin); }; } From 516a6c0844543d44d34feca5314b74000dff4f87 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 12 Jun 2022 20:05:51 -0700 Subject: [PATCH 053/173] Kusto-pahse1: Fixed moy style issues. 
--- src/Parsers/Kusto/ParserKQLOperators.cpp | 8 ++++---- src/Parsers/Kusto/ParserKQLOperators.h | 3 ++- src/Parsers/Kusto/ParserKQLProject.cpp | 2 -- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLQuery.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 10 +++++----- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 90b37ba8aea..260c9070d51 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!" 
) + if ( token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -134,7 +134,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -142,7 +142,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 4a9a13cf14f..a780e18d333 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -6,7 +6,8 @@ namespace DB { -class KQLOperators { +class KQLOperators +{ public: String getExprFromToken(IParser::Pos pos); protected: diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fee8cdb612b..0e25c9c4a6c 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -42,6 +42,4 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected return true; } - - } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 55aade6b2b9..1a850e77f48 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include namespace DB @@ -15,8 +14,8 @@ namespace DB bool ParserKQLBase :: parsePrepare(Pos & pos) { - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } String ParserKQLBase :: getExprFromToken(Pos pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 25aa4e6b83c..0545cd00cd9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -11,7 +11,7 @@ 
public: protected: std::vector op_pos; - std::vector expresions; + std::vector expressions; virtual String getExprFromToken(Pos pos); }; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 8d450799785..a7ae7fef579 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -9,17 +9,17 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { - if (!op_pos.empty()) + if (!op_pos.empty()) return false; - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ( { + ({ "SELECT", "INSERT", "CREATE", @@ -42,7 +42,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - } ); + }); if (op_pos.empty()) return false; From 30ce50faff20570d379861286b85f46bc866070e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 13 Jun 2022 06:26:02 -0700 Subject: [PATCH 054/173] Kusto-phase1: Fixed misleading indentation --- src/Parsers/Kusto/ParserKQLOperators.cpp | 4 ++-- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 260c9070d51..60fa022f9bb 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; else throw Exception("Syntax error 
near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!") + if (token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index a7ae7fef579..f1fc13d2c48 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { if (!op_pos.empty()) - return false; + return false; op_pos.push_back(pos); return true; From 8ee2a40a4c49c10c76005e535ca295da5ee8e696 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Tue, 16 Aug 2022 20:10:44 -0400 Subject: [PATCH 055/173] adding missing headers --- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index a780e18d333..64af156f505 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,7 +2,7 @@ #include #include - +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index f1fc13d2c48..fadf5305e89 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,7 +3,7 @@ #include #include #include - +#include namespace DB { From c2c457ea0e44a2453474153a78a3a133772ae7f0 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 17 Aug 2022 06:03:41 -0700 Subject: [PATCH 056/173] Kusto-phase1: Change the dialect to Enum, rename sql_dialect to dialect, set limit to subquery --- src/Client/ClientBase.cpp | 4 ++-- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 4 +++- src/Core/SettingsEnums.h | 8 ++++++++ 
src/Interpreters/executeQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLLimit.cpp | 9 ++++++++- src/Parsers/Kusto/ParserKQLLimit.h | 5 +++++ src/Parsers/Kusto/ParserKQLOperators.h | 1 + src/Parsers/Kusto/ParserKQLQuery.cpp | 8 +++++++- src/Parsers/Kusto/ParserKQLTable.cpp | 1 + src/Parsers/tests/gtest_Parser.cpp | 12 ++++++------ 11 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 1407395bf89..871a7849d5b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -308,9 +308,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; - const String & sql_dialect = settings.sql_dialect; + const Dialect & dialect = settings.dialect; - if (sql_dialect == "kusto") + if (dialect == Dialect::kusto) parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); else parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9d5535aa923..24f6d610a81 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -42,7 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ - M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ + M(Dialect, dialect, Dialect::clickhouse, "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff 
--git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 616026520db..54e1f882d58 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -158,5 +158,7 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) - +IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, + {{"clickhouse", Dialect::clickhouse}, + {"kusto", Dialect::kusto}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 308d53ff690..3f52fa44237 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -183,4 +183,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) +enum class Dialect +{ + clickhouse, + kusto, + kusto_auto, +}; + +DECLARE_SETTING_ENUM(Dialect) } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 20f4fa559f9..8bd629f1adc 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -397,10 +397,9 @@ static std::tuple executeQueryImpl( String query_table; try { - const String & sql_dialect = settings.sql_dialect; - assert(sql_dialect == "clickhouse" || sql_dialect == "kusto"); + const Dialect & dialect = settings.dialect; - if (sql_dialect == "kusto" && !internal) + if (dialect == Dialect::kusto && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 4f7eddd9662..ece04f644cc 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include namespace DB { @@ -46,7 +48,12 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } 
} - if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + String sub_query = std::format("( SELECT * FROM {} LIMIT {} )", table_name, String(final_pos->begin, final_pos->end)); + + Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserTablesInSelectQuery().parse(pos_subquery, node, expected)) return false; pos = begin; diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h index d425659499d..c234985b0a6 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.h +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -8,10 +8,15 @@ namespace DB class ParserKQLLimit : public ParserKQLBase { +public: + void setTableName(String table_name_) {table_name = table_name_;} protected: const char * getName() const override { return "KQL limit"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + String table_name; }; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index ed6ebba2441..64af156f505 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 1a850e77f48..7f6fcbcdb70 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -63,6 +63,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) std::vector> operation_pos; operation_pos.push_back(std::make_pair("table",pos)); + String table_name(pos->begin,pos->end); while (!pos->isEnd()) { @@ -95,8 +96,14 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; + kql_limit_p.setTableName(table_name); if (!kql_limit_p.parse(pos, limit_length, expected)) 
return false; + else + { + if (limit_length) + tables = std::move(limit_length); + } if (!kql_filter_p.parse(pos, where_expression, expected)) return false; @@ -114,7 +121,6 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); - select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 29fabd5056c..fadf5305e89 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index cb0b49aecbb..3575e8ba175 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -308,23 +308,23 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | project FirstName,LastName,Occupation | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | limit 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | take 1 | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" }, { "Customers | project 
FirstName,LastName,Occupation | take 3 | take 1", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", - "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", @@ -336,7 +336,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | take 3 | order by FirstName desc", - "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC" }, { "Customers | sort by FirstName asc", From 6b57b219a4997eef0275c3b4e5bcfb2c0968c81f Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 20 Aug 2022 20:01:27 -0700 Subject: [PATCH 057/173] Kusto-phase1: remove unused variable --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 48544a31104..f3c402a80be 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -121,7 +121,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String bin_column; String last_string; String column_name; - int character_passed = 0; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { @@ -160,7 +159,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (!column_name.empty()) { expr_aggregation = expr_aggregation + String(pos->begin, pos->end); - character_passed++; + if (String(pos->begin, pos->end) == ")") { expr_aggregation = expr_aggregation + " AS " 
+ column_name; From eab8b7b42d72ee01aabe057290453ed8f21c2e5e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 5 Sep 2022 08:25:08 +0000 Subject: [PATCH 058/173] Always start embedded Keeper in async mode --- programs/server/Server.cpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d788270ecf9..5e5a1be0b8f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1279,18 +1279,8 @@ int Server::main(const std::vector & /*args*/) if (config().has("keeper_server")) { #if USE_NURAFT - //// If we don't have configured connection probably someone trying to use clickhouse-server instead - //// of clickhouse-keeper, so start synchronously. - bool can_initialize_keeper_async = false; - - if (has_zookeeper) /// We have configured connection to some zookeeper cluster - { - /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start - /// synchronously. - can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); - } /// Initialize keeper RAFT. 
- global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + global_context->initializeKeeperDispatcher(/* start_async */ true); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & From 11fb90b2816f01efe46b4bca0a9c6ea3ac8c4fa7 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 5 Sep 2022 16:21:16 +0000 Subject: [PATCH 059/173] Better --- src/Parsers/ExpressionElementParsers.cpp | 68 +- src/Parsers/ExpressionElementParsers.h | 10 - src/Parsers/ExpressionListParsers.cpp | 764 ++++-------------- src/Parsers/ExpressionListParsers.h | 257 +----- src/Parsers/ParserSelectQuery.cpp | 37 +- .../00984_parser_stack_overflow.reference | 1 - .../00984_parser_stack_overflow.sh | 1 + tests/queries/0_stateless/_02.reference | 0 tests/queries/0_stateless/_02.sh | 98 --- 9 files changed, 189 insertions(+), 1047 deletions(-) delete mode 100644 tests/queries/0_stateless/_02.reference delete mode 100755 tests/queries/0_stateless/_02.sh diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 06dbb8172b5..dfce06fa7c5 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1865,42 +1865,60 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return true; } +template +struct CollectionOfLiteralsLayer +{ + explicit CollectionOfLiteralsLayer(IParser::Pos & pos) : literal_begin(pos) + { + ++pos; + } + + IParser::Pos literal_begin; + Collection arr; +}; + template bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type != opening_bracket) return false; - Pos literal_begin = pos; + std::vector> layers; + layers.emplace_back(pos); - Collection arr; ParserLiteral literal_p; - ParserCollectionOfLiterals collection_p(opening_bracket, closing_bracket); - ++pos; while (pos.isValid()) { - if 
(!arr.empty()) + if (!layers.back().arr.empty()) { if (pos->type == closing_bracket) { std::shared_ptr literal; /// Parse one-element tuples (e.g. (1)) later as single values for backward compatibility. - if (std::is_same_v && arr.size() == 1) + if (std::is_same_v && layers.back().arr.size() == 1) return false; - literal = std::make_shared(std::move(arr)); - literal->begin = literal_begin; + literal = std::make_shared(std::move(layers.back().arr)); + literal->begin = layers.back().literal_begin; literal->end = ++pos; - node = literal; - return true; + + layers.pop_back(); + + if (layers.empty()) + { + node = literal; + return true; + } + + layers.back().arr.push_back(literal->value); } else if (pos->type == TokenType::Comma) { ++pos; } - else if (pos->type == TokenType::Colon && std::is_same_v && arr.size() % 2 == 1) + else if (pos->type == TokenType::Colon && std::is_same_v && layers.back().arr.size() % 2 == 1) { ++pos; } @@ -1912,10 +1930,12 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, } ASTPtr literal_node; - if (!literal_p.parse(pos, literal_node, expected) && !collection_p.parse(pos, literal_node, expected)) + if (literal_p.parse(pos, literal_node, expected)) + layers.back().arr.push_back(literal_node->as().value); + else if (pos->type == opening_bracket) + layers.emplace_back(pos); + else return false; - - arr.push_back(literal_node->as().value); } expected.add(pos, getTokenName(closing_bracket)); @@ -2411,26 +2431,6 @@ bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & e } -bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - return ParserSubquery().parse(pos, node, expected) - || ParserCastOperator().parse(pos, node, expected) - || ParserTupleOfLiterals().parse(pos, node, expected) - || ParserParenthesisExpression().parse(pos, node, expected) - || ParserArrayOfLiterals().parse(pos, node, expected) - || ParserArray().parse(pos, node, expected) - || 
ParserLiteral().parse(pos, node, expected) - || ParserCase().parse(pos, node, expected) - || ParserColumnsMatcher().parse(pos, node, expected) /// before ParserFunction because it can be also parsed as a function. - || ParserFunction().parse(pos, node, expected) - || ParserQualifiedAsterisk().parse(pos, node, expected) - || ParserAsterisk().parse(pos, node, expected) - || ParserCompoundIdentifier(false, true).parse(pos, node, expected) - || ParserSubstitution().parse(pos, node, expected) - || ParserMySQLGlobalVariable().parse(pos, node, expected); -} - - bool ParserWithOptionalAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (!elem_parser->parse(pos, node, expected)) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 3883631b61c..abd63f81424 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -394,16 +394,6 @@ protected: }; -/** The expression element is one of: an expression in parentheses, an array, a literal, a function, an identifier, an asterisk. - */ -class ParserExpressionElement : public IParserBase -{ -protected: - const char * getName() const override { return "element of expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - /** An expression element, possibly with an alias, if appropriate. 
*/ class ParserWithOptionalAlias : public IParserBase diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index a6e499e9cea..546bf68a239 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -35,75 +35,6 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -const char * ParserMultiplicativeExpression::operators[] = -{ - "*", "multiply", - "/", "divide", - "%", "modulo", - "MOD", "modulo", - "DIV", "intDiv", - nullptr -}; - -const char * ParserUnaryExpression::operators[] = -{ - "-", "negate", - "NOT", "not", - nullptr -}; - -const char * ParserAdditiveExpression::operators[] = -{ - "+", "plus", - "-", "minus", - nullptr -}; - -const char * ParserComparisonExpression::operators[] = -{ - "==", "equals", - "!=", "notEquals", - "<>", "notEquals", - "<=", "lessOrEquals", - ">=", "greaterOrEquals", - "<", "less", - ">", "greater", - "=", "equals", - "LIKE", "like", - "ILIKE", "ilike", - "NOT LIKE", "notLike", - "NOT ILIKE", "notILike", - "IN", "in", - "NOT IN", "notIn", - "GLOBAL IN", "globalIn", - "GLOBAL NOT IN", "globalNotIn", - nullptr -}; - -const char * ParserComparisonExpression::overlapping_operators_to_skip[] = -{ - "IN PARTITION", - nullptr -}; - -const char * ParserLogicalNotExpression::operators[] = -{ - "NOT", "not", - nullptr -}; - -const char * ParserArrayElementExpression::operators[] = -{ - "[", "arrayElement", - nullptr -}; - -const char * ParserTupleElementExpression::operators[] = -{ - ".", "tupleElement", - nullptr -}; - bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -419,199 +350,6 @@ bool ParserVariableArityOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expect return true; } -bool ParserBetweenExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - /// For the expression (subject [NOT] BETWEEN left AND right) - /// create an AST the same as for (subject >= left AND subject <= right). 
- - ParserKeyword s_not("NOT"); - ParserKeyword s_between("BETWEEN"); - ParserKeyword s_and("AND"); - - ASTPtr subject; - ASTPtr left; - ASTPtr right; - - if (!elem_parser.parse(pos, subject, expected)) - return false; - - bool negative = s_not.ignore(pos, expected); - - if (!s_between.ignore(pos, expected)) - { - if (negative) - --pos; - - /// No operator was parsed, just return element. - node = subject; - } - else - { - if (!elem_parser.parse(pos, left, expected)) - return false; - - if (!s_and.ignore(pos, expected)) - return false; - - if (!elem_parser.parse(pos, right, expected)) - return false; - - auto f_combined_expression = std::make_shared(); - auto args_combined_expression = std::make_shared(); - - /// [NOT] BETWEEN left AND right - auto f_left_expr = std::make_shared(); - auto args_left_expr = std::make_shared(); - - auto f_right_expr = std::make_shared(); - auto args_right_expr = std::make_shared(); - - args_left_expr->children.emplace_back(subject); - args_left_expr->children.emplace_back(left); - - args_right_expr->children.emplace_back(subject); - args_right_expr->children.emplace_back(right); - - if (negative) - { - /// NOT BETWEEN - f_left_expr->name = "less"; - f_right_expr->name = "greater"; - f_combined_expression->name = "or"; - } - else - { - /// BETWEEN - f_left_expr->name = "greaterOrEquals"; - f_right_expr->name = "lessOrEquals"; - f_combined_expression->name = "and"; - } - - f_left_expr->arguments = args_left_expr; - f_left_expr->children.emplace_back(f_left_expr->arguments); - - f_right_expr->arguments = args_right_expr; - f_right_expr->children.emplace_back(f_right_expr->arguments); - - args_combined_expression->children.emplace_back(f_left_expr); - args_combined_expression->children.emplace_back(f_right_expr); - - f_combined_expression->arguments = args_combined_expression; - f_combined_expression->children.emplace_back(f_combined_expression->arguments); - - node = f_combined_expression; - } - - return true; -} - -bool 
ParserTernaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserToken symbol1(TokenType::QuestionMark); - ParserToken symbol2(TokenType::Colon); - - ASTPtr elem_cond; - ASTPtr elem_then; - ASTPtr elem_else; - - if (!elem_parser.parse(pos, elem_cond, expected)) - return false; - - if (!symbol1.ignore(pos, expected)) - node = elem_cond; - else - { - if (!elem_parser.parse(pos, elem_then, expected)) - return false; - - if (!symbol2.ignore(pos, expected)) - return false; - - if (!elem_parser.parse(pos, elem_else, expected)) - return false; - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - function->name = "if"; - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(elem_cond); - exp_list->children.push_back(elem_then); - exp_list->children.push_back(elem_else); - - node = function; - } - - return true; -} - - -bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserToken arrow(TokenType::Arrow); - ParserToken open(TokenType::OpeningRoundBracket); - ParserToken close(TokenType::ClosingRoundBracket); - - Pos begin = pos; - - do - { - ASTPtr inner_arguments; - ASTPtr expression; - - bool was_open = false; - - if (open.ignore(pos, expected)) - { - was_open = true; - } - - if (!ParserList(std::make_unique(), std::make_unique(TokenType::Comma)).parse(pos, inner_arguments, expected)) - break; - - if (was_open) - { - if (!close.ignore(pos, expected)) - break; - } - - if (!arrow.ignore(pos, expected)) - break; - - if (!elem_parser.parse(pos, expression, expected)) - return false; - - /// lambda(tuple(inner_arguments), expression) - - auto lambda = std::make_shared(); - node = lambda; - lambda->name = "lambda"; - - auto outer_arguments = std::make_shared(); - lambda->arguments = outer_arguments; - lambda->children.push_back(lambda->arguments); 
- - auto tuple = std::make_shared(); - outer_arguments->children.push_back(tuple); - tuple->name = "tuple"; - tuple->arguments = inner_arguments; - tuple->children.push_back(inner_arguments); - - outer_arguments->children.push_back(expression); - - return true; - } - while (false); - - pos = begin; - return elem_parser.parse(pos, node, expected); -} - ASTPtr makeBetweenOperator(bool negative, ASTs arguments) { @@ -742,27 +480,6 @@ bool ParserPrefixUnaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Ex } -bool ParserUnaryExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - /// As an exception, negative numbers should be parsed as literals, and not as an application of the operator. - - if (pos->type == TokenType::Minus) - { - Pos begin = pos; - if (ParserCastOperator().parse(pos, node, expected)) - return true; - - pos = begin; - if (ParserLiteral().parse(pos, node, expected)) - return true; - - pos = begin; - } - - return operator_parser.parse(pos, node, expected); -} - - bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr expr_ast; @@ -784,26 +501,6 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect } -bool ParserArrayElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected &expected) -{ - return ParserLeftAssociativeBinaryOperatorList{ - operators, - std::make_unique(std::make_unique()), - std::make_unique(false) - }.parse(pos, node, expected); -} - - -bool ParserTupleElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected &expected) -{ - return ParserLeftAssociativeBinaryOperatorList{ - operators, - std::make_unique(std::make_unique()), - std::make_unique() - }.parse(pos, node, expected); -} - - ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function) : impl(std::make_unique( is_table_function ? 
ParserPtr(std::make_unique()) : ParserPtr(std::make_unique()), @@ -826,11 +523,6 @@ bool ParserNotEmptyExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected return nested_parser.parse(pos, node, expected) && !node->children.empty(); } -bool ParserNotEmptyExpressionList2::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - return nested_parser.parse(pos, node, expected) && !node->children.empty(); -} - bool ParserOrderByExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) @@ -901,179 +593,6 @@ bool ParserTTLExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } -bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr node_comp; - if (!elem_parser.parse(pos, node_comp, expected)) - return false; - - ParserKeyword s_is{"IS"}; - ParserKeyword s_not{"NOT"}; - ParserKeyword s_null{"NULL"}; - - if (s_is.ignore(pos, expected)) - { - bool is_not = false; - if (s_not.ignore(pos, expected)) - is_not = true; - - if (!s_null.ignore(pos, expected)) - return false; - - auto args = std::make_shared(); - args->children.push_back(node_comp); - - auto function = std::make_shared(); - function->name = is_not ? "isNotNull" : "isNull"; - function->arguments = args; - function->children.push_back(function->arguments); - - node = function; - } - else - node = node_comp; - - return true; -} - -bool ParserDateOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto begin = pos; - - /// If no DATE keyword, go to the nested parser. 
- if (!ParserKeyword("DATE").ignore(pos, expected)) - return next_parser.parse(pos, node, expected); - - ASTPtr expr; - if (!ParserStringLiteral().parse(pos, expr, expected)) - { - pos = begin; - return next_parser.parse(pos, node, expected); - } - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - /// the first argument of the function is the previous element, the second is the next one - function->name = "toDate"; - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(expr); - - node = function; - return true; -} - -bool ParserTimestampOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto begin = pos; - - /// If no TIMESTAMP keyword, go to the nested parser. - if (!ParserKeyword("TIMESTAMP").ignore(pos, expected)) - return next_parser.parse(pos, node, expected); - - ASTPtr expr; - if (!ParserStringLiteral().parse(pos, expr, expected)) - { - pos = begin; - return next_parser.parse(pos, node, expected); - } - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - /// the first argument of the function is the previous element, the second is the next one - function->name = "toDateTime"; - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(expr); - - node = function; - return true; -} - -bool ParserIntervalOperatorExpression::parseArgumentAndIntervalKind( - Pos & pos, ASTPtr & expr, IntervalKind & interval_kind, Expected & expected) -{ - auto begin = pos; - auto init_expected = expected; - ASTPtr string_literal; - //// A String literal followed INTERVAL keyword, - /// the literal can be a part of an expression or - /// include Number and INTERVAL TYPE at the same time - if (ParserStringLiteral{}.parse(pos, string_literal, 
expected)) - { - String literal; - if (string_literal->as().value.tryGet(literal)) - { - Tokens tokens(literal.data(), literal.data() + literal.size()); - Pos token_pos(tokens, 0); - Expected token_expected; - - if (!ParserNumber{}.parse(token_pos, expr, token_expected)) - return false; - else - { - /// case: INTERVAL '1' HOUR - /// back to begin - if (!token_pos.isValid()) - { - pos = begin; - expected = init_expected; - } - else - /// case: INTERVAL '1 HOUR' - return parseIntervalKind(token_pos, token_expected, interval_kind); - } - } - } - // case: INTERVAL expr HOUR - if (!ParserExpressionWithOptionalAlias(false).parse(pos, expr, expected)) - return false; - return parseIntervalKind(pos, expected, interval_kind); -} - -bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto begin = pos; - - /// If no INTERVAL keyword, go to the nested parser. - if (!ParserKeyword("INTERVAL").ignore(pos, expected)) - return next_parser.parse(pos, node, expected); - - ASTPtr expr; - IntervalKind interval_kind; - if (!parseArgumentAndIntervalKind(pos, expr, interval_kind, expected)) - { - pos = begin; - return next_parser.parse(pos, node, expected); - } - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - /// the first argument of the function is the previous element, the second is the next one - function->name = interval_kind.toNameOfFunctionToIntervalDataType(); - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(expr); - - node = function; - return true; -} - bool ParserKeyValuePair::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserIdentifier id_parser; @@ -1170,6 +689,13 @@ public: String function_name; }; +enum class Checkpoint +{ + None, + Interval, + Case +}; + /** Layer is a class that represents context for parsing certain element, * that consists 
of other elements e.g. f(x1, x2, x3) * @@ -1403,6 +929,9 @@ public: bool allow_alias = true; bool allow_alias_without_as_keyword = true; + std::optional checkpoint_pos; + Checkpoint checkpoint_type; + protected: std::vector operators; ASTs operands; @@ -2576,10 +2105,13 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte return true; } +template struct ParserExpressionImpl { - static std::vector> op_table; - static std::vector> op_table_unary; + static std::vector> operators_table; + static std::vector> unary_operators_table; + static const char * overlapping_operators_to_skip[]; + static Operator finish_between_operator; ParserCompoundIdentifier identifier_parser{false, true}; @@ -2611,23 +2143,45 @@ struct ParserExpressionImpl using Layers = std::vector>; - ParseResult tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected); - static ParseResult tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected); + ParseResult tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected); + static ParseResult tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected); }; bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - return ParserExpressionImpl().parse(pos, node, expected); + return ParserExpressionImpl().parse(pos, node, expected); } -std::vector> ParserExpressionImpl::op_table({ - {"+", Operator("plus", 11)}, - {"-", Operator("minus", 11)}, - {"*", Operator("multiply", 12)}, - {"/", Operator("divide", 12)}, - {"%", Operator("modulo", 12)}, - {"MOD", Operator("modulo", 12)}, - {"DIV", Operator("intDiv", 12)}, +bool ParserTernaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return ParserExpressionImpl().parse(pos, node, expected); +} + +bool ParserLogicalOrExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + /// Parses everything with lower than "OR" operator priority + /// TODO: make 
":" and "OR" different priority and check if everything is ok + return ParserExpressionImpl().parse(pos, node, expected); +} + +bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + /// Parses everything with lower than "INTERVAL" operator priority in previous parser + return ParserKeyword("INTERVAL").parse(pos, node, expected) + && ParserExpressionImpl().parse(pos, node, expected); +} + +template +std::vector> ParserExpressionImpl::operators_table({ + {"->", Operator("lambda", 1, 2, OperatorType::Lambda)}, + {"?", Operator("", 2, 0, OperatorType::StartIf)}, + {":", Operator("if", 3, 3, OperatorType::FinishIf)}, + {"OR", Operator("or", 3, 2, OperatorType::Mergeable)}, + {"AND", Operator("and", 4, 2, OperatorType::Mergeable)}, + {"BETWEEN", Operator("", 6, 0, OperatorType::StartBetween)}, + {"NOT BETWEEN", Operator("", 6, 0, OperatorType::StartNotBetween)}, + {"IS NULL", Operator("isNull", 8, 1, OperatorType::IsNull)}, + {"IS NOT NULL", Operator("isNotNull", 8, 1, OperatorType::IsNull)}, {"==", Operator("equals", 9, 2, OperatorType::Comparison)}, {"!=", Operator("notEquals", 9, 2, OperatorType::Comparison)}, {"<>", Operator("notEquals", 9, 2, OperatorType::Comparison)}, @@ -2636,12 +2190,6 @@ std::vector> ParserExpressionImpl::op_table({ {"<", Operator("less", 9, 2, OperatorType::Comparison)}, {">", Operator("greater", 9, 2, OperatorType::Comparison)}, {"=", Operator("equals", 9, 2, OperatorType::Comparison)}, - {"AND", Operator("and", 4, 2, OperatorType::Mergeable)}, - {"OR", Operator("or", 3, 2, OperatorType::Mergeable)}, - {"||", Operator("concat", 10, 2, OperatorType::Mergeable)}, - {".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)}, - {"IS NULL", Operator("isNull", 8, 1, OperatorType::IsNull)}, - {"IS NOT NULL", Operator("isNotNull", 8, 1, OperatorType::IsNull)}, {"LIKE", Operator("like", 9)}, {"ILIKE", Operator("ilike", 9)}, {"NOT LIKE", Operator("notLike", 9)}, @@ -2650,82 +2198,104 @@ 
std::vector> ParserExpressionImpl::op_table({ {"NOT IN", Operator("notIn", 9)}, {"GLOBAL IN", Operator("globalIn", 9)}, {"GLOBAL NOT IN", Operator("globalNotIn", 9)}, - {"?", Operator("", 2, 0, OperatorType::StartIf)}, - {":", Operator("if", 3, 3, OperatorType::FinishIf)}, - {"BETWEEN", Operator("", 6, 0, OperatorType::StartBetween)}, - {"NOT BETWEEN", Operator("", 6, 0, OperatorType::StartNotBetween)}, + {"||", Operator("concat", 10, 2, OperatorType::Mergeable)}, + {"+", Operator("plus", 11)}, + {"-", Operator("minus", 11)}, + {"*", Operator("multiply", 12)}, + {"/", Operator("divide", 12)}, + {"%", Operator("modulo", 12)}, + {"MOD", Operator("modulo", 12)}, + {"DIV", Operator("intDiv", 12)}, + {".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)}, {"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)}, {"::", Operator("CAST", 14, 2, OperatorType::Cast)}, - {"->", Operator("lambda", 1, 2, OperatorType::Lambda)} }); -std::vector> ParserExpressionImpl::op_table_unary({ +template +std::vector> ParserExpressionImpl::unary_operators_table({ {"NOT", Operator("not", 5, 1)}, {"-", Operator("negate", 13, 1)} }); -Operator ParserExpressionImpl::finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); +template +Operator ParserExpressionImpl::finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); -bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & node, Expected & expected) +template +const char * ParserExpressionImpl::overlapping_operators_to_skip[] = +{ + "IN PARTITION", + nullptr +}; + +template +bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & node, Expected & expected) { Action next = Action::OPERAND; std::vector> layers; - layers.push_back(std::make_unique()); + layers.push_back(std::make_unique()); - while (pos.isValid()) + while (true) { - if (!layers.back()->parse(pos, expected, next)) - return false; - - if (layers.back()->isFinished()) + while (pos.isValid()) { - next = 
Action::OPERATOR; + if (!layers.back()->parse(pos, expected, next)) + break; - ASTPtr res; - if (!layers.back()->getResult(res)) - return false; - - layers.pop_back(); - - if (layers.empty()) + if (layers.back()->isFinished()) { - node = res; - return true; + if (layers.size() == 1) + break; + + next = Action::OPERATOR; + + ASTPtr res; + if (!layers.back()->getResult(res)) + break; + + layers.pop_back(); + layers.back()->pushOperand(res); + continue; } - layers.back()->pushOperand(res); - continue; + ParseResult result; + + if (next == Action::OPERAND) + result = tryParseOperand(layers, pos, expected); + else + result = tryParseOperator(layers, pos, expected); + + if (result == ParseResult::END) + break; + else if (result == ParseResult::ERROR) + break; + else if (result == ParseResult::OPERATOR) + next = Action::OPERATOR; + else if (result == ParseResult::OPERAND) + next = Action::OPERAND; } - ParseResult result; + /// When we exit the loop we should be on the 1st level + if (layers.size() == 1 && layers.back()->getResult(node)) + return true; - if (next == Action::OPERAND) - result = tryParseOperator(layers, pos, expected); - else - result = tryParseOperand(layers, pos, expected); + layers.pop_back(); - if (result == ParseResult::END) - break; - else if (result == ParseResult::ERROR) + /// We try to check whether there were some checkpoint + while (!layers.empty() && !layers.back()->checkpoint_pos) + layers.pop_back(); + + if (layers.empty()) return false; - else if (result == ParseResult::OPERATOR) - next = Action::OPERATOR; - else if (result == ParseResult::OPERAND) - next = Action::OPERAND; + + /// Currently all checkpoints are located in operand section + next = Action::OPERAND; + pos = layers.back()->checkpoint_pos.value(); } - - // When we exit the loop we should be on the 1st level - if (layers.size() > 1) - return false; - - if (!layers.back()->getResult(node)) - return false; - - return true; } -ParserExpressionImpl::ParseResult 
ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected) +template +typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected) { ASTPtr tmp; @@ -2767,77 +2337,44 @@ ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperator(Layers } /// Try to find any unary operators - auto cur_op = op_table_unary.begin(); - for (; cur_op != op_table_unary.end(); ++cur_op) + auto cur_op = unary_operators_table.begin(); + for (; cur_op != unary_operators_table.end(); ++cur_op) { if (parseOperator(pos, cur_op->first, expected)) break; } - if (cur_op != op_table_unary.end()) + if (cur_op != unary_operators_table.end()) { layers.back()->pushOperator(cur_op->second); return ParseResult::OPERAND; } auto old_pos = pos; - std::unique_ptr layer; - if (parseOperator(pos, "INTERVAL", expected)) - layer = std::make_unique(); - else if (parseOperator(pos, "CASE", expected)) - layer = std::make_unique(); - - /// Here we check that CASE or INTERVAL is not an identifier - /// It is needed for backwards compatibility - if (layer) + if (layers.back()->checkpoint_type != Checkpoint::Interval && parseOperator(pos, "INTERVAL", expected)) { - Expected stub; - - auto stub_cur_op = op_table.begin(); - for (; stub_cur_op != op_table.end(); ++stub_cur_op) - { - /// Minus can be unary - /// TODO: check cases 'select case - number from table' and 'select case -x when 10 then 5 else 0 end' - if (stub_cur_op->second.function_name == "minus") - continue; - if (parseOperator(pos, stub_cur_op->first, stub)) - break; - } - - auto check_pos = pos; - - if (stub_cur_op != op_table.end() || - ParserToken(TokenType::Comma).ignore(pos, stub) || - ParserToken(TokenType::ClosingRoundBracket).ignore(pos, stub) || - ParserToken(TokenType::ClosingSquareBracket).ignore(pos, stub) || - ParserToken(TokenType::Semicolon).ignore(pos, stub) || - ParserKeyword("AS").ignore(pos, stub) || - 
ParserKeyword("FROM").ignore(pos, stub) || - !pos.isValid()) - { - pos = old_pos; - } - else if (ParserAlias(true).ignore(check_pos, stub) && - (ParserToken(TokenType::Comma).ignore(check_pos, stub) || - ParserToken(TokenType::ClosingRoundBracket).ignore(check_pos, stub) || - ParserToken(TokenType::ClosingSquareBracket).ignore(check_pos, stub) || - ParserToken(TokenType::Semicolon).ignore(check_pos, stub) || - ParserKeyword("FROM").ignore(check_pos, stub) || - !check_pos.isValid())) - { - pos = old_pos; - } - else - { - layers.push_back(std::move(layer)); - return ParseResult::OPERAND; - } + layers.back()->checkpoint_pos = old_pos; + layers.back()->checkpoint_type = Checkpoint::Interval; + layers.push_back(std::make_unique()); + return ParseResult::OPERAND; + } + else if (layers.back()->checkpoint_type != Checkpoint::Case && parseOperator(pos, "CASE", expected)) + { + layers.back()->checkpoint_pos = old_pos; + layers.back()->checkpoint_type = Checkpoint::Case; + layers.push_back(std::make_unique()); + return ParseResult::OPERAND; + } + else if (layers.back()->checkpoint_pos) + { + layers.back()->checkpoint_pos.reset(); + layers.back()->checkpoint_type = Checkpoint::None; } if (ParseDateOperatorExpression(pos, tmp, expected) || ParseTimestampOperatorExpression(pos, tmp, expected) || tuple_literal_parser.parse(pos, tmp, expected) || - array_literal_parser.parse(pos, tmp, expected) || + // array_literal_parser.parse(pos, tmp, expected) || number_parser.parse(pos, tmp, expected) || literal_parser.parse(pos, tmp, expected) || asterisk_parser.parse(pos, tmp, expected) || @@ -2926,8 +2463,8 @@ ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperator(Layers return ParseResult::OPERATOR; } - -ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected) +template +typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & 
expected) { ASTPtr tmp; @@ -2936,18 +2473,19 @@ ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & /// /// 'IN PARTITION' here is not an 'IN' operator, so we should stop parsing immediately Expected stub; - if (ParserKeyword("IN PARTITION").checkWithoutMoving(pos, stub)) - return ParseResult::END; + for (const char ** it = overlapping_operators_to_skip; *it; ++it) + if (ParserKeyword{*it}.checkWithoutMoving(pos, stub)) + return ParseResult::END; - /// Try to find operators from 'op_table' - auto cur_op = op_table.begin(); - for (; cur_op != op_table.end(); ++cur_op) + /// Try to find operators from 'operators_table' + auto cur_op = operators_table.begin(); + for (; cur_op != operators_table.end(); ++cur_op) { - if (parseOperator(pos, cur_op->first, expected)) + if (cur_op->second.priority >= MinPriority && parseOperator(pos, cur_op->first, expected)) break; } - if (cur_op == op_table.end()) + if (cur_op == operators_table.end()) { if (layers.back()->allow_alias && ParserAlias(layers.back()->allow_alias_without_as_keyword).parse(pos, tmp, expected)) { @@ -2971,7 +2509,7 @@ ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & return ParseResult::OPERAND; } - // 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator + /// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' 
operator if (op.function_name == "and" && layers.back()->between_counter) { layers.back()->between_counter--; @@ -3027,7 +2565,7 @@ ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & ParseResult next = ParseResult::OPERAND; - // isNull & isNotNull is postfix unary operator + /// isNull & isNotNull are postfix unary operators if (op.type == OperatorType::IsNull) next = ParseResult::OPERATOR; diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 90a5bbc0850..95f36fc1c3e 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -223,207 +223,20 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; - -class ParserArrayElementExpression : public IParserBase -{ -private: - static const char * operators[]; - -protected: - const char * getName() const override{ return "array element expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -class ParserTupleElementExpression : public IParserBase -{ -private: - static const char * operators[]; - -protected: - const char * getName() const override { return "tuple element expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -class ParserUnaryExpression : public IParserBase -{ -private: - static const char * operators[]; - ParserPrefixUnaryOperatorExpression operator_parser {operators, std::make_unique(std::make_unique())}; - -protected: - const char * getName() const override { return "unary expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -class ParserMultiplicativeExpression : public IParserBase -{ -private: - static const char * operators[]; - ParserLeftAssociativeBinaryOperatorList operator_parser {operators, std::make_unique()}; - -protected: - const char * getName() const override { return "multiplicative expression"; } - - bool parseImpl(Pos & 
pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } -}; - -/// DATE operator. "DATE '2001-01-01'" would be parsed as "toDate('2001-01-01')". -class ParserDateOperatorExpression : public IParserBase -{ -protected: - ParserMultiplicativeExpression next_parser; - - const char * getName() const override { return "DATE operator expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -/// TIMESTAMP operator. "TIMESTAMP '2001-01-01 12:34:56'" would be parsed as "toDateTime('2001-01-01 12:34:56')". -class ParserTimestampOperatorExpression : public IParserBase -{ -protected: - ParserDateOperatorExpression next_parser; - - const char * getName() const override { return "TIMESTAMP operator expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - /// Optional conversion to INTERVAL data type. Example: "INTERVAL x SECOND" parsed as "toIntervalSecond(x)". class ParserIntervalOperatorExpression : public IParserBase { protected: - ParserTimestampOperatorExpression next_parser; - const char * getName() const override { return "INTERVAL operator expression"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - static bool parseArgumentAndIntervalKind(Pos & pos, ASTPtr & expr, IntervalKind & interval_kind, Expected & expected); -}; - -class ParserAdditiveExpression : public IParserBase -{ -private: - static const char * operators[]; - ParserLeftAssociativeBinaryOperatorList operator_parser {operators, std::make_unique()}; - -protected: - const char * getName() const override { return "additive expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } -}; - - -class ParserConcatExpression : public IParserBase -{ - ParserVariableArityOperatorList operator_parser {"||", "concat", std::make_unique()}; - -protected: - const char 
* getName() const override { return "string concatenation expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } -}; - - -class ParserBetweenExpression : public IParserBase -{ -private: - ParserConcatExpression elem_parser; - -protected: - const char * getName() const override { return "BETWEEN expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -class ParserComparisonExpression : public IParserBase -{ -private: - static const char * operators[]; - static const char * overlapping_operators_to_skip[]; - ParserLeftAssociativeBinaryOperatorList operator_parser {operators, - overlapping_operators_to_skip, std::make_unique(), true}; - -protected: - const char * getName() const override{ return "comparison expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } -}; - -/** Parser for nullity checking with IS (NOT) NULL. 
- */ -class ParserNullityChecking : public IParserBase -{ -private: - ParserComparisonExpression elem_parser; - -protected: - const char * getName() const override { return "nullity checking"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -class ParserLogicalNotExpression : public IParserBase -{ -private: - static const char * operators[]; - ParserPrefixUnaryOperatorExpression operator_parser {operators, std::make_unique()}; - -protected: - const char * getName() const override{ return "logical-NOT expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } -}; - - -class ParserLogicalAndExpression : public IParserBase -{ -private: - ParserVariableArityOperatorList operator_parser {"AND", "and", std::make_unique()}; - -protected: - const char * getName() const override { return "logical-AND expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } }; class ParserLogicalOrExpression : public IParserBase { -private: - ParserVariableArityOperatorList operator_parser {"OR", "or", std::make_unique()}; - protected: const char * getName() const override { return "logical-OR expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; @@ -432,27 +245,12 @@ protected: */ class ParserTernaryOperatorExpression : public IParserBase { -private: - ParserLogicalOrExpression elem_parser; - protected: const char * getName() const override { return "expression with ternary operator"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -class ParserLambdaExpression : public IParserBase -{ -private: - ParserTernaryOperatorExpression elem_parser; - -protected: - const char * 
getName() const override { return "lambda expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - class ParserExpression : public IParserBase { protected: @@ -474,9 +272,6 @@ protected: }; -using ParserExpression2 = ParserLambdaExpression; - - class ParserExpressionWithOptionalAlias : public IParserBase { public: @@ -521,56 +316,6 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -class ParserExpressionWithOptionalAlias2 : public IParserBase -{ -public: - explicit ParserExpressionWithOptionalAlias2(bool allow_alias_without_as_keyword, bool is_table_function = false) - : impl(std::make_unique( - is_table_function ? ParserPtr(std::make_unique()) : ParserPtr(std::make_unique()), - allow_alias_without_as_keyword)) {} -protected: - ParserPtr impl; - - const char * getName() const override { return "expression with optional alias"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return impl->parse(pos, node, expected); - } -}; - -/** A comma-separated list of expressions, probably empty. 
*/ -class ParserExpressionList2 : public IParserBase -{ -public: - explicit ParserExpressionList2(bool allow_alias_without_as_keyword_, bool is_table_function_ = false) - : allow_alias_without_as_keyword(allow_alias_without_as_keyword_), is_table_function(is_table_function_) {} - -protected: - bool allow_alias_without_as_keyword; - bool is_table_function; // This expression list is used by a table function - - const char * getName() const override { return "list of expressions"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return ParserList( - std::make_unique(allow_alias_without_as_keyword, is_table_function), - std::make_unique(TokenType::Comma)) - .parse(pos, node, expected); - } -}; - -class ParserNotEmptyExpressionList2 : public IParserBase -{ -public: - explicit ParserNotEmptyExpressionList2(bool allow_alias_without_as_keyword) - : nested_parser(allow_alias_without_as_keyword) {} -private: - ParserExpressionList2 nested_parser; -protected: - const char * getName() const override { return "not empty list of expressions"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; class ParserOrderByExpressionList : public IParserBase { diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 85915ea64f5..66428b144bf 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -15,7 +15,6 @@ #include #include -#include namespace DB { @@ -162,40 +161,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->limit_with_ties = true; } - // TEST - // auto pos_test = pos; - // auto expected_test = expected; - // ASTPtr select_expression_list_test; - // ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); - - // bool res_test = exp_list_for_select_clause2.parse(pos_test, select_expression_list_test, expected_test); - // bool res = exp_list_for_select_clause.parse(pos, select_expression_list, 
expected); - - // if (res != res_test && res) - // throw Exception("PARSER TEST: old parser cannot parse this query", ErrorCodes::SYNTAX_ERROR); - - // if (res != res_test && res_test) - // throw Exception("PARSER TEST: new parser cannot parse this query", ErrorCodes::SYNTAX_ERROR); - - // if (!res) - // return false; - - // if (select_expression_list->getTreeHash() != select_expression_list_test->getTreeHash()) - // throw Exception("PARSER TEST: Tree hash differs. \n\n OLD: \n" + select_expression_list_test->dumpTree() - // + "\n\n NEW: \n" + select_expression_list->dumpTree(), ErrorCodes::SYNTAX_ERROR); - - ParserToken test(TokenType::DollarSign); - if (!test.ignore(pos, expected)) - { - if (!exp_list_for_select_clause.parse(pos, select_expression_list, expected)) - return false; - } - else - { - ParserNotEmptyExpressionList2 exp_list_for_select_clause2(true); - if (!exp_list_for_select_clause2.parse(pos, select_expression_list, expected)) - return false; - } + if (!exp_list_for_select_clause.parse(pos, select_expression_list, expected)) + return false; } /// FROM database.table or FROM table or FROM (subquery) or FROM tableFunction(...) diff --git a/tests/queries/0_stateless/00984_parser_stack_overflow.reference b/tests/queries/0_stateless/00984_parser_stack_overflow.reference index 0cf6a1f96df..35186521642 100644 --- a/tests/queries/0_stateless/00984_parser_stack_overflow.reference +++ b/tests/queries/0_stateless/00984_parser_stack_overflow.reference @@ -1,4 +1,3 @@ exceeded -exceeded 20002 1 diff --git a/tests/queries/0_stateless/00984_parser_stack_overflow.sh b/tests/queries/0_stateless/00984_parser_stack_overflow.sh index 329e51e774a..a7854b91ee2 100755 --- a/tests/queries/0_stateless/00984_parser_stack_overflow.sh +++ b/tests/queries/0_stateless/00984_parser_stack_overflow.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-asan # Such a huge timeout mostly for debug build. 
CLICKHOUSE_CURL_TIMEOUT=60 diff --git a/tests/queries/0_stateless/_02.reference b/tests/queries/0_stateless/_02.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/_02.sh b/tests/queries/0_stateless/_02.sh deleted file mode 100755 index 4d95ffbed4d..00000000000 --- a/tests/queries/0_stateless/_02.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -compare () { - if [ "$3" == 2 ];then - R_1=$($CLICKHOUSE_CLIENT -q "EXPLAIN AST $1") - R_2=$($CLICKHOUSE_CLIENT -q "EXPLAIN AST $2" 2>/dev/null) - - if [ "$R_1" == "$R_2" ];then - echo "equal (RES): $1"; - else - echo "============== not equal ===================" - echo "not equal (RES): $1"; - echo "# Original: $R_1"; - echo "# Ours: $R_2"; - echo "============================================" - fi - fi - - if [ "$2" != 0 ];then - R_1=$($CLICKHOUSE_CLIENT -q "SELECT $1") - R_2=$($CLICKHOUSE_CLIENT -q "SELECT \$ $1" 2>/dev/null) - - if [ "$R_1" == "$R_2" ];then - echo "equal (RES): SELECT $1"; - else - echo "============== not equal ===================" - echo "not equal (RES): SELECT $1"; - echo "# Original: $R_1"; - echo "# Ours: $R_2"; - echo "============================================" - fi - fi - - R_1=$($CLICKHOUSE_CLIENT -q "EXPLAIN AST SELECT $1") - R_2=$($CLICKHOUSE_CLIENT -q "EXPLAIN AST SELECT \$ $1" 2>/dev/null) - - if [ "$R_1" == "$R_2" ];then - echo "equal (AST): SELECT $1"; - else - echo "============== not equal ===================" - echo "not equal (AST): SELECT $1"; - echo "# Original: $R_1"; - echo "# Ours: $R_2"; - echo "============================================" - fi -} - -# compare "1 + 1" -# compare "3 + 7 * 5 + 32 / 2 - 5 * 2" -# compare "100 MOD 5 DIV 20 MOD 5" -# compare "1 + 2 * 3 - 3 / 2 < 80 / 8 + 2 * 5" -# compare "20 MOD 10 > 200 DIV 6" -# compare "5 != 80 / 8 + 2 * 5" - -# compare "a.5" 0 -# compare 
"a.b.5" 0 -# compare "a.b.n.v" 0 -# compare "10 * a.b.5 / 3" 0 - -# compare "-1::Int64" -# compare "[1,2,3]::Array(Int64)" -# compare "[1,2,cos(1)]" -# compare "[a,b,c]" 0 -# compare "[a,b,c]::Array(UInt8)" 0 - - -# compare "number AS a1, number AS b2, number FROM numbers(10)" -# compare "*[n]" 0 - -# compare "3 + 7 * (5 + 32) / 2 - 5 * (2 - 1)" -# compare "(a, b, c) * ((a, b, c) + (a, b, c))" 0 - -# compare "1 + 2 * 3 < a / b mod 5 OR [a, b, c] + 1 != [c, d, e] AND n as res" 0 -# compare "1 + 2 * 3 < a / b mod 5 AND [a, b, c] + 1 != [c, d, e] OR n as res" 0 - -# compare "'needle' LIKE 'haystack' AND NOT needle NOT ILIKE haystack" 0 -# compare "'needle' LIKE 'haystack' AND (NOT needle) NOT ILIKE haystack" 0 - -# compare "[1, 2, 3, cast(['a', 'b', c] as Array(String)), 4]" 0 -# compare "[1, 2, 3, cast(['a', 'b', c], Array(String)), 4]" 0 - -# compare "[1, 2, 3, cast(['a', 'b', c] as Array(String)), 4]" 0 -# compare "[1, 2, 3, cast(['a', 'b', c], Array(String)), 4]" 0 - -# compare "EXTRACT(DAY FROM toDate('2017-06-15'))" -# compare "substring(toFixedString('hello12345', 16) from 1 for 8)" -# compare "position('Hello, world!' 
IN '!')" - -# compare "trim(TRAILING 'x' FROM 'xxfooxx')" -# compare "ltrim('') || rtrim('') || trim('')" - -# compare "WITH 2 AS \`b.c\`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, TIMESTAMP '2022-02-02 02:02:02' AS w, [] AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t -# SELECT INTERVAL CASE CASE WHEN NOT -a[b.c] * u DIV v + d IS NOT NULL AND e OR f BETWEEN g AND h THEN i ELSE j END WHEN w THEN k END || [l, (m, n)] MINUTE IS NULL OR NOT o::Array(INT) = p <> q < r > s != t AS upyachka;" "WITH 2 AS \`b.c\`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, TIMESTAMP '2022-02-02 02:02:02' AS w, [] AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t -# SELECT \$ INTERVAL CASE CASE WHEN NOT -a[b.c] * u DIV v + d IS NOT NULL AND e OR f BETWEEN g AND h THEN i ELSE j END WHEN w THEN k END || [l, (m, n)] MINUTE IS NULL OR NOT o::Array(INT) = p <> q < r > s != t AS upyachka;" 2 \ No newline at end of file From a9ece9464555e0d0a66ed0c178bc62113d37d901 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 5 Sep 2022 16:38:24 +0000 Subject: [PATCH 060/173] Fix style --- src/Parsers/ExpressionListParsers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 546bf68a239..ed4274c05aa 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2281,7 +2281,7 @@ bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & layers.pop_back(); - /// We try to check whether there were some checkpoint + /// We try to check whether there were some checkpoint while (!layers.empty() && !layers.back()->checkpoint_pos) layers.pop_back(); From 8f064a172022db0d2d74372a2900e35ebaf19aae Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 5 Sep 2022 20:28:06 +0200 Subject: 
[PATCH 061/173] Fix --- src/Disks/IO/ThreadPoolReader.cpp | 23 +++++++-------------- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 25 +++++++---------------- 2 files changed, 14 insertions(+), 34 deletions(-) diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index 9b38607c204..ec23f0be7fa 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -200,29 +200,20 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMiss); - ThreadGroupStatusPtr running_group; + ThreadGroupStatusPtr thread_group; if (CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()) - running_group = CurrentThread::get().getThreadGroup(); + thread_group = CurrentThread::get().getThreadGroup(); - ContextPtr query_context; - if (CurrentThread::isInitialized()) - query_context = CurrentThread::get().getQueryContext(); - - auto task = std::make_shared>([request, fd, running_group, query_context] + auto task = std::make_shared>([request, fd, thread_group] { - ThreadStatus thread_status; + if (thread_group) + CurrentThread::attachTo(thread_group); SCOPE_EXIT({ - if (running_group) - thread_status.detachQuery(); + if (thread_group) + CurrentThread::detachQuery(); }); - if (running_group) - thread_status.attachQuery(running_group); - - if (query_context) - thread_status.attachQueryContext(query_context); - setThreadName("ThreadPoolRead"); Stopwatch watch(CLOCK_MONOTONIC); diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 8e2551dceb0..b5c9c0dc82d 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -40,31 +40,20 @@ ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queu std::future ThreadPoolRemoteFSReader::submit(Request request) { - ThreadGroupStatusPtr running_group; + ThreadGroupStatusPtr thread_group; if 
(CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()) - running_group = CurrentThread::get().getThreadGroup(); + thread_group = CurrentThread::get().getThreadGroup(); - ContextPtr query_context; - if (CurrentThread::isInitialized()) - query_context = CurrentThread::get().getQueryContext(); - - auto task = std::make_shared>([request, running_group, query_context] + auto task = std::make_shared>([request, thread_group] { - ThreadStatus thread_status; + if (thread_group) + CurrentThread::attachTo(thread_group); SCOPE_EXIT({ - if (running_group) - thread_status.detachQuery(); + if (thread_group) + CurrentThread::detachQuery(); }); - /// To be able to pass ProfileEvents. - if (running_group) - thread_status.attachQuery(running_group); - - /// Save query context if any, because cache implementation needs it. - if (query_context) - thread_status.attachQueryContext(query_context); - setThreadName("VFSRead"); CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; From 74c958931b4f24df1705fda9b03ad8f9e0b344ed Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 5 Sep 2022 21:16:04 -0700 Subject: [PATCH 062/173] Kusto-phase1 : Updated kql pipe flow with some optimizations --- src/Parsers/Kusto/ParserKQLFilter.cpp | 20 +- src/Parsers/Kusto/ParserKQLLimit.cpp | 47 +-- src/Parsers/Kusto/ParserKQLLimit.h | 6 - src/Parsers/Kusto/ParserKQLOperators.cpp | 156 ++++++++-- src/Parsers/Kusto/ParserKQLOperators.h | 8 +- src/Parsers/Kusto/ParserKQLProject.cpp | 28 +- src/Parsers/Kusto/ParserKQLProject.h | 6 - src/Parsers/Kusto/ParserKQLQuery.cpp | 353 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLQuery.h | 19 +- src/Parsers/Kusto/ParserKQLSort.cpp | 31 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 43 ++- src/Parsers/Kusto/ParserKQLStatement.h | 7 + src/Parsers/Kusto/ParserKQLSummarize.cpp | 192 +++--------- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- src/Parsers/Kusto/ParserKQLTable.cpp | 21 +- src/Parsers/Kusto/ParserKQLTable.h | 3 +- 
src/Parsers/tests/gtest_Parser.cpp | 30 +- 17 files changed, 567 insertions(+), 408 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 466370f5d80..3a399bdccdb 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -10,27 +10,15 @@ namespace DB bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; - Pos begin = pos; - String expr; - - KQLOperators convetor; - - for (auto op_po : op_pos) - { - if (expr.empty()) - expr = "(" + convetor.getExprFromToken(op_po) +")"; - else - expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; - } + String expr = getExprFromToken(pos); + ASTPtr where_expression; Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos pos_filter(token_filter, pos.max_depth); - if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) return false; - pos = begin; + node->as()->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index ece04f644cc..bb8e08fd378 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -11,52 +11,17 @@ namespace DB bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; + ASTPtr limit_length; - auto begin = pos; - Int64 min_limit = -1; - auto final_pos = pos; - for (auto op_po: op_pos) - { - auto is_number = [&] - { - for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) - { - if (!isdigit(*ch)) - return false; - } - return true; - }; + auto expr = getExprFromToken(pos); - if (!is_number()) - return false; + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos 
new_pos(tokens, pos.max_depth); - auto limit_length = std::strtol(op_po->begin,nullptr, 10); - if (-1 == min_limit) - { - min_limit = limit_length; - final_pos = op_po; - } - else - { - if (min_limit > limit_length) - { - min_limit = limit_length; - final_pos = op_po; - } - } - } - - String sub_query = std::format("( SELECT * FROM {} LIMIT {} )", table_name, String(final_pos->begin, final_pos->end)); - - Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); - IParser::Pos pos_subquery(token_subquery, pos.max_depth); - - if (!ParserTablesInSelectQuery().parse(pos_subquery, node, expected)) + if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) return false; - pos = begin; + node->as()->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h index c234985b0a6..1585805f0fc 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.h +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -8,15 +8,9 @@ namespace DB class ParserKQLLimit : public ParserKQLBase { -public: - void setTableName(String table_name_) {table_name = table_name_;} - protected: const char * getName() const override { return "KQL limit"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - String table_name; }; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 60fa022f9bb..b250f5def60 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include namespace DB { @@ -10,9 +12,60 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) +String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Pos 
&token_pos,String kql_op, String ch_op) { - String new_expr, left_wildcards, right_wildcards; + String new_expr; + Expected expected; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + auto haystack = tokens.back(); + + String logic_op = (kql_op == "has_all") ? " and " : " or "; + + while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + { + auto tmp_arg = String(token_pos->begin, token_pos->end); + if (token_pos->type == TokenType::Comma ) + new_expr = new_expr + logic_op; + else + new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; + + ++token_pos; + if (token_pos->type == TokenType::ClosingRoundBracket) + break; + + } + + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ch_op) +{ + String new_expr; + + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ASTPtr select; + Expected expected; + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + --token_pos; + --token_pos; + return ch_op; + +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) +{ + String new_expr, left_wildcards, right_wildcards, left_space, right_space; switch (wildcards_pos) { @@ -33,20 +86,45 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; } - if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; + switch (space_pos) + { + case WildcardsPos::none: + break; + + 
case WildcardsPos::left: + left_space =" "; + break; + + case WildcardsPos::right: + right_space = " "; + break; + + case WildcardsPos::both: + left_space =" "; + right_space = " "; + break; + } + + ++token_pos; + + if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; + else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + { + auto tmp_arg = String(token_pos->begin, token_pos->end); + new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; + } else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } -String KQLOperators::getExprFromToken(IParser::Pos pos) +bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) { - String res; - std::vector tokens; + auto begin = pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -87,14 +165,23 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; - if (KQLOperator.find(op) != KQLOperator.end()) - op_value = KQLOperator[op]; + if (KQLOperator.find(op) == KQLOperator.end()) + { + pos = begin; + return false; + } + + op_value = KQLOperator[op]; String new_expr; + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { + auto last_op = tokens.back(); + auto last_pos = pos; + switch (op_value) { case KQLOperatorValue::contains: @@ -142,7 +229,6 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - case KQLOperatorValue::has: new_expr 
= genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; @@ -152,9 +238,11 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) break; case KQLOperatorValue::has_all: + new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_any: + new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_cs: @@ -166,35 +254,67 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) break; case KQLOperatorValue::hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix: + new_expr 
= genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::in_cs: - new_expr = "in"; + new_expr = genInOpExpr(pos,op,"in"); break; case KQLOperatorValue::not_in_cs: - new_expr = "not in"; + new_expr = genInOpExpr(pos,op,"not in"); break; case KQLOperatorValue::in: @@ -229,13 +349,11 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) tokens.push_back(new_expr); } - ++pos; + return true; } - - for (auto & token : tokens) - res = res + token + " "; - - return res; + pos = begin; + return false; } } + diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 64af156f505..9796ae10c07 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -9,7 +9,7 @@ namespace DB class KQLOperators { public: - String getExprFromToken(IParser::Pos pos); + bool convert(std::vector &tokens,IParser::Pos &pos); protected: enum class WildcardsPos:uint8_t @@ -83,7 +83,7 @@ protected: {"hasprefix" , KQLOperatorValue::hasprefix}, {"!hasprefix" , KQLOperatorValue::not_hasprefix}, {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, 
{"hassuffix" , KQLOperatorValue::hassuffix}, {"!hassuffix" , KQLOperatorValue::not_hassuffix}, {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, @@ -98,7 +98,9 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); + static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); + static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); }; } diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index 0e25c9c4a6c..e978323d821 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -6,38 +6,18 @@ namespace DB bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - auto begin = pos; + ASTPtr select_expression_list; String expr; - if (op_pos.empty()) - expr = "*"; - else - { - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) - { - pos = *it ; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) - { - if (pos->type == TokenType::BareWord) - { - String tmp(pos->begin,pos->end); - if (it != op_pos.begin() && columns.find(tmp) == columns.end()) - return false; - columns.insert(tmp); - } - ++pos; - } - } - expr = getExprFromToken(op_pos.back()); - } + expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) return false; - pos = begin; + 
node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h index 3ab3c82f1be..b64675beed0 100644 --- a/src/Parsers/Kusto/ParserKQLProject.h +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -8,15 +8,9 @@ namespace DB class ParserKQLProject : public ParserKQLBase { -public: - void addColumn(String column) {columns.insert(column);} - protected: const char * getName() const override { return "KQL project"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - std::unordered_set columns; }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 7f6fcbcdb70..9fc32da7790 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -8,120 +8,339 @@ #include #include #include +#include +#include +#include +#include +#include +#include namespace DB { -bool ParserKQLBase :: parsePrepare(Pos & pos) +namespace ErrorCodes { - op_pos.push_back(pos); - return true; + extern const int UNKNOWN_FUNCTION; } -String ParserKQLBase :: getExprFromToken(Pos pos) +String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) +{ + Tokens tokens(text.c_str(), text.c_str() + text.size()); + IParser::Pos pos(tokens, max_depth); + + return getExprFromToken(pos); +} + +String ParserKQLBase :: getExprFromPipe(Pos & pos) +{ + uint16_t bracket_count = 0; + auto begin = pos; + auto end = pos; + while (!end->isEnd() && end->type != TokenType::Semicolon) + { + if (end->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (end->type == TokenType::OpeningRoundBracket) + --bracket_count; + + if (end->type == TokenType::PipeMark && bracket_count == 0) + break; + + ++end; + } + --end; + return String(begin->begin, end->end); +} + +String ParserKQLBase :: getExprFromToken(Pos & pos) { String res; - while (!pos->isEnd() && 
pos->type != TokenType::PipeMark) + std::vector tokens; + String alias; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - res = res + String(pos->begin,pos->end) +" "; + String token = String(pos->begin,pos->end); + + if (token == "=") + { + ++pos; + if (String(pos->begin,pos->end) != "~" ) + { + alias = tokens.back(); + tokens.pop_back(); + } + --pos; + } + else if (!KQLOperators().convert(tokens,pos)) + { + tokens.push_back(token); + } + + if (pos->type == TokenType::Comma && !alias.empty()) + { + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); + } ++pos; } + + if (!alias.empty()) + { + tokens.push_back("AS"); + tokens.push_back(alias); + } + + for (auto token:tokens) + res = res.empty()? token : res +" " + token; return res; } +std::unique_ptr ParserKQLQuery::getOperator(String & op_name) +{ + if (op_name == "filter" || op_name == "where") + return std::make_unique(); + else if (op_name == "limit" || op_name == "take") + return std::make_unique(); + else if (op_name == "project") + return std::make_unique(); + else if (op_name == "sort by" || op_name == "order by") + return std::make_unique(); + else if (op_name == "summarize") + return std::make_unique(); + else if (op_name == "table") + return std::make_unique(); + else + return nullptr; +} + bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + struct KQLOperatorDataFlowState + { + String operator_name; + bool need_input; + bool gen_output; + int8_t backspace_steps; // how many steps to last token of previous pipe + }; + auto select_query = std::make_shared(); node = select_query; - - ParserKQLFilter kql_filter_p; - ParserKQLLimit kql_limit_p; - ParserKQLProject kql_project_p; - ParserKQLSort kql_sort_p; - ParserKQLSummarize kql_summarize_p; - ParserKQLTable kql_table_p; - - ASTPtr select_expression_list; ASTPtr tables; - ASTPtr where_expression; - ASTPtr 
group_expression_list; - ASTPtr order_expression_list; - ASTPtr limit_length; - std::unordered_map kql_parser = { - { "filter",&kql_filter_p}, - { "where",&kql_filter_p}, - { "limit",&kql_limit_p}, - { "take",&kql_limit_p}, - { "project",&kql_project_p}, - { "sort",&kql_sort_p}, - { "order",&kql_sort_p}, - { "summarize",&kql_summarize_p}, - { "table",&kql_table_p} + std::unordered_map kql_parser = + { + { "filter", {"filter", false, false, 3}}, + { "where", {"filter", false, false, 3}}, + { "limit", {"limit", false, true, 3}}, + { "take", {"limit", false, true, 3}}, + { "project", {"project", false, false, 3}}, + { "sort by", {"order by", false, false, 4}}, + { "order by", {"order by", false, false, 4}}, + { "table", {"table", false, false, 3}}, + { "summarize", {"summarize", true, true, 3}} }; std::vector> operation_pos; - operation_pos.push_back(std::make_pair("table",pos)); - String table_name(pos->begin,pos->end); + String table_name(pos->begin, pos->end); - while (!pos->isEnd()) + operation_pos.push_back(std::make_pair("table", pos)); + ++pos; + uint16_t bracket_count = 0; + + while (!pos->isEnd() && pos->type != TokenType::Semicolon) { - ++pos; - if (pos->type == TokenType::PipeMark) + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + if (pos->type == TokenType::OpeningRoundBracket) + --bracket_count; + + if (pos->type == TokenType::PipeMark && bracket_count == 0) { ++pos; - String kql_operator(pos->begin,pos->end); + String kql_operator(pos->begin, pos->end); + if (kql_operator == "order" || kql_operator == "sort") + { + ++pos; + ParserKeyword s_by("by"); + if (s_by.ignore(pos,expected)) + { + kql_operator = "order by"; + --pos; + } + } if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(kql_operator,pos)); + operation_pos.push_back(std::make_pair(kql_operator, pos)); } + else + ++pos; } - for (auto &op_pos : operation_pos) - { - auto 
kql_operator = op_pos.first; - auto npos = op_pos.second; - if (!npos.isValid()) - return false; + auto kql_operator_str = operation_pos.back().first; + auto npos = operation_pos.back().second; + if (!npos.isValid()) + return false; - if (!kql_parser[kql_operator]->parsePrepare(npos)) + auto kql_operator_p = getOperator(kql_operator_str); + + if (!kql_operator_p) + return false; + + if (operation_pos.size() == 1) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else if (operation_pos.size() == 2 && operation_pos.front().first == "table") + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + npos = operation_pos.front().second; + if (!ParserKQLTable().parse(npos, node, expected)) + return false; + } + else + { + String project_clause, order_clause, where_clause, limit_clause; + auto last_pos = operation_pos.back().second; + auto last_op = operation_pos.back().first; + + auto set_main_query_clause =[&](String & op, Pos & op_pos) + { + auto op_str = ParserKQLBase::getExprFromPipe(op_pos); + if (op == "project") + project_clause = op_str; + else if (op == "where" || op == "filter") + where_clause = where_clause.empty() ? std::format("({})", op_str) : where_clause + std::format("AND ({})", op_str); + else if (op == "limit" || op == "take") + limit_clause = op_str; + else if (op == "order by" || op == "sort by") + order_clause = order_clause.empty() ? 
op_str : order_clause + "," + op_str; + }; + + set_main_query_clause(last_op, last_pos); + + operation_pos.pop_back(); + + if (kql_parser[last_op].need_input) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else + { + while (operation_pos.size() > 0) + { + auto prev_op = operation_pos.back().first; + auto prev_pos = operation_pos.back().second; + + if (kql_parser[prev_op].gen_output) + break; + if (!project_clause.empty() && prev_op == "project") + break; + set_main_query_clause(prev_op, prev_pos); + operation_pos.pop_back(); + last_op = prev_op; + last_pos = prev_pos; + } + } + + if (operation_pos.size() > 0) + { + for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) + --last_pos; + + String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); + Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) + return false; + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + } + else + { + if (!ParserKQLTable().parse(last_pos, node, expected)) + return false; + } + + auto set_query_clasue =[&](String op_str, String op_calsue) + { + auto oprator = getOperator(op_str); + if (oprator) + { + Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); + IParser::Pos pos_clause(token_clause, pos.max_depth); + if (!oprator->parse(pos_clause, node, expected)) + return false; + } + return true; + }; + + if (!select_query->select()) + { + if (project_clause.empty()) + project_clause = "*"; + if (!set_query_clasue("project", project_clause)) + return false; + } + + if (!order_clause.empty()) + if (!set_query_clasue("order by", order_clause)) + return false; + + if (!where_clause.empty()) + if (!set_query_clasue("where", where_clause)) + return false; + + if (!limit_clause.empty()) + if 
(!set_query_clasue("limit", limit_clause)) + return false; + return true; + } + + if (!select_query->select()) + { + auto expr = String("*"); + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + if (!std::make_unique()->parse(new_pos, node, expected)) return false; } - if (!kql_table_p.parse(pos, tables, expected)) + return true; +} + +bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr select_node; + + if (!ParserKQLTaleFunction().parse(pos, select_node, expected)) return false; - if (!kql_project_p.parse(pos, select_expression_list, expected)) - return false; + ASTPtr node_subquery = std::make_shared(); + node_subquery->children.push_back(select_node); - kql_limit_p.setTableName(table_name); - if (!kql_limit_p.parse(pos, limit_length, expected)) - return false; - else - { - if (limit_length) - tables = std::move(limit_length); - } + ASTPtr node_table_expr = std::make_shared(); + node_table_expr->as()->subquery = node_subquery; - if (!kql_filter_p.parse(pos, where_expression, expected)) - return false; + node_table_expr->children.emplace_back(node_subquery); - if (!kql_sort_p.parse(pos, order_expression_list, expected)) - return false; + ASTPtr node_table_in_select_query_emlement = std::make_shared(); + node_table_in_select_query_emlement->as()->table_expression = node_table_expr; - if (!kql_summarize_p.parse(pos, select_expression_list, expected)) - return false; - else - group_expression_list = kql_summarize_p.group_expression_list; + ASTPtr res = std::make_shared(); - select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); - select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); - select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); - select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); - 
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + res->children.emplace_back(node_table_in_select_query_emlement); + node = res; return true; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 0545cd00cd9..42f5f84f031 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -1,25 +1,32 @@ #pragma once #include +#include namespace DB { class ParserKQLBase : public IParserBase { public: - virtual bool parsePrepare(Pos & pos) ; - -protected: - std::vector op_pos; - std::vector expressions; - virtual String getExprFromToken(Pos pos); + static String getExprFromToken(Pos & pos); + static String getExprFromPipe(Pos & pos); + static String getExprFromToken(const String & text, const uint32_t & max_depth); }; class ParserKQLQuery : public IParserBase { + protected: + static std::unique_ptr getOperator(String &op_name); const char * getName() const override { return "KQL query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserKQLSubquery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL subquery"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 70e3283ee3e..f7540d729fd 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -10,41 +10,32 @@ namespace DB bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; - - auto begin = pos; bool has_dir = false; std::vector has_directions; ParserOrderByExpressionList order_list; ASTPtr order_expression_list; - ParserKeyword by("by"); + auto expr = getExprFromToken(pos); - pos = op_pos.back(); // sort only affected by last one + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos 
new_pos(tokens, pos.max_depth); - if (!by.ignore(pos, expected)) + auto pos_backup = new_pos; + if (!order_list.parse(pos_backup, order_expression_list, expected)) return false; - if (!order_list.parse(pos,order_expression_list,expected)) - return false; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - return false; - - pos = op_pos.back(); - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) { - String tmp(pos->begin,pos->end); + String tmp(new_pos->begin, new_pos->end); if (tmp == "desc" or tmp == "asc") has_dir = true; - if (pos->type == TokenType::Comma) + if (new_pos->type == TokenType::Comma) { has_directions.push_back(has_dir); has_dir = false; } - - ++pos; + ++new_pos; } has_directions.push_back(has_dir); @@ -58,13 +49,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) order_expr->nulls_direction = -1; else order_expr->nulls_direction = order_expr->nulls_direction == 1 ? 
-1 : 1; - } } - node = order_expression_list; + node->as()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); - pos =begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 2afbad22131..573c953c313 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -35,7 +36,6 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - // will support union next phase ASTPtr kql_query; if (!ParserKQLQuery().parse(pos, kql_query, expected)) @@ -58,4 +58,45 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery kql_p; + ASTPtr select; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + auto begin = pos; + auto paren_count = 0 ; + String kql_statement; + + if (s_lparen.ignore(pos, expected)) + { + ++paren_count; + while (!pos->isEnd()) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + + kql_statement = kql_statement + " " + String(pos->begin,pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth); + + if (kql_p.parse(pos_kql, select, expected)) + { + node = select; + ++pos; + return true; + } + } + pos = begin; + return false; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index 1eed2d00845..ef44b2d6c8a 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ 
-41,5 +41,12 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserKQLTaleFunction : public IParserBase +{ +protected: + const char * getName() const override { return "KQL() function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f3c402a80be..4d8d7753178 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,8 +1,3 @@ -#include -#include -#include -#include -#include #include #include #include @@ -15,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -23,182 +17,64 @@ namespace DB { -std::pair ParserKQLSummarize::removeLastWord(String input) -{ - ReadBufferFromString in(input); - String token; - std::vector temp; - - while (!in.eof()) - { - readStringUntilWhitespace(token, in); - if (in.eof()) - break; - skipWhitespaceIfAny(in); - temp.push_back(token); - } - - String first_part; - for (std::size_t i = 0; i < temp.size() - 1; i++) - { - first_part += temp[i]; - } - if (!temp.empty()) - { - return std::make_pair(first_part, temp[temp.size() - 1]); - } - - return std::make_pair("", ""); -} - -String ParserKQLSummarize::getBinGroupbyString(String expr_bin) -{ - String column_name; - bool bracket_start = false; - bool comma_start = false; - String bin_duration; - - for (char ch : expr_bin) - { - if (comma_start && ch != ')') - bin_duration += ch; - if (ch == ',') - { - comma_start = true; - bracket_start = false; - } - if (bracket_start) - column_name += ch; - if (ch == '(') - bracket_start = true; - } - - std::size_t len = bin_duration.size(); - char bin_type = bin_duration[len - 1]; // y, d, h, m, s - if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) - { - return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS 
bin_int"; - } - bin_duration = bin_duration.substr(0, len - 1); - - switch (bin_type) - { - case 'y': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" - + bin_duration + " * (12*30*86400))) AS bin_year"; - case 'd': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration - + " * 86400)) AS bin_day"; - case 'h': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration - + " * 3600)) AS bin_hour"; - case 'm': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration - + " * 60)) AS bin_minute"; - case 's': - return "toDateTime(" + column_name + ") AS bin_sec"; - default: - return ""; - } -} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; - if (op_pos.size() != 1) // now only support one summarize - return false; + ASTPtr select_expression_list; + ASTPtr group_expression_list; - auto begin = pos; - - pos = op_pos.back(); String expr_aggregation; String expr_groupby; String expr_columns; - String expr_bin; bool groupby = false; - bool bin_function = false; - String bin_column; - String last_string; - String column_name; + + auto begin = pos; + auto pos_groupby = pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (String(pos->begin, pos->end) == "by") - groupby = true; - else { - if (groupby) - { - if (String(pos->begin, pos->end) == "bin" || bin_function) - { - bin_function = true; - expr_bin += String(pos->begin, pos->end); - if (String(pos->begin, pos->end) == ")") - { - expr_bin = getBinGroupbyString(expr_bin); - expr_groupby += expr_bin; - bin_function = false; - } - } - - else - expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; - } - - else - { - if 
(String(pos->begin, pos->end) == "=") - { - std::pair temp = removeLastWord(expr_aggregation); - expr_aggregation = temp.first; - column_name = temp.second; - } - else - { - if (!column_name.empty()) - { - expr_aggregation = expr_aggregation + String(pos->begin, pos->end); - - if (String(pos->begin, pos->end) == ")") - { - expr_aggregation = expr_aggregation + " AS " + column_name; - column_name = ""; - } - } - else if (!bin_function) - { - expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; - } - } - } + groupby = true; + auto end = pos; + --end; + expr_aggregation = begin <= end ? String(begin->begin, end->end) : ""; + pos_groupby = pos; + ++pos_groupby; } ++pos; } - - if (expr_groupby.empty()) - expr_columns = expr_aggregation; + --pos; + if (groupby) + expr_groupby = String(pos_groupby->begin, pos->end); else - { - if (expr_aggregation.empty()) - expr_columns = expr_groupby; - else - expr_columns = expr_groupby + "," + expr_aggregation; - } - Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); - IParser::Pos pos_columns(token_columns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) + expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; + + auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; + expr_columns = groupby ? 
expr_aggregation_str + expr_groupby : expr_aggregation_str; + + String converted_columns = getExprFromToken(expr_columns, pos.max_depth); + + Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) return false; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + if (groupby) { - Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); - IParser::Pos postoken_groupby(token_groupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) + String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); + + Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) return false; + node->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); } - pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index b243f74d08f..1aad02705df 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,16 +5,13 @@ namespace DB { + class ParserKQLSummarize : public ParserKQLBase { -public: - ASTPtr group_expression_list; protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - static std::pair removeLastWord(String input); - static String getBinGroupbyString(String expr_bin); }; } diff --git 
a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index fadf5305e89..6356ad688b6 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -7,15 +7,6 @@ namespace DB { -bool ParserKQLTable :: parsePrepare(Pos & pos) -{ - if (!op_pos.empty()) - return false; - - op_pos.push_back(pos); - return true; -} - bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords @@ -44,12 +35,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "EXPLAIN" }); - if (op_pos.empty()) - return false; - - auto begin = pos; - pos = op_pos.back(); - + ASTPtr tables; String table_name(pos->begin,pos->end); String table_name_upcase(table_name); @@ -58,9 +44,10 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) return false; - if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + if (!ParserTablesInSelectQuery().parse(pos, tables, expected)) return false; - pos = begin; + + node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index 1266b6e732d..c67dcb15156 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -8,11 +8,10 @@ namespace DB class ParserKQLTable : public ParserKQLBase { + protected: const char * getName() const override { return "KQL Table"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - bool parsePrepare(Pos &pos) override; - }; } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 3575e8ba175..b452bd27642 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -308,27 +308,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | project 
FirstName,LastName,Occupation | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | limit 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | take 1 | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | take 3 | take 1", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", - "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "throws Syntax error" + "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" }, { "Customers | sort by FirstName desc", @@ -360,7 +360,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | sort by FirstName | order by Age ", - "SELECT *\nFROM Customers\nORDER BY Age DESC" + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC" }, { "Customers | sort by FirstName nulls first", @@ -408,31 +408,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, 
}, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" + "SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)" }, { "Customers |summarize count() by Occupation", - "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + "SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize sum(Age) by Occupation", - "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize avg(Age) by Occupation", - "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize min(Age) by Occupation", - "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers |summarize max(Age) by Occupation", - "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" - }, - { - "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers | where FirstName contains 'pet'", From 3f65e6b2b1cae1b0bc0e19df43d9d7da79ee5bc4 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 5 Sep 2022 22:27:23 -0700 Subject: [PATCH 063/173] Kusto-phase1 : fixed style, removed trailing whitespaces --- src/Parsers/Kusto/ParserKQLQuery.cpp | 15 +++++---------- src/Parsers/Kusto/ParserKQLStatement.cpp | 2 +- src/Parsers/Kusto/ParserKQLSummarize.cpp | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 9fc32da7790..03cb5a8ad43 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ 
b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -18,11 +18,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_FUNCTION; -} - String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) { Tokens tokens(text.c_str(), text.c_str() + text.size()); @@ -95,7 +90,7 @@ String ParserKQLBase :: getExprFromToken(Pos & pos) tokens.push_back(alias); } - for (auto token:tokens) + for (auto token:tokens) res = res.empty()? token : res +" " + token; return res; } @@ -120,7 +115,7 @@ std::unique_ptr ParserKQLQuery::getOperator(String & op_name) bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - struct KQLOperatorDataFlowState + struct KQLOperatorDataFlowState { String operator_name; bool need_input; @@ -206,7 +201,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ParserKQLTable().parse(npos, node, expected)) return false; } - else + else { String project_clause, order_clause, where_clause, limit_clause; auto last_pos = operation_pos.back().second; @@ -252,7 +247,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - if (operation_pos.size() > 0) + if (operation_pos.size() > 0) { for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) --last_pos; @@ -274,7 +269,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto set_query_clasue =[&](String op_str, String op_calsue) { auto oprator = getOperator(op_str); - if (oprator) + if (oprator) { Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); IParser::Pos pos_clause(token_clause, pos.max_depth); diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 573c953c313..21e480234d3 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -69,7 +69,7 @@ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expec String 
kql_statement; if (s_lparen.ignore(pos, expected)) - { + { ++paren_count; while (!pos->isEnd()) { diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 4d8d7753178..75eacb1adbd 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -49,7 +49,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte expr_groupby = String(pos_groupby->begin, pos->end); else expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; - + auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; expr_columns = groupby ? expr_aggregation_str + expr_groupby : expr_aggregation_str; From 4a68bfef393354468cb9b64b43dd9dddcd0d51eb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 6 Sep 2022 10:58:14 +0000 Subject: [PATCH 064/173] Fix tests with async Keeper start --- tests/integration/helpers/keeper_utils.py | 41 ++++++++ .../test_keeper_and_access_storage/test.py | 3 + tests/integration/test_keeper_auth/test.py | 3 + .../test_keeper_back_to_back/test.py | 2 + .../configs/enable_keeper.xml | 22 ----- .../configs/keeper_conf.xml | 8 -- .../test_keeper_force_recovery/test.py | 62 ++++-------- .../test.py | 54 +++-------- .../test_keeper_four_word_command/test.py | 96 ++++++------------- .../test_keeper_incorrect_config/test.py | 4 +- .../test_keeper_internal_secure/test.py | 3 + .../test_keeper_mntr_pressure/test.py | 41 +++----- .../test.py | 29 +----- .../test_keeper_multinode_simple/test.py | 32 +------ .../integration/test_keeper_nodes_add/test.py | 5 + .../test_keeper_nodes_move/test.py | 4 + .../test_keeper_nodes_remove/test.py | 14 ++- .../test_keeper_persistent_log/test.py | 17 +++- .../test.py | 8 ++ .../configs/enable_keeper1.xml | 34 ------- .../configs/enable_keeper2.xml | 34 ------- .../configs/enable_keeper3.xml | 34 ------- .../configs/enable_keeper_two_nodes_1.xml | 28 ------ 
.../configs/enable_keeper_two_nodes_2.xml | 28 ------ .../configs/enable_keeper_two_nodes_3.xml | 28 ------ .../test_keeper_restore_from_snapshot/test.py | 3 + .../test_keeper_secure_client/test.py | 3 +- tests/integration/test_keeper_session/test.py | 20 +--- .../test_keeper_snapshot_on_exit/test.py | 2 + .../test.py | 4 +- .../integration/test_keeper_snapshots/test.py | 11 ++- .../test_keeper_snapshots_multinode/test.py | 8 ++ .../configs/enable_keeper1.xml | 34 ------- .../configs/enable_keeper2.xml | 34 ------- .../configs/enable_keeper3.xml | 34 ------- .../test_keeper_three_nodes_start/test.py | 2 + .../test_keeper_three_nodes_two_alive/test.py | 12 ++- .../test_keeper_two_nodes_cluster/test.py | 29 +----- .../test_keeper_znode_time/test.py | 25 +---- .../test_keeper_zookeeper_converter/test.py | 3 + 40 files changed, 218 insertions(+), 640 deletions(-) create mode 100644 tests/integration/helpers/keeper_utils.py delete mode 100644 tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml delete mode 100644 tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml delete mode 100644 tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml delete mode 100644 tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml delete mode 100644 
tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py new file mode 100644 index 00000000000..681407e5e8c --- /dev/null +++ b/tests/integration/helpers/keeper_utils.py @@ -0,0 +1,41 @@ +import socket +import time + + +def get_keeper_socket(cluster, node, port=9181): + hosts = cluster.get_instance_ip(node.name) + client = socket.socket() + client.settimeout(10) + client.connect((hosts, port)) + return client + + +def send_4lw_cmd(cluster, node, cmd="ruok", port=9181): + client = None + try: + client = get_keeper_socket(cluster, node, port) + client.send(cmd.encode()) + data = client.recv(100_000) + data = data.decode() + return data + finally: + if client is not None: + client.close() + + +NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" + + +def wait_until_connected(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) == NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_until_quorum_lost(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) != NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_nodes(cluster, nodes): + for node in nodes: + wait_until_connected(cluster, node) diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index ae6b0085094..72e3582979b 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -3,6 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) @@ -15,6 +16,8 @@ node1 = cluster.add_instance( def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node1) + yield cluster finally: cluster.shutdown() diff --git 
a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py index 364d93dfc53..e1331c35eeb 100644 --- a/tests/integration/test_keeper_auth/test.py +++ b/tests/integration/test_keeper_auth/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -25,6 +26,7 @@ SUPERAUTH = "super:admin" def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) yield cluster @@ -455,6 +457,7 @@ def test_auth_snapshot(started_cluster): ) node.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node) connection = get_fake_zk() diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 73fface02b4..5ae71841004 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -61,6 +62,7 @@ def stop_zk(zk): def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) yield cluster diff --git a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml b/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml deleted file mode 100644 index c1d38a1de52..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - - diff --git a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml 
b/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml deleted file mode 100644 index ebb0d98ddf4..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - node1 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery/test.py b/tests/integration/test_keeper_force_recovery/test.py index f3bb0ca56e3..f7c3787b4d8 100644 --- a/tests/integration/test_keeper_force_recovery/test.py +++ b/tests/integration/test_keeper_force_recovery/test.py @@ -2,6 +2,7 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time @@ -62,37 +63,6 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): - while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): while zk.retry(zk.exists, path) is None: time.sleep(0.1) @@ -104,9 +74,6 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: @@ -114,7 +81,7 @@ def test_cluster_recovery(started_cluster): for node in nodes[CLUSTER_SIZE:]: node.stop_clickhouse() - wait_nodes(nodes[:CLUSTER_SIZE]) + keeper_utils.wait_nodes(cluster, nodes[:CLUSTER_SIZE]) node_zks = [get_fake_zk(node.name) 
for node in nodes[:CLUSTER_SIZE]] @@ -152,7 +119,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -167,8 +134,7 @@ def test_cluster_recovery(started_cluster): node.stop_clickhouse() # wait for node1 to lose quorum - while send_4lw_cmd(nodes[0].name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, nodes[0]) nodes[0].copy_file_to_container( os.path.join(CONFIG_DIR, "recovered_keeper1.xml"), @@ -177,9 +143,15 @@ def test_cluster_recovery(started_cluster): nodes[0].query("SYSTEM RELOAD CONFIG") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG - send_4lw_cmd(nodes[0].name, "rcvr") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) + keeper_utils.send_4lw_cmd(cluster, nodes[0], "rcvr") + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) # add one node to restore the quorum nodes[CLUSTER_SIZE].copy_file_to_container( @@ -191,10 +163,10 @@ def test_cluster_recovery(started_cluster): ) nodes[CLUSTER_SIZE].start_clickhouse() - wait_until_connected(nodes[CLUSTER_SIZE].name) + keeper_utils.wait_until_connected(cluster, nodes[CLUSTER_SIZE]) # node1 should have quorum now and accept requests - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks.append(get_fake_zk(nodes[CLUSTER_SIZE].name)) @@ -206,7 +178,7 @@ def test_cluster_recovery(started_cluster): f"/etc/clickhouse-server/config.d/enable_keeper{i+1}.xml", ) 
node.start_clickhouse() - wait_until_connected(node.name) + keeper_utils.wait_until_connected(cluster, node) node_zks.append(get_fake_zk(node.name)) # refresh old zk sessions @@ -223,7 +195,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zks[-1], "/test_force_recovery_last", "somedatalast") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) for zk in node_zks[:nodes_left]: assert_all_data(zk) diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py index 0a554e33119..1c0d5e9a306 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/test.py +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -2,10 +2,11 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time -from kazoo.client import KazooClient +from kazoo.client import KazooClient, KazooRetry CLUSTER_SIZE = 3 @@ -45,47 +46,19 @@ def started_cluster(): def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( - hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + hosts=cluster.get_instance_ip(nodename) + ":9181", + timeout=timeout, + connection_retry=KazooRetry(max_tries=10), + command_retry=KazooRetry(max_tries=10), ) _fake_zk_instance.start() return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): 
- while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): - while zk.exists(path) is None: + while zk.retry(zk.exists, path) is None: time.sleep(0.1) - assert zk.get(path)[0] == data.encode() + assert zk.retry(zk.get, path)[0] == data.encode() def close_zk(zk): @@ -93,20 +66,17 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: - wait_nodes(nodes) + keeper_utils.wait_nodes(cluster, nodes) node_zks = [get_fake_zk(node.name) for node in nodes] data_in_cluster = [] def add_data(zk, path, data): - zk.create(path, data.encode()) + zk.retry(zk.create, path, data.encode()) data_in_cluster.append((path, data)) def assert_all_data(zk): @@ -137,7 +107,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -156,7 +126,7 @@ def test_cluster_recovery(started_cluster): ) nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) assert_all_data(get_fake_zk(nodes[0].name)) finally: diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index e8136d322d3..30abc7422c4 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -1,6 +1,7 @@ import socket import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import 
string import os @@ -25,6 +26,10 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -56,28 +61,6 @@ def clear_znodes(): destroy_zk_client(zk) -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - # zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for n in [node1, node2, node3]: - wait_node(n) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -86,23 +69,15 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def reset_node_stats(node_name=node1.name): +def reset_node_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"srst") client.recv(10) finally: @@ -110,23 +85,10 @@ def reset_node_stats(node_name=node1.name): client.close() -def send_4lw_cmd(node_name=node1.name, cmd="ruok"): +def reset_conn_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def reset_conn_stats(node_name=node1.name): - client = None - try: - client = 
get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"crst") client.recv(10_000) finally: @@ -138,7 +100,7 @@ def test_cmd_ruok(started_cluster): client = None try: wait_nodes() - data = send_4lw_cmd(cmd="ruok") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="ruok") assert data == "imok" finally: close_keeper_socket(client) @@ -187,7 +149,7 @@ def test_cmd_mntr(started_cluster): clear_znodes() # reset stat first - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action( @@ -200,7 +162,7 @@ def test_cmd_mntr(started_cluster): delete_cnt=2, ) - data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") # print(data.decode()) reader = csv.reader(data.split("\n"), delimiter="\t") @@ -252,10 +214,10 @@ def test_cmd_srst(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="srst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srst") assert data.strip() == "Server stats reset." 
- data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") assert len(data) != 0 # print(data) @@ -279,7 +241,7 @@ def test_cmd_conf(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="conf") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="conf") reader = csv.reader(data.split("\n"), delimiter="=") result = {} @@ -335,8 +297,8 @@ def test_cmd_conf(started_cluster): def test_cmd_isro(started_cluster): wait_nodes() - assert send_4lw_cmd(node1.name, "isro") == "rw" - assert send_4lw_cmd(node2.name, "isro") == "ro" + assert keeper_utils.send_4lw_cmd(cluster, node1, "isro") == "rw" + assert keeper_utils.send_4lw_cmd(cluster, node2, "isro") == "ro" def test_cmd_srvr(started_cluster): @@ -345,12 +307,12 @@ def test_cmd_srvr(started_cluster): wait_nodes() clear_znodes() - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="srvr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srvr") print("srvr output -------------------------------------") print(data) @@ -380,13 +342,13 @@ def test_cmd_stat(started_cluster): try: wait_nodes() clear_znodes() - reset_node_stats(node1.name) - reset_conn_stats(node1.name) + reset_node_stats(node1) + reset_conn_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="stat") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="stat") print("stat output -------------------------------------") print(data) @@ -440,7 +402,7 @@ def test_cmd_cons(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output -------------------------------------") print(data) @@ -485,12 +447,12 @@ def test_cmd_crst(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) 
do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="crst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="crst") print("crst output -------------------------------------") print(data) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output(after crst) -------------------------------------") print(data) @@ -537,7 +499,7 @@ def test_cmd_dump(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, ephemeral_cnt=2) - data = send_4lw_cmd(cmd="dump") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="dump") print("dump output -------------------------------------") print(data) @@ -563,7 +525,7 @@ def test_cmd_wchs(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchs") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchs") print("wchs output -------------------------------------") print(data) @@ -598,7 +560,7 @@ def test_cmd_wchc(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchc") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchc") print("wchc output -------------------------------------") print(data) @@ -622,7 +584,7 @@ def test_cmd_wchp(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchp") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchp") print("wchp output -------------------------------------") print(data) diff --git a/tests/integration/test_keeper_incorrect_config/test.py b/tests/integration/test_keeper_incorrect_config/test.py index e0a28b00b4f..9912959611a 100644 --- a/tests/integration/test_keeper_incorrect_config/test.py +++ b/tests/integration/test_keeper_incorrect_config/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import 
helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( @@ -173,7 +174,7 @@ NORMAL_CONFIG = """ """ -def test_duplicate_endpoint(started_cluster): +def test_invalid_configs(started_cluster): node1.stop_clickhouse() def assert_config_fails(config): @@ -192,5 +193,6 @@ def test_duplicate_endpoint(started_cluster): "/etc/clickhouse-server/config.d/enable_keeper1.xml", NORMAL_CONFIG ) node1.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node1) assert node1.query("SELECT 1") == "1\n" diff --git a/tests/integration/test_keeper_internal_secure/test.py b/tests/integration/test_keeper_internal_secure/test.py index 2d45e95e4ff..2448a426fe2 100644 --- a/tests/integration/test_keeper_internal_secure/test.py +++ b/tests/integration/test_keeper_internal_secure/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -47,6 +48,8 @@ def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + yield cluster finally: diff --git a/tests/integration/test_keeper_mntr_pressure/test.py b/tests/integration/test_keeper_mntr_pressure/test.py index 471767210d6..1468aa01896 100644 --- a/tests/integration/test_keeper_mntr_pressure/test.py +++ b/tests/integration/test_keeper_mntr_pressure/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import pytest import random import string @@ -30,6 +31,7 @@ NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving request def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -37,40 +39,22 @@ def started_cluster(): cluster.shutdown() -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - 
client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - def test_aggressive_mntr(started_cluster): - def go_mntr(node_name): - for _ in range(100000): - print(node_name, send_4lw_cmd(node_name, "mntr")) + def go_mntr(node): + for _ in range(10000): + try: + print(node.name, keeper_utils.send_4lw_cmd(cluster, node, "mntr")) + except ConnectionRefusedError: + pass - node1_thread = threading.Thread(target=lambda: go_mntr(node1.name)) - node2_thread = threading.Thread(target=lambda: go_mntr(node2.name)) - node3_thread = threading.Thread(target=lambda: go_mntr(node3.name)) + node1_thread = threading.Thread(target=lambda: go_mntr(node1)) + node2_thread = threading.Thread(target=lambda: go_mntr(node2)) + node3_thread = threading.Thread(target=lambda: go_mntr(node3)) node1_thread.start() node2_thread.start() node3_thread.start() @@ -78,8 +62,7 @@ def test_aggressive_mntr(started_cluster): node2.stop_clickhouse() node3.stop_clickhouse() - while send_4lw_cmd(node1.name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, node1) node1.stop_clickhouse() starters = [] diff --git a/tests/integration/test_keeper_multinode_blocade_leader/test.py b/tests/integration/test_keeper_multinode_blocade_leader/test.py index d6d01a5d0a6..06a5cd8dc5a 100644 --- a/tests/integration/test_keeper_multinode_blocade_leader/test.py +++ b/tests/integration/test_keeper_multinode_blocade_leader/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -44,6 +45,7 @@ TODO remove this when jepsen tests will be written. 
def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -55,31 +57,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -92,7 +69,6 @@ def get_fake_zk(nodename, timeout=30.0): @pytest.mark.timeout(600) def test_blocade_leader(started_cluster): for i in range(100): - wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( @@ -296,7 +272,6 @@ def restart_replica_for_sure(node, table_name, zk_replica_path): @pytest.mark.timeout(600) def test_blocade_leader_twice(started_cluster): for i in range(100): - wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index 694600acc67..b8bdb098c0d 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -32,6 +33,7 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, 
node2, node3]) yield cluster @@ -43,31 +45,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -78,7 +55,6 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_multinode(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -120,7 +96,6 @@ def test_read_write_multinode(started_cluster): def test_watch_on_follower(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -177,7 +152,6 @@ def test_watch_on_follower(started_cluster): def test_session_expiration(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3", timeout=3.0) @@ -219,7 +193,6 @@ def test_session_expiration(started_cluster): def test_follower_restart(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node1_zk.create("/test_restart_node", b"hello") @@ -244,7 +217,6 @@ def test_follower_restart(started_cluster): def test_simple_replicated_table(started_cluster): - wait_nodes() for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') 
ORDER BY tuple()".format( diff --git a/tests/integration/test_keeper_nodes_add/test.py b/tests/integration/test_keeper_nodes_add/test.py index c3449534e87..aad674332ac 100644 --- a/tests/integration/test_keeper_nodes_add/test.py +++ b/tests/integration/test_keeper_nodes_add/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -41,9 +42,11 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def test_nodes_add(started_cluster): + keeper_utils.wait_until_connected(cluster, node1) zk_conn = get_fake_zk(node1) for i in range(100): @@ -62,6 +65,7 @@ def test_nodes_add(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node2) zk_conn2 = get_fake_zk(node2) @@ -93,6 +97,7 @@ def test_nodes_add(started_cluster): node2.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node3) zk_conn3 = get_fake_zk(node3) for i in range(100): diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 31082846fb8..c816d69e2d1 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -11,6 +11,7 @@ import os import time from multiprocessing.dummy import Pool from helpers.test_tools import assert_eq_with_retry +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState cluster = ClickHouseCluster(__file__) @@ -33,6 +34,8 @@ def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + yield cluster finally: @@ -41,6 +44,7 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def get_fake_zk(node, timeout=30.0): diff --git 
a/tests/integration/test_keeper_nodes_remove/test.py b/tests/integration/test_keeper_nodes_remove/test.py index 13303d320eb..03536f07064 100644 --- a/tests/integration/test_keeper_nodes_remove/test.py +++ b/tests/integration/test_keeper_nodes_remove/test.py @@ -2,6 +2,8 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils +import time import os from kazoo.client import KazooClient, KazooState @@ -23,6 +25,7 @@ node3 = cluster.add_instance( def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -79,9 +82,12 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn3 = get_fake_zk(node3) zk_conn3.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node3.stop_clickhouse() @@ -91,6 +97,7 @@ def test_nodes_remove(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") + zk_conn = get_fake_zk(node1) zk_conn.sync("/test_two_0") @@ -98,8 +105,11 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn2 = get_fake_zk(node2) zk_conn2.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node2.stop_clickhouse() diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index 377fa436a87..d7cc79836a7 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -32,6 +33,8 @@ def started_cluster(): try: cluster.start() + 
keeper_utils.wait_until_connected(cluster, node) + yield cluster finally: @@ -46,6 +49,11 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -62,7 +70,7 @@ def test_state_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -111,7 +119,7 @@ def test_state_duplicate_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_duplicated_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -119,7 +127,7 @@ def test_state_duplicate_restart(started_cluster): node_zk2.create("/test_state_duplicated_restart/just_test2") node_zk2.create("/test_state_duplicated_restart/just_test3") - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk3 = get_connection_zk("node") @@ -159,6 +167,7 @@ def test_state_duplicate_restart(started_cluster): # http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html def test_ephemeral_after_restart(started_cluster): + try: node_zk = None node_zk2 = None @@ -176,7 +185,7 @@ def test_ephemeral_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_ephemeral_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_persistent_log_multinode/test.py b/tests/integration/test_keeper_persistent_log_multinode/test.py index f15e772fd5f..1552abd32e9 100644 --- a/tests/integration/test_keeper_persistent_log_multinode/test.py +++ b/tests/integration/test_keeper_persistent_log_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import 
pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -26,10 +27,15 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + wait_nodes() yield cluster @@ -100,6 +106,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml deleted file mode 100644 index 43800bd2dfb..00000000000 --- 
a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml deleted file mode 100644 index d51e420f733..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml deleted file mode 100644 index 3f1ee1e01a8..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml deleted file mode 100644 index a99bd5d5296..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git 
a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index 7270c84bdda..7f2c2e89703 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -24,6 +25,7 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -84,6 +86,7 @@ def test_recover_from_snapshot(started_cluster): # stale node should recover from leader's snapshot # with some sanitizers can start longer than 5 seconds node3.start_clickhouse(20) + keeper_utils.wait_until_connected(cluster, node3) print("Restarted") try: diff --git a/tests/integration/test_keeper_secure_client/test.py b/tests/integration/test_keeper_secure_client/test.py index 55e00880da0..81584129052 100644 --- a/tests/integration/test_keeper_secure_client/test.py +++ b/tests/integration/test_keeper_secure_client/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import string import os import time @@ -40,4 +41,4 @@ def started_cluster(): def test_connection(started_cluster): # just nothrow - node2.query("SELECT * FROM system.zookeeper WHERE path = '/'") + node2.query_with_retry("SELECT * FROM system.zookeeper WHERE path = '/'") diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py index 30db4d9548c..645045e7865 100644 --- a/tests/integration/test_keeper_session/test.py +++ b/tests/integration/test_keeper_session/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import 
time import socket import struct @@ -44,25 +45,8 @@ def destroy_zk_client(zk): pass -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for n in [node1]: - wait_node(n) + keeper_utils.wait_nodes(cluster, [node1]) def get_fake_zk(nodename, timeout=30.0): diff --git a/tests/integration/test_keeper_snapshot_on_exit/test.py b/tests/integration/test_keeper_snapshot_on_exit/test.py index 1ca5888ab4d..933e83414a4 100644 --- a/tests/integration/test_keeper_snapshot_on_exit/test.py +++ b/tests/integration/test_keeper_snapshot_on_exit/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import os from kazoo.client import KazooClient @@ -27,6 +28,7 @@ def get_fake_zk(node, timeout=30.0): def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster diff --git a/tests/integration/test_keeper_snapshot_small_distance/test.py b/tests/integration/test_keeper_snapshot_small_distance/test.py index 4351c5ac96f..6a64cf0ac92 100644 --- a/tests/integration/test_keeper_snapshot_small_distance/test.py +++ b/tests/integration/test_keeper_snapshot_small_distance/test.py @@ -2,6 +2,7 @@ ##!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from multiprocessing.dummy import Pool from kazoo.client import KazooClient, KazooState import random @@ -22,7 +23,7 @@ node3 = cluster.add_instance( def start_zookeeper(node): - node1.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"]) + node.exec_in_container(["bash", "-c", 
"/opt/zookeeper/bin/zkServer.sh start"]) def stop_zookeeper(node): @@ -66,6 +67,7 @@ def stop_clickhouse(node): def start_clickhouse(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots, node): diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index 08f60e538a4..a27ca6f92a5 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -3,6 +3,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -35,6 +36,7 @@ def create_random_path(prefix="", depth=1): def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) yield cluster @@ -50,6 +52,11 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -69,7 +76,7 @@ def test_state_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -123,7 +130,7 @@ def test_ephemeral_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_snapshots_multinode/test.py b/tests/integration/test_keeper_snapshots_multinode/test.py index 1461f35e6a4..52d4ae71e33 100644 --- a/tests/integration/test_keeper_snapshots_multinode/test.py +++ b/tests/integration/test_keeper_snapshots_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import 
helpers.keeper_utils as keeper_utils import random import string import os @@ -20,10 +21,15 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + wait_nodes() yield cluster @@ -94,6 +100,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml deleted file mode 100644 index 
43800bd2dfb..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py index e451f969b37..c8476568786 100644 --- a/tests/integration/test_keeper_three_nodes_start/test.py +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -3,6 +3,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -31,6 +32,7 @@ def get_fake_zk(nodename, timeout=30.0): def test_smoke(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2]) node1_zk = get_fake_zk("node1") node1_zk.create("/test_alive", b"aaaa") diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index f1de469c5a1..591dde6a70a 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -39,6 +40,7 @@ def get_fake_zk(nodename, timeout=30.0): def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -48,6 +50,7 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def delete_with_retry(node_name, path): @@ -74,10 +77,10 @@ def test_start_offline(started_cluster): p.map(start, [node2, node3]) assert 
node2.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) assert node3.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") @@ -110,10 +113,10 @@ def test_start_non_existing(started_cluster): p.map(start, [node2, node1]) assert node1.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) assert node2.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") @@ -138,6 +141,7 @@ def test_restart_third_node(started_cluster): node1_zk.create("/test_restart", b"aaaa") node3.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node3) assert node3.contains_in_log( "Connected to ZooKeeper (or Keeper) before internal Keeper start" diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index 8c0276f7d77..b87dcf6e758 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -29,6 +30,7 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster @@ -40,31 +42,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, 
timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -75,7 +52,6 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_two_nodes(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") @@ -107,7 +83,6 @@ def test_read_write_two_nodes(started_cluster): def test_read_write_two_nodes_with_blocade(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1", timeout=5.0) node2_zk = get_fake_zk("node2", timeout=5.0) diff --git a/tests/integration/test_keeper_znode_time/test.py b/tests/integration/test_keeper_znode_time/test.py index bff3d52014e..f2076acc4d2 100644 --- a/tests/integration/test_keeper_znode_time/test.py +++ b/tests/integration/test_keeper_znode_time/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -42,29 +43,8 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for node in 
[node1, node2, node3]: - wait_node(node) + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) def get_fake_zk(nodename, timeout=30.0): @@ -129,6 +109,7 @@ def test_server_restart(started_cluster): node1_zk.set("/test_server_restart/" + str(child_node), b"somevalue") node3.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node3) node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 50a9ee6a4a7..e459078f8ef 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -11,6 +12,7 @@ from kazoo.exceptions import ( ) import os import time +import socket cluster = ClickHouseCluster(__file__) @@ -60,6 +62,7 @@ def stop_clickhouse(): def start_clickhouse(): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots): From 7f4935b782b4519a6d1fd79fab2ae2aa6f6173ea Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 6 Sep 2022 05:52:31 -0700 Subject: [PATCH 065/173] Kusto-phase1: removed extra spaces --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index b250f5def60..f8e4f9eaab0 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -29,7 +29,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Po while (!token_pos->isEnd() && 
token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) { auto tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type == TokenType::Comma ) + if (token_pos->type == TokenType::Comma) new_expr = new_expr + logic_op; else new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 03cb5a8ad43..5e07e3c4d9a 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -61,7 +61,7 @@ String ParserKQLBase :: getExprFromToken(Pos & pos) if (token == "=") { ++pos; - if (String(pos->begin,pos->end) != "~" ) + if (String(pos->begin,pos->end) != "~") { alias = tokens.back(); tokens.pop_back(); From 896174e0ba5a18b79daf59c39b85493a1e905bff Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 6 Sep 2022 12:45:22 -0700 Subject: [PATCH 066/173] Kusto-phase1: fixed small build issue --- src/Parsers/Kusto/ParserKQLQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 5e07e3c4d9a..8591b0f04df 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -90,7 +90,7 @@ String ParserKQLBase :: getExprFromToken(Pos & pos) tokens.push_back(alias); } - for (auto token:tokens) + for (auto const &token : tokens) res = res.empty()? 
token : res +" " + token; return res; } @@ -231,7 +231,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else { - while (operation_pos.size() > 0) + while (!operation_pos.empty()) { auto prev_op = operation_pos.back().first; auto prev_pos = operation_pos.back().second; From f2588764f8c5253ad4a9b805a1a6bb739ac71b5c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 7 Sep 2022 01:45:13 +0000 Subject: [PATCH 067/173] Fix checkpoints & array parser --- src/Parsers/ExpressionListParsers.cpp | 39 ++++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index ed4274c05aa..feea031557e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -929,8 +929,8 @@ public: bool allow_alias = true; bool allow_alias_without_as_keyword = true; - std::optional checkpoint_pos; - Checkpoint checkpoint_type; + std::optional> saved_checkpoint; + Checkpoint current_checkpoint; protected: std::vector operators; @@ -1881,7 +1881,7 @@ private: class IntervalLayer : public Layer { public: - bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { /// INTERVAL 1 HOUR or INTERVAL expr HOUR /// @@ -1938,7 +1938,7 @@ public: if (state == 1) { - if (parseIntervalKind(pos, expected, interval_kind)) + if (action == Action::OPERATOR && parseIntervalKind(pos, expected, interval_kind)) { if (!mergeElement()) return false; @@ -2281,8 +2281,8 @@ bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & layers.pop_back(); - /// We try to check whether there were some checkpoint - while (!layers.empty() && !layers.back()->checkpoint_pos) + /// We try to check whether there was a checkpoint + while (!layers.empty() && !layers.back()->saved_checkpoint) layers.pop_back(); if (layers.empty()) @@ -2290,7 
+2290,12 @@ bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & /// Currently all checkpoints are located in operand section next = Action::OPERAND; - pos = layers.back()->checkpoint_pos.value(); + + auto saved_checkpoint = layers.back()->saved_checkpoint.value(); + layers.back()->saved_checkpoint.reset(); + + pos = saved_checkpoint.first; + layers.back()->current_checkpoint = saved_checkpoint.second; } } @@ -2351,30 +2356,26 @@ typename ParserExpressionImpl::ParseResult ParserExpressionIm } auto old_pos = pos; - if (layers.back()->checkpoint_type != Checkpoint::Interval && parseOperator(pos, "INTERVAL", expected)) + auto current_checkpoint = layers.back()->current_checkpoint; + layers.back()->current_checkpoint = Checkpoint::None; + + if (current_checkpoint != Checkpoint::Interval && parseOperator(pos, "INTERVAL", expected)) { - layers.back()->checkpoint_pos = old_pos; - layers.back()->checkpoint_type = Checkpoint::Interval; + layers.back()->saved_checkpoint = {old_pos, Checkpoint::Interval}; layers.push_back(std::make_unique()); return ParseResult::OPERAND; } - else if (layers.back()->checkpoint_type != Checkpoint::Case && parseOperator(pos, "CASE", expected)) + else if (current_checkpoint != Checkpoint::Case && parseOperator(pos, "CASE", expected)) { - layers.back()->checkpoint_pos = old_pos; - layers.back()->checkpoint_type = Checkpoint::Case; + layers.back()->saved_checkpoint = {old_pos, Checkpoint::Case}; layers.push_back(std::make_unique()); return ParseResult::OPERAND; } - else if (layers.back()->checkpoint_pos) - { - layers.back()->checkpoint_pos.reset(); - layers.back()->checkpoint_type = Checkpoint::None; - } if (ParseDateOperatorExpression(pos, tmp, expected) || ParseTimestampOperatorExpression(pos, tmp, expected) || tuple_literal_parser.parse(pos, tmp, expected) || - // array_literal_parser.parse(pos, tmp, expected) || + (layers.size() == 1 && array_literal_parser.parse(pos, tmp, expected)) || number_parser.parse(pos, tmp, expected) || 
literal_parser.parse(pos, tmp, expected) || asterisk_parser.parse(pos, tmp, expected) || From a0735a5816a751a0cc71886d65e37ff069250df3 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 6 Sep 2022 22:28:25 -0700 Subject: [PATCH 068/173] Kusto-phase1: use empty to check vector instead of size --- src/Parsers/Kusto/ParserKQLQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 8591b0f04df..04ee36705a9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -247,7 +247,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - if (operation_pos.size() > 0) + if (!operation_pos.empty()) { for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) --last_pos; From e51313b6b3ccec09da69c217f46c40c6dfa801ce Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 7 Sep 2022 17:44:29 +0200 Subject: [PATCH 069/173] Get rid of static threadpools --- ...chronousReadIndirectBufferFromRemoteFS.cpp | 4 +- ...ynchronousReadIndirectBufferFromRemoteFS.h | 4 +- src/Disks/IO/ThreadPoolReader.cpp | 26 +------- src/Disks/IO/ThreadPoolReader.h | 6 ++ src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 27 +------- src/Disks/IO/ThreadPoolRemoteFSReader.h | 4 ++ .../AzureBlobStorage/AzureObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/IObjectStorage.cpp | 20 +++--- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 4 +- .../ObjectStorages/Web/WebObjectStorage.cpp | 2 +- src/IO/AsynchronousReader.h | 5 +- src/IO/ParallelReadBuffer.cpp | 4 +- src/IO/ParallelReadBuffer.h | 4 +- src/IO/SynchronousReader.h | 3 +- src/IO/WriteBufferFromS3.cpp | 6 +- src/IO/WriteBufferFromS3.h | 6 +- src/Interpreters/Context.cpp | 64 +++++++++++++++++-- src/Interpreters/Context.h | 3 + src/Interpreters/threadPoolCallbackRunner.cpp | 52 ++++++++------- src/Interpreters/threadPoolCallbackRunner.h | 11 ++-- 
.../HDFS/AsynchronousReadBufferFromHDFS.cpp | 4 +- .../HDFS/AsynchronousReadBufferFromHDFS.h | 4 +- src/Storages/StorageS3.cpp | 4 +- src/Storages/StorageURL.cpp | 2 +- .../examples/async_read_buffer_from_hdfs.cpp | 2 +- 26 files changed, 157 insertions(+), 118 deletions(-) diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index cbfcbdf7b88..2717826f6ac 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -37,7 +37,7 @@ namespace ErrorCodes AsynchronousReadIndirectBufferFromRemoteFS::AsynchronousReadIndirectBufferFromRemoteFS( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, const ReadSettings & settings_, std::shared_ptr impl_, size_t min_bytes_for_seek_) @@ -111,7 +111,7 @@ std::future AsynchronousReadIndirectBufferFromRemot request.ignore = bytes_to_ignore; bytes_to_ignore = 0; } - return reader->submit(request); + return reader.submit(request); } diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h index 879658e239f..cf7feb416b2 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h @@ -31,7 +31,7 @@ class AsynchronousReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase { public: explicit AsynchronousReadIndirectBufferFromRemoteFS( - AsynchronousReaderPtr reader_, const ReadSettings & settings_, + IAsynchronousReader & reader_, const ReadSettings & settings_, std::shared_ptr impl_, size_t min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE); @@ -64,7 +64,7 @@ private: std::future asyncReadInto(char * data, size_t size); - AsynchronousReaderPtr reader; + IAsynchronousReader & reader; Int32 priority; diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index ec23f0be7fa..c81dfb50275 
100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -86,6 +86,7 @@ static bool hasBugInPreadV2() ThreadPoolReader::ThreadPoolReader(size_t pool_size, size_t queue_size_) : pool(pool_size, pool_size, queue_size_) + , schedule(threadPoolCallbackRunner(pool, "ThreadPoolRead")) { } @@ -200,22 +201,8 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMiss); - ThreadGroupStatusPtr thread_group; - if (CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()) - thread_group = CurrentThread::get().getThreadGroup(); - - auto task = std::make_shared>([request, fd, thread_group] + return schedule([request, fd]() -> Result { - if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT({ - if (thread_group) - CurrentThread::detachQuery(); - }); - - setThreadName("ThreadPoolRead"); - Stopwatch watch(CLOCK_MONOTONIC); size_t bytes_read = 0; @@ -249,14 +236,7 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); return Result{ .size = bytes_read, .offset = request.ignore }; - }); - - auto future = task->get_future(); - - /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". 
- pool.scheduleOrThrow([task]{ (*task)(); }, -request.priority); - - return future; + }, request.priority); } } diff --git a/src/Disks/IO/ThreadPoolReader.h b/src/Disks/IO/ThreadPoolReader.h index 15486a7ac1f..376a8889da3 100644 --- a/src/Disks/IO/ThreadPoolReader.h +++ b/src/Disks/IO/ThreadPoolReader.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -29,10 +30,15 @@ class ThreadPoolReader final : public IAsynchronousReader private: ThreadPool pool; + ThreadPoolCallbackRunner schedule; + public: ThreadPoolReader(size_t pool_size, size_t queue_size_); + std::future submit(Request request) override; + void wait() override { pool.wait(); } + /// pool automatically waits for all tasks in destructor. }; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index b5c9c0dc82d..b4e6b4ee007 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -34,28 +33,15 @@ IAsynchronousReader::Result RemoteFSFileDescriptor::readInto(char * data, size_t ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_) : pool(pool_size, pool_size, queue_size_) + , schedule(threadPoolCallbackRunner(pool, "VFSRead")) { } std::future ThreadPoolRemoteFSReader::submit(Request request) { - ThreadGroupStatusPtr thread_group; - if (CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()) - thread_group = CurrentThread::get().getThreadGroup(); - - auto task = std::make_shared>([request, thread_group] + return schedule([request]() -> Result { - if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT({ - if (thread_group) - CurrentThread::detachQuery(); - }); - - setThreadName("VFSRead"); - CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; auto * remote_fs_fd = assert_cast(request.descriptor.get()); @@ -69,14 +55,7 @@ std::future 
ThreadPoolRemoteFSReader::submit(Reques ProfileEvents::increment(ProfileEvents::ThreadpoolReaderReadBytes, result.offset ? result.size - result.offset : result.size); return Result{ .size = result.size, .offset = result.offset }; - }); - - auto future = task->get_future(); - - /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". - pool.scheduleOrThrow([task]{ (*task)(); }, -request.priority); - - return future; + }, request.priority); } } diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index 66e300697b8..c92eb7f7032 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -14,8 +15,11 @@ public: std::future submit(Request request) override; + void wait() override { pool.wait(); } + private: ThreadPool pool; + ThreadPoolCallbackRunner schedule; }; class RemoteFSFileDescriptor : public IAsynchronousReader::IFileDescriptor diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 40f68b86e9d..960d57f88df 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -107,7 +107,7 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL if (disk_read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { - auto reader = getThreadPoolReader(); + auto & reader = getThreadPoolReader(); return std::make_unique(reader, disk_read_settings, std::move(reader_impl)); } else diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index 1a128770015..a0901d7ffd1 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -13,20 +13,22 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } 
-AsynchronousReaderPtr IObjectStorage::getThreadPoolReader() +IAsynchronousReader & IObjectStorage::getThreadPoolReader() { - constexpr size_t pool_size = 50; - constexpr size_t queue_size = 1000000; - static AsynchronousReaderPtr reader = std::make_shared(pool_size, queue_size); - return reader; + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initalized"); + + return context->getThreadPoolReader(); } ThreadPool & IObjectStorage::getThreadPoolWriter() { - constexpr size_t pool_size = 100; - constexpr size_t queue_size = 1000000; - static ThreadPool writer(pool_size, pool_size, queue_size); - return writer; + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initalized"); + + return context->getThreadPoolWriter(); } void IObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index dc6683cfe95..52e1a2cb270 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -130,7 +130,7 @@ public: /// Path to directory with objects cache virtual const std::string & getCacheBasePath() const; - static AsynchronousReaderPtr getThreadPoolReader(); + static IAsynchronousReader & getThreadPoolReader(); static ThreadPool & getThreadPoolWriter(); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 48542b7c1f8..dcf8a898ad4 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -152,7 +152,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { - auto reader = getThreadPoolReader(); + auto & reader = getThreadPoolReader(); return std::make_unique(reader, 
disk_read_settings, std::move(s3_impl)); } else @@ -199,7 +199,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN settings_ptr->s3_settings, attributes, buf_size, - threadPoolCallbackRunner(getThreadPoolWriter()), + threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"), disk_write_settings); return std::make_unique( diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 3c7ce47340d..9e3fb19eb3a 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -183,7 +183,7 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { - auto reader = IObjectStorage::getThreadPoolReader(); + auto & reader = IObjectStorage::getThreadPoolReader(); return std::make_unique(reader, read_settings, std::move(web_impl), min_bytes_for_seek); } else diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h index 4583f594c37..ea103a87460 100644 --- a/src/IO/AsynchronousReader.h +++ b/src/IO/AsynchronousReader.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -18,7 +19,7 @@ namespace DB * For example, this interface may not suffice if you want to serve 10 000 000 of 4 KiB requests per second. * This interface is fairly limited. */ -class IAsynchronousReader +class IAsynchronousReader : private boost::noncopyable { public: /// For local filesystems, the file descriptor is simply integer @@ -68,6 +69,8 @@ public: /// The method can be called concurrently from multiple threads. virtual std::future submit(Request request) = 0; + virtual void wait() = 0; + /// Destructor must wait for all not completed request and ignore the results. /// It may also cancel the requests. 
virtual ~IAsynchronousReader() = default; diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index e7bb3dc72a8..d055a42fcb6 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -43,7 +43,7 @@ struct ParallelReadBuffer::ReadWorker }; ParallelReadBuffer::ParallelReadBuffer( - std::unique_ptr reader_factory_, CallbackRunner schedule_, size_t max_working_readers_) + std::unique_ptr reader_factory_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers_) : SeekableReadBuffer(nullptr, 0) , max_working_readers(max_working_readers_) , schedule(std::move(schedule_)) @@ -71,7 +71,7 @@ bool ParallelReadBuffer::addReaderToPool() auto worker = read_workers.emplace_back(std::make_shared(std::move(reader))); ++active_working_reader; - schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }); + schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }, 0); return true; } diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index 45b98f8c977..2f8cbf60c14 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -38,7 +38,7 @@ public: virtual off_t seek(off_t off, int whence) = 0; }; - explicit ParallelReadBuffer(std::unique_ptr reader_factory_, CallbackRunner schedule_, size_t max_working_readers); + explicit ParallelReadBuffer(std::unique_ptr reader_factory_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers); ~ParallelReadBuffer() override { finishAndWait(); } @@ -75,7 +75,7 @@ private: size_t max_working_readers; std::atomic_size_t active_working_reader{0}; - CallbackRunner schedule; + ThreadPoolCallbackRunner schedule; std::unique_ptr reader_factory; diff --git a/src/IO/SynchronousReader.h b/src/IO/SynchronousReader.h index 7b5113a4487..238d6e9371e 100644 --- a/src/IO/SynchronousReader.h +++ b/src/IO/SynchronousReader.h @@ -13,7 +13,8 @@ class SynchronousReader final : public 
IAsynchronousReader { public: std::future submit(Request request) override; + + void wait() override {} }; } - diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 7646e2514a5..2a5b2faee66 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -59,7 +59,7 @@ WriteBufferFromS3::WriteBufferFromS3( const S3Settings::ReadWriteSettings & s3_settings_, std::optional> object_metadata_, size_t buffer_size_, - ScheduleFunc schedule_, + ThreadPoolCallbackRunner schedule_, const WriteSettings & write_settings_) : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) @@ -266,7 +266,7 @@ void WriteBufferFromS3::writePart() } task_finish_notify(); - }); + }, 0); } catch (...) { @@ -394,7 +394,7 @@ void WriteBufferFromS3::makeSinglepartUpload() } task_notify_finish(); - }); + }, 0); } catch (...) { diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index ae03299ffbd..b655fe1d14b 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -33,7 +34,6 @@ namespace Aws::S3::Model namespace DB { -using ScheduleFunc = std::function)>; class WriteBufferFromFile; /** @@ -53,7 +53,7 @@ public: const S3Settings::ReadWriteSettings & s3_settings_, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - ScheduleFunc schedule_ = {}, + ThreadPoolCallbackRunner schedule_ = {}, const WriteSettings & write_settings_ = {}); ~WriteBufferFromS3() override; @@ -106,7 +106,7 @@ private: /// Following fields are for background uploads in thread pool (if specified). /// We use std::function to avoid dependency of Interpreters - const ScheduleFunc schedule; + const ThreadPoolCallbackRunner schedule; std::unique_ptr put_object_task; /// Does not need protection by mutex because of the logic around is_finished field. 
std::list TSA_GUARDED_BY(bg_tasks_mutex) upload_object_tasks; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 91604c8cc82..edfa141b0ed 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -230,6 +231,9 @@ struct ContextSharedPart mutable std::unique_ptr distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) mutable std::unique_ptr message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka) + mutable std::unique_ptr threadpool_reader; + mutable std::unique_ptr threadpool_writer; + mutable ThrottlerPtr replicated_fetches_throttler; /// A server-wide throttler for replicated fetches mutable ThrottlerPtr replicated_sends_throttler; /// A server-wide throttler for replicated sends mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads @@ -313,15 +317,32 @@ struct ContextSharedPart ~ContextSharedPart() { - /// Wait for thread pool for background writes, + /// Wait for thread pool for background reads and writes, /// since it may use per-user MemoryTracker which will be destroyed here. - try + if (threadpool_reader) { - IObjectStorage::getThreadPoolWriter().wait(); + try + { + threadpool_reader->wait(); + threadpool_reader.reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } - catch (...) + + if (threadpool_writer) { - tryLogCurrentException(__PRETTY_FUNCTION__); + try + { + threadpool_writer->wait(); + threadpool_writer.reset(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } try @@ -3407,6 +3428,39 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const return shared->common_executor; } +IAsynchronousReader & Context::getThreadPoolReader() const +{ + const auto & config = getConfigRef(); + + auto lock = getLock(); + + if (!shared->threadpool_reader) + { + auto pool_size = config.getUInt(".threadpool_reader_pool_size", 100); + auto queue_size = config.getUInt(".threadpool_reader_queue_size", 1000000); + + shared->threadpool_reader = std::make_unique(pool_size, queue_size); + } + + return *shared->threadpool_reader; +} + +ThreadPool & Context::getThreadPoolWriter() const +{ + const auto & config = getConfigRef(); + + auto lock = getLock(); + + if (!shared->threadpool_writer) + { + auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100); + auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000); + + shared->threadpool_writer = std::make_unique(pool_size, pool_size, queue_size); + } + + return *shared->threadpool_writer; +} ReadSettings Context::getReadSettings() const { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 2997fc370bf..35e5962903b 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1014,6 +1014,9 @@ public: OrdinaryBackgroundExecutorPtr getFetchesExecutor() const; OrdinaryBackgroundExecutorPtr getCommonExecutor() const; + IAsynchronousReader & getThreadPoolReader() const; + ThreadPool & getThreadPoolWriter() const; + /** Get settings for reading from filesystem. 
*/ ReadSettings getReadSettings() const; diff --git a/src/Interpreters/threadPoolCallbackRunner.cpp b/src/Interpreters/threadPoolCallbackRunner.cpp index adab79d06cb..9bf32e4f2e1 100644 --- a/src/Interpreters/threadPoolCallbackRunner.cpp +++ b/src/Interpreters/threadPoolCallbackRunner.cpp @@ -1,40 +1,44 @@ #include "threadPoolCallbackRunner.h" #include - #include +#include +#include +#include + namespace DB { -CallbackRunner threadPoolCallbackRunner(ThreadPool & pool) +template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) { - return [pool = &pool, thread_group = CurrentThread::getGroup()](auto callback) mutable + return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function && callback, size_t priority) mutable -> std::future { - pool->scheduleOrThrow( - [&, callback = std::move(callback), thread_group]() - { + auto task = std::make_shared>([thread_group, thread_name, callback = std::move(callback)]() -> Result + { + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT_SAFE({ if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT_SAFE({ - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - - /// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent. - /// Typically, it may be changes from Process to User. - /// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed. - /// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well. - /// When, finally, we destroy the thread (and the ThreadStatus), - /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory, - /// and by this time user-level memory tracker may be already destroyed. - /// - /// As a work-around, reset memory tracker to total, which is always alive. 
- CurrentThread::get().memory_tracker.setParent(&total_memory_tracker); - }); - callback(); + CurrentThread::detachQueryIfNotDetached(); }); + + setThreadName(thread_name.data()); + + return callback(); + }); + + auto future = task->get_future(); + + /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". + pool->scheduleOrThrow([task]{ (*task)(); }, -priority); + + return future; }; } +template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); +template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); + } diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index 59d06f2f1bc..c146ac67482 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -1,15 +1,18 @@ #pragma once #include +#include namespace DB { -/// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously -using CallbackRunner = std::function)>; +/// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously. +template +using ThreadPoolCallbackRunner = std::function(std::function &&, size_t priority)>; -/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()' -CallbackRunner threadPoolCallbackRunner(ThreadPool & pool); +/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'. 
+template +ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); } diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp index ef6eb5e7a80..b2194020dca 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -35,7 +35,7 @@ namespace ErrorCodes } AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFS( - AsynchronousReaderPtr reader_, const ReadSettings & settings_, std::shared_ptr impl_) + IAsynchronousReader & reader_, const ReadSettings & settings_, std::shared_ptr impl_) : BufferWithOwnMemory(settings_.remote_fs_buffer_size) , reader(reader_) , priority(settings_.priority) @@ -72,7 +72,7 @@ std::future AsynchronousReadBufferFromHDFS::asyncRe request.offset = file_offset_of_buffer_end; request.priority = priority; request.ignore = 0; - return reader->submit(request); + return reader.submit(request); } void AsynchronousReadBufferFromHDFS::prefetch() diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h index a65e74a8c73..0cb4b9b7a74 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h @@ -24,7 +24,7 @@ class AsynchronousReadBufferFromHDFS : public BufferWithOwnMemory impl_); @@ -51,7 +51,7 @@ private: std::future asyncReadInto(char * data, size_t size); - AsynchronousReaderPtr reader; + IAsynchronousReader & reader; Int32 priority; std::shared_ptr impl; std::future prefetch_future; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 1685de55b6e..5b48f0e78ed 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -499,7 +499,7 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k LOG_TRACE( log, "Downloading from S3 in {} threads. 
Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size); - return std::make_unique(std::move(factory), threadPoolCallbackRunner(IOThreadPool::get()), download_thread_num); + return std::make_unique(std::move(factory), threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelRead"), download_thread_num); } String StorageS3Source::getName() const @@ -607,7 +607,7 @@ public: s3_configuration_.rw_settings, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, - threadPoolCallbackRunner(IOThreadPool::get()), + threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelRead"), context->getWriteSettings()), compression_method, 3); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 8d0d85a39ef..a44e0de9d3d 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -351,7 +351,7 @@ namespace return wrapReadBufferWithCompressionMethod( std::make_unique( std::move(read_buffer_factory), - threadPoolCallbackRunner(IOThreadPool::get()), + threadPoolCallbackRunner(IOThreadPool::get(), "URLParallelRead"), download_threads), compression_method, settings.zstd_window_log_max); diff --git a/src/Storages/examples/async_read_buffer_from_hdfs.cpp b/src/Storages/examples/async_read_buffer_from_hdfs.cpp index b285857d684..17aa5479de5 100644 --- a/src/Storages/examples/async_read_buffer_from_hdfs.cpp +++ b/src/Storages/examples/async_read_buffer_from_hdfs.cpp @@ -25,7 +25,7 @@ int main() String path = "/path/to/hdfs/file"; ReadSettings settings = {}; auto in = std::make_unique(hdfs_namenode_url, path, *config, settings); - auto reader = IObjectStorage::getThreadPoolReader(); + auto & reader = IObjectStorage::getThreadPoolReader(); AsynchronousReadBufferFromHDFS buf(reader, {}, std::move(in)); String output; From e06ed0962e82502125d8f9db7942b5f6c9a01db8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 7 Sep 2022 17:56:20 +0200 Subject: [PATCH 070/173] Fix style check and typos check --- 
src/Disks/ObjectStorages/IObjectStorage.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index a0901d7ffd1..22f0ba65975 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -11,13 +11,14 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } IAsynchronousReader & IObjectStorage::getThreadPoolReader() { auto context = Context::getGlobalContextInstance(); if (!context) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initalized"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); return context->getThreadPoolReader(); } @@ -26,7 +27,7 @@ ThreadPool & IObjectStorage::getThreadPoolWriter() { auto context = Context::getGlobalContextInstance(); if (!context) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initalized"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); return context->getThreadPoolWriter(); } From 66d65df1932efe5dd145256e448f620f34c05ed7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 7 Sep 2022 20:17:36 +0200 Subject: [PATCH 071/173] Fix thread status destruction order for clickhouse-local --- programs/local/LocalServer.cpp | 2 +- src/Client/ClientBase.h | 9 ++++++--- src/Client/LocalConnection.cpp | 3 --- src/Client/LocalConnection.h | 1 - 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index ce31600642a..f7756d5fb13 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -366,7 +366,7 @@ int LocalServer::main(const std::vector & /*args*/) try { UseSSL use_ssl; - ThreadStatus thread_status; + thread_status.emplace(); StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true)); diff --git 
a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 6b19c1b8e02..0d5f7298ffa 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -176,9 +176,6 @@ protected: bool stderr_is_a_tty = false; /// stderr is a terminal. uint64_t terminal_width = 0; - ServerConnectionPtr connection; - ConnectionParameters connection_parameters; - String format; /// Query results output format. bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering. bool select_into_file_and_stdout = false; /// If writing result INTO OUTFILE AND STDOUT. It affects progress rendering. @@ -199,6 +196,12 @@ protected: SharedContextHolder shared_context; ContextMutablePtr global_context; + /// thread status should be destructed before shared context because it relies on process list. + std::optional thread_status; + + ServerConnectionPtr connection; + ConnectionParameters connection_parameters; + /// Buffer that reads from stdin in batch mode. ReadBufferFromFileDescriptor std_in{STDIN_FILENO}; /// Console output. diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index b10e24f1ae4..7ac68324915 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -31,9 +31,6 @@ LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool /// Authenticate and create a context to execute queries. session.authenticate("default", "", Poco::Net::SocketAddress{}); session.makeSessionContext(); - - if (!CurrentThread::isInitialized()) - thread_status.emplace(); } LocalConnection::~LocalConnection() diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 7967874d11f..7a1a73006ac 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -156,7 +156,6 @@ private: String description = "clickhouse-local"; std::optional state; - std::optional thread_status; /// Last "server" packet. 
std::optional next_packet_type; From f83aba3586e058b3e36d110d0f509434ba184a3e Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 7 Sep 2022 21:41:37 +0000 Subject: [PATCH 072/173] Fix tests --- src/Parsers/ExpressionListParsers.cpp | 49 +++++++------------ src/Parsers/ExpressionListParsers.h | 19 ------- src/Parsers/ParserCreateQuery.cpp | 2 +- src/Parsers/ParserCreateQuery.h | 2 +- .../ParserDictionaryAttributeDeclaration.cpp | 2 +- src/Parsers/ParserTablesInSelectQuery.cpp | 2 +- 6 files changed, 22 insertions(+), 54 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index feea031557e..636077ae0c1 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2105,7 +2105,7 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte return true; } -template +template struct ParserExpressionImpl { static std::vector> operators_table; @@ -2149,30 +2149,17 @@ struct ParserExpressionImpl bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - return ParserExpressionImpl().parse(pos, node, expected); -} - -bool ParserTernaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - return ParserExpressionImpl().parse(pos, node, expected); -} - -bool ParserLogicalOrExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - /// Parses everything with lower than "OR" operator priority - /// TODO: make ":" and "OR" different priority and check if everything is ok - return ParserExpressionImpl().parse(pos, node, expected); + return ParserExpressionImpl().parse(pos, node, expected); } bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - /// Parses everything with lower than "INTERVAL" operator priority in previous parser return ParserKeyword("INTERVAL").parse(pos, node, expected) - && ParserExpressionImpl().parse(pos, node, expected); + && 
ParserExpressionImpl().parse(pos, node, expected); } -template -std::vector> ParserExpressionImpl::operators_table({ +template +std::vector> ParserExpressionImpl::operators_table({ {"->", Operator("lambda", 1, 2, OperatorType::Lambda)}, {"?", Operator("", 2, 0, OperatorType::StartIf)}, {":", Operator("if", 3, 3, OperatorType::FinishIf)}, @@ -2211,24 +2198,24 @@ std::vector> ParserExpressionImpl -std::vector> ParserExpressionImpl::unary_operators_table({ +template +std::vector> ParserExpressionImpl::unary_operators_table({ {"NOT", Operator("not", 5, 1)}, {"-", Operator("negate", 13, 1)} }); -template -Operator ParserExpressionImpl::finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); +template +Operator ParserExpressionImpl::finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); -template -const char * ParserExpressionImpl::overlapping_operators_to_skip[] = +template +const char * ParserExpressionImpl::overlapping_operators_to_skip[] = { "IN PARTITION", nullptr }; -template -bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & node, Expected & expected) +template +bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & node, Expected & expected) { Action next = Action::OPERAND; @@ -2299,8 +2286,8 @@ bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & } } -template -typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected) +template +typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected) { ASTPtr tmp; @@ -2464,8 +2451,8 @@ typename ParserExpressionImpl::ParseResult ParserExpressionIm return ParseResult::OPERATOR; } -template -typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected) +template +typename ParserExpressionImpl::ParseResult 
ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected) { ASTPtr tmp; @@ -2482,7 +2469,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionIm auto cur_op = operators_table.begin(); for (; cur_op != operators_table.end(); ++cur_op) { - if (cur_op->second.priority >= MinPriority && parseOperator(pos, cur_op->first, expected)) + if (parseOperator(pos, cur_op->first, expected)) break; } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 95f36fc1c3e..3ed6055af84 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -232,25 +232,6 @@ protected: }; -class ParserLogicalOrExpression : public IParserBase -{ -protected: - const char * getName() const override { return "logical-OR expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -/** An expression with ternary operator. - * For example, a = 1 ? b + 1 : c * 2. - */ -class ParserTernaryOperatorExpression : public IParserBase -{ -protected: - const char * getName() const override { return "expression with ternary operator"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - class ParserExpression : public IParserBase { protected: diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 08240abe8c6..2349b781259 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -152,7 +152,7 @@ bool ParserConstraintDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_assume("ASSUME"); ParserIdentifier name_p; - ParserLogicalOrExpression expression_p; + ParserExpression expression_p; ASTPtr name; ASTPtr expr; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index f56e0a4c3a0..b5480fa6eae 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -134,7 +134,7 @@ bool 
IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_remove{"REMOVE"}; ParserKeyword s_type{"TYPE"}; ParserKeyword s_collate{"COLLATE"}; - ParserTernaryOperatorExpression expr_parser; + ParserExpression expr_parser; ParserStringLiteral string_literal_parser; ParserLiteral literal_parser; ParserCodec codec_parser; diff --git a/src/Parsers/ParserDictionaryAttributeDeclaration.cpp b/src/Parsers/ParserDictionaryAttributeDeclaration.cpp index 44bb7fb6057..9910c73e541 100644 --- a/src/Parsers/ParserDictionaryAttributeDeclaration.cpp +++ b/src/Parsers/ParserDictionaryAttributeDeclaration.cpp @@ -20,7 +20,7 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_is_object_id{"IS_OBJECT_ID"}; ParserLiteral default_parser; ParserArrayOfLiterals array_literals_parser; - ParserTernaryOperatorExpression expression_parser; + ParserExpression expression_parser; /// mandatory attribute name ASTPtr name; diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index 8137093b990..ef39df8ca52 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -226,7 +226,7 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec else if (ParserKeyword("ON").ignore(pos, expected)) { /// OR is operator with lowest priority, so start parsing from it. 
- if (!ParserLogicalOrExpression().parse(pos, table_join->on_expression, expected)) + if (!ParserExpression().parse(pos, table_join->on_expression, expected)) return false; } else From 3dc19b4fe2d6552b28e55d5ec56e576de9c83c84 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 8 Sep 2022 12:23:59 +0200 Subject: [PATCH 073/173] Fix --- src/Disks/IO/ThreadPoolReader.cpp | 3 ++- src/Disks/IO/ThreadPoolReader.h | 2 -- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 3 ++- src/Disks/IO/ThreadPoolRemoteFSReader.h | 1 - src/IO/ParallelReadBuffer.h | 5 +++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index c81dfb50275..2987d2ef382 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -86,7 +86,6 @@ static bool hasBugInPreadV2() ThreadPoolReader::ThreadPoolReader(size_t pool_size, size_t queue_size_) : pool(pool_size, pool_size, queue_size_) - , schedule(threadPoolCallbackRunner(pool, "ThreadPoolRead")) { } @@ -201,6 +200,8 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMiss); + auto schedule = threadPoolCallbackRunner(pool, "ThreadPoolRead"); + return schedule([request, fd]() -> Result { Stopwatch watch(CLOCK_MONOTONIC); diff --git a/src/Disks/IO/ThreadPoolReader.h b/src/Disks/IO/ThreadPoolReader.h index 376a8889da3..dc754e0a81c 100644 --- a/src/Disks/IO/ThreadPoolReader.h +++ b/src/Disks/IO/ThreadPoolReader.h @@ -30,8 +30,6 @@ class ThreadPoolReader final : public IAsynchronousReader private: ThreadPool pool; - ThreadPoolCallbackRunner schedule; - public: ThreadPoolReader(size_t pool_size, size_t queue_size_); diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index b4e6b4ee007..561acc00f6f 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -33,13 +33,14 @@ 
IAsynchronousReader::Result RemoteFSFileDescriptor::readInto(char * data, size_t ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_) : pool(pool_size, pool_size, queue_size_) - , schedule(threadPoolCallbackRunner(pool, "VFSRead")) { } std::future ThreadPoolRemoteFSReader::submit(Request request) { + auto schedule = threadPoolCallbackRunner(pool, "VFSRead"); + return schedule([request]() -> Result { CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index c92eb7f7032..f3777c6638d 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -19,7 +19,6 @@ public: private: ThreadPool pool; - ThreadPoolCallbackRunner schedule; }; class RemoteFSFileDescriptor : public IAsynchronousReader::IFileDescriptor diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index 2f8cbf60c14..d6e9b7989ad 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -33,12 +33,13 @@ public: class ReadBufferFactory : public WithFileSize { public: + ~ReadBufferFactory() override = default; + virtual SeekableReadBufferPtr getReader() = 0; - virtual ~ReadBufferFactory() override = default; virtual off_t seek(off_t off, int whence) = 0; }; - explicit ParallelReadBuffer(std::unique_ptr reader_factory_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers); + ParallelReadBuffer(std::unique_ptr reader_factory_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers); ~ParallelReadBuffer() override { finishAndWait(); } From 6a885430d7e5d6d9084de67793e0560aecc1ce99 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 9 Sep 2022 13:36:23 +0200 Subject: [PATCH 074/173] Add assertion --- src/Common/ThreadStatus.cpp | 4 ++++ src/Interpreters/Context.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 
b62a7af6c71..2794d12d926 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -92,6 +92,8 @@ std::vector ThreadGroupStatus ThreadStatus::ThreadStatus() : thread_id{getThreadId()} { + chassert(!current_thread); + last_rusage = std::make_unique(); memory_tracker.setDescription("(for thread)"); @@ -145,6 +147,8 @@ ThreadStatus::ThreadStatus() ThreadStatus::~ThreadStatus() { + chassert(current_thread); + memory_tracker.adjustWithUntrackedMemory(untracked_memory); if (thread_group) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index edfa141b0ed..76a29f10b5a 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -323,6 +323,7 @@ struct ContextSharedPart { try { + LOG_DEBUG(log, "Desctructing threadpool reader"); threadpool_reader->wait(); threadpool_reader.reset(); } @@ -336,6 +337,7 @@ struct ContextSharedPart { try { + LOG_DEBUG(log, "Desctructing threadpool writer"); threadpool_writer->wait(); threadpool_writer.reset(); } From 86f74fc2d8c787198288733fc6a3625c4abe2b81 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 9 Sep 2022 14:48:03 +0200 Subject: [PATCH 075/173] Update src/Common/ThreadStatus.cpp Co-authored-by: Alexander Tokmakov --- src/Common/ThreadStatus.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 2794d12d926..1e60fb96916 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -147,7 +147,7 @@ ThreadStatus::ThreadStatus() ThreadStatus::~ThreadStatus() { - chassert(current_thread); + chassert(current_thread == this); memory_tracker.adjustWithUntrackedMemory(untracked_memory); From b987e1896061aff39ca49ab1ee43a792709f6e82 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 9 Sep 2022 21:07:50 +0300 Subject: [PATCH 076/173] Update src/Common/ThreadStatus.cpp --- src/Common/ThreadStatus.cpp | 2 -- 1 file changed, 2 
deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 1e60fb96916..5309aad6ebb 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -92,8 +92,6 @@ std::vector ThreadGroupStatus ThreadStatus::ThreadStatus() : thread_id{getThreadId()} { - chassert(!current_thread); - last_rusage = std::make_unique(); memory_tracker.setDescription("(for thread)"); From 4fb9bafb2ed51af2b33158dc4f3a60ba8dc74bca Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 9 Sep 2022 18:40:39 +0000 Subject: [PATCH 077/173] Fixes & remove more of old parser --- src/Parsers/ExpressionElementParsers.cpp | 92 ++----------------- src/Parsers/ExpressionElementParsers.h | 20 ---- src/Parsers/ExpressionListParsers.cpp | 8 +- src/Parsers/ExpressionListParsers.h | 9 ++ .../00984_parser_stack_overflow.reference | 1 + 5 files changed, 26 insertions(+), 104 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index dfce06fa7c5..74fbe5eee54 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -40,6 +40,7 @@ #include #include +#include namespace DB @@ -53,89 +54,6 @@ namespace ErrorCodes } -bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr contents_node; - ParserExpressionList contents(false); - - if (pos->type != TokenType::OpeningSquareBracket) - return false; - ++pos; - - if (!contents.parse(pos, contents_node, expected)) - return false; - - if (pos->type != TokenType::ClosingSquareBracket) - return false; - ++pos; - - auto function_node = std::make_shared(); - function_node->name = "array"; - function_node->arguments = contents_node; - function_node->children.push_back(contents_node); - node = function_node; - - return true; -} - - -bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr contents_node; - ParserExpressionList contents(false); - - if 
(pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - - if (!contents.parse(pos, contents_node, expected)) - return false; - - bool is_elem = true; - if (pos->type == TokenType::Comma) - { - is_elem = false; - ++pos; - } - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - - const auto & expr_list = contents_node->as(); - - /// Empty expression in parentheses is not allowed. - if (expr_list.children.empty()) - { - expected.add(pos, "non-empty parenthesized list of expressions"); - return false; - } - - /// Special case for one-element tuple. - if (expr_list.children.size() == 1 && is_elem) - { - auto * ast_literal = expr_list.children.front()->as(); - /// But only if its argument is not tuple, - /// since otherwise it will do incorrect transformation: - /// - /// (foo,bar) IN (('foo','bar')) -> (foo,bar) IN ('foo','bar') - if (!(ast_literal && ast_literal->value.getType() == Field::Types::Tuple)) - { - node = expr_list.children.front(); - return true; - } - } - - auto function_node = std::make_shared(); - function_node->name = "tuple"; - function_node->arguments = contents_node; - function_node->children.push_back(contents_node); - node = function_node; - - return true; -} - - bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr select_node; @@ -1885,6 +1803,7 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, std::vector> layers; layers.emplace_back(pos); + pos.increaseDepth(); ParserLiteral literal_p; @@ -1905,6 +1824,7 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, literal->end = ++pos; layers.pop_back(); + pos.decreaseDepth(); if (layers.empty()) { @@ -1913,6 +1833,7 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, } layers.back().arr.push_back(literal->value); + continue; } else if (pos->type == TokenType::Comma) { @@ -1931,9 +1852,14 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, ASTPtr 
literal_node; if (literal_p.parse(pos, literal_node, expected)) + { layers.back().arr.push_back(literal_node->as().value); + } else if (pos->type == opening_bracket) + { layers.emplace_back(pos); + pos.increaseDepth(); + } else return false; } diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index abd63f81424..3bdb9731d79 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -9,26 +9,6 @@ namespace DB { -class ParserArray : public IParserBase -{ -protected: - const char * getName() const override { return "array"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -/** If in parenthesis an expression from one element - returns this element in `node`; - * or if there is a SELECT subquery in parenthesis, then this subquery returned in `node`; - * otherwise returns `tuple` function from the contents of brackets. - */ -class ParserParenthesisExpression : public IParserBase -{ -protected: - const char * getName() const override { return "parenthesized expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - /** The SELECT subquery is in parenthesis. 
*/ class ParserSubquery : public IParserBase diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 636077ae0c1..e337324df01 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2158,6 +2158,12 @@ bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expec && ParserExpressionImpl().parse(pos, node, expected); } +bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return ParserToken(TokenType::OpeningSquareBracket).ignore(pos, expected) + && ParserExpressionImpl().parse(pos, node, expected); +} + template std::vector> ParserExpressionImpl::operators_table({ {"->", Operator("lambda", 1, 2, OperatorType::Lambda)}, @@ -2362,7 +2368,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::try if (ParseDateOperatorExpression(pos, tmp, expected) || ParseTimestampOperatorExpression(pos, tmp, expected) || tuple_literal_parser.parse(pos, tmp, expected) || - (layers.size() == 1 && array_literal_parser.parse(pos, tmp, expected)) || + array_literal_parser.parse(pos, tmp, expected) || number_parser.parse(pos, tmp, expected) || literal_parser.parse(pos, tmp, expected) || asterisk_parser.parse(pos, tmp, expected) || diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 3ed6055af84..c235739d4ed 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -240,6 +240,15 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/// TODO: ? +class ParserArray : public IParserBase +{ +protected: + const char * getName() const override { return "array"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + // It's used to parse expressions in table function. 
class ParserTableFunctionExpression : public IParserBase { diff --git a/tests/queries/0_stateless/00984_parser_stack_overflow.reference b/tests/queries/0_stateless/00984_parser_stack_overflow.reference index 35186521642..0cf6a1f96df 100644 --- a/tests/queries/0_stateless/00984_parser_stack_overflow.reference +++ b/tests/queries/0_stateless/00984_parser_stack_overflow.reference @@ -1,3 +1,4 @@ exceeded +exceeded 20002 1 From 26bfabbb39105bbe53407f52469218b28f7ded87 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Sep 2022 15:26:45 +0000 Subject: [PATCH 078/173] Fix bugs & remove old parsers --- src/Parsers/ExpressionElementParsers.cpp | 770 ----------------------- src/Parsers/ExpressionElementParsers.h | 38 +- src/Parsers/ExpressionListParsers.cpp | 169 +++-- src/Parsers/ExpressionListParsers.h | 21 + src/Parsers/ParserExternalDDLQuery.cpp | 2 +- 5 files changed, 148 insertions(+), 852 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 74fbe5eee54..1bb1df351fd 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -40,7 +40,6 @@ #include #include -#include namespace DB @@ -214,775 +213,6 @@ ASTPtr createFunctionCast(const ASTPtr & expr_ast, const ASTPtr & type_ast) } -namespace -{ - bool parseCastAs(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /** Possible variants for cast operator cast(expr [[AS] alias_1] AS Type), cast(expr [[AS] alias_1], type_expr [[as] alias_2]). - * First try to match with cast(expr [[AS] alias_1] AS Type) - * Then try to match with cast(expr [[AS] alias_1], type_expr [[as] alias_2]). 
- */ - - ASTPtr expr_node; - ASTPtr type_node; - ASTPtr identifier_node; - - if (ParserExpression().parse(pos, expr_node, expected)) - { - ParserKeyword as_keyword_parser("AS"); - bool parse_as = as_keyword_parser.ignore(pos, expected); - - /// CAST (a b AS UInt32) OR CAST (a b, expr) - - if (!parse_as && ParserIdentifier().parse(pos, identifier_node, expected)) - { - expr_node->setAlias(getIdentifierName(identifier_node)); - parse_as = as_keyword_parser.ignore(pos, expected); - } - - if (parse_as) - { - /// CAST (a AS Type) OR CAST (a AS b AS Type) OR CAST (a AS b, expr) - - auto begin = pos; - auto expected_copy = expected; - bool next_identifier = ParserIdentifier().ignore(begin, expected_copy); - bool next_identifier_with_comma = next_identifier && ParserToken(TokenType::Comma).ignore(begin, expected_copy); - bool next_identifier_with_as - = next_identifier && !next_identifier_with_comma && as_keyword_parser.ignore(begin, expected_copy); - - if (next_identifier_with_as) - { - if (ParserIdentifier().parse(pos, identifier_node, expected) && as_keyword_parser.ignore(pos, expected)) - expr_node->setAlias(getIdentifierName(identifier_node)); - else - return false; - } - - if (!next_identifier_with_comma && ParserDataType().parse(pos, type_node, expected)) - { - node = createFunctionCast(expr_node, type_node); - return true; - } - } - - /// CAST(a AS b, expr) - - if (parse_as) - { - if (ParserIdentifier().parse(pos, identifier_node, expected)) - expr_node->setAlias(getIdentifierName(identifier_node)); - else - return false; - } - - if (ParserToken(TokenType::Comma).ignore(pos, expected) - && ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, type_node, expected)) - { - node = makeASTFunction("CAST", expr_node, type_node); - return true; - } - } - - return false; - } - - bool parseSubstring(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or 
SUBSTRING(expr, start, length) - /// The latter will be parsed normally as a function later. - - ParserKeyword as_keyword_parser("AS"); - ParserIdentifier identifier_parser; - - ASTPtr expr_node; - ASTPtr start_node; - ASTPtr length_node; - - if (!ParserExpression().parse(pos, expr_node, expected)) - return false; - - auto from_keyword_parser = ParserKeyword("FROM"); - bool from_exists = from_keyword_parser.check(pos, expected); - - if (!from_exists && pos->type != TokenType::Comma) - { - ASTPtr identifier_node; - bool parsed_as = as_keyword_parser.ignore(pos, expected); - bool parsed_identifer = identifier_parser.parse(pos, identifier_node, expected); - - if (parsed_as && !parsed_identifer) - return false; - - if (parsed_identifer) - expr_node->setAlias(getIdentifierName(identifier_node)); - - from_exists = from_keyword_parser.check(pos, expected); - } - - if (pos->type == TokenType::Comma) - { - if (from_exists) - return false; - - ++pos; - } - - if (!ParserExpression().parse(pos, start_node, expected)) - return false; - - auto for_keyword_parser = ParserKeyword("FOR"); - bool for_exists = for_keyword_parser.check(pos, expected); - if (!for_exists && pos->type != TokenType::Comma) - { - ASTPtr identifier_node; - bool parsed_as = as_keyword_parser.ignore(pos, expected); - bool parsed_identifer = identifier_parser.parse(pos, identifier_node, expected); - if (parsed_as && !parsed_identifer) - return false; - - if (parsed_identifer) - start_node->setAlias(getIdentifierName(identifier_node)); - - for_exists = for_keyword_parser.check(pos, expected); - } - - bool need_parse_length_expression = for_exists; - if (pos->type == TokenType::Comma) - { - if (for_exists) - return false; - - ++pos; - need_parse_length_expression = true; - } - - if (need_parse_length_expression - && !ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, length_node, expected)) - return false; - - /// Convert to canonical representation in functional form: 
SUBSTRING(expr, start, length) - if (length_node) - node = makeASTFunction("substring", expr_node, start_node, length_node); - else - node = makeASTFunction("substring", expr_node, start_node); - - return true; - } - - bool parseTrim(bool trim_left, bool trim_right, IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// Handles all possible TRIM/LTRIM/RTRIM call variants ([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) - - std::string func_name; - bool char_override = false; - ASTPtr expr_node; - ASTPtr pattern_node; - ASTPtr to_remove; - - if (!trim_left && !trim_right) - { - if (ParserKeyword("BOTH").ignore(pos, expected)) - { - trim_left = true; - trim_right = true; - char_override = true; - } - else if (ParserKeyword("LEADING").ignore(pos, expected)) - { - trim_left = true; - char_override = true; - } - else if (ParserKeyword("TRAILING").ignore(pos, expected)) - { - trim_right = true; - char_override = true; - } - else - { - trim_left = true; - trim_right = true; - } - - if (char_override) - { - if (!ParserExpression().parse(pos, to_remove, expected)) - return false; - - auto from_keyword_parser = ParserKeyword("FROM"); - bool from_exists = from_keyword_parser.check(pos, expected); - - if (!from_exists) - { - ASTPtr identifier_node; - bool parsed_as = ParserKeyword("AS").ignore(pos, expected); - bool parsed_identifer = ParserIdentifier().parse(pos, identifier_node, expected); - - if (parsed_as && !parsed_identifer) - return false; - - if (parsed_identifer) - to_remove->setAlias(getIdentifierName(identifier_node)); - - from_exists = from_keyword_parser.check(pos, expected); - } - - if (!from_exists) - return false; - - auto quote_meta_func_node = std::make_shared(); - auto quote_meta_list_args = std::make_shared(); - quote_meta_list_args->children = {to_remove}; - - quote_meta_func_node->name = "regexpQuoteMeta"; - quote_meta_func_node->arguments = std::move(quote_meta_list_args); - 
quote_meta_func_node->children.push_back(quote_meta_func_node->arguments); - - to_remove = std::move(quote_meta_func_node); - } - } - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, expr_node, expected)) - return false; - - /// Convert to regexp replace function call - - if (char_override) - { - auto pattern_func_node = std::make_shared(); - auto pattern_list_args = std::make_shared(); - if (trim_left && trim_right) - { - pattern_list_args->children - = {std::make_shared("^["), - to_remove, - std::make_shared("]+|["), - to_remove, - std::make_shared("]+$")}; - func_name = "replaceRegexpAll"; - } - else - { - if (trim_left) - { - pattern_list_args->children = {std::make_shared("^["), to_remove, std::make_shared("]+")}; - } - else - { - /// trim_right == false not possible - pattern_list_args->children = {std::make_shared("["), to_remove, std::make_shared("]+$")}; - } - func_name = "replaceRegexpOne"; - } - - pattern_func_node->name = "concat"; - pattern_func_node->arguments = std::move(pattern_list_args); - pattern_func_node->children.push_back(pattern_func_node->arguments); - - pattern_node = std::move(pattern_func_node); - } - else - { - if (trim_left && trim_right) - { - func_name = "trimBoth"; - } - else - { - if (trim_left) - { - func_name = "trimLeft"; - } - else - { - /// trim_right == false not possible - func_name = "trimRight"; - } - } - } - - if (char_override) - node = makeASTFunction(func_name, expr_node, pattern_node, std::make_shared("")); - else - node = makeASTFunction(func_name, expr_node); - return true; - } - - bool parseExtract(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// First try to match with date extract operator EXTRACT(part FROM date) - /// Then with function extract(haystack, pattern) - - IParser::Pos begin = pos; - IntervalKind interval_kind; - - if (parseIntervalKind(pos, expected, interval_kind)) - { - ASTPtr expr; - - ParserKeyword s_from("FROM"); - 
ParserExpressionWithOptionalAlias elem_parser(true /*allow_alias_without_as_keyword*/); - - if (s_from.ignore(pos, expected) && elem_parser.parse(pos, expr, expected)) - { - node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr); - return true; - } - } - - pos = begin; - - ASTPtr expr_list; - if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) - return false; - - auto res = std::make_shared(); - res->name = "extract"; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); - return true; - } - - bool parsePosition(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// First try to match with position(needle IN haystack) - /// Then with position(haystack, needle[, start_pos]) - - ParserExpressionWithOptionalAlias expr_parser(true /*allow_alias_without_as_keyword*/); - - ASTPtr first_arg_expr_node; - if (!expr_parser.parse(pos, first_arg_expr_node, expected)) - { - return false; - } - - ASTFunction * func_in = typeid_cast(first_arg_expr_node.get()); - if (func_in && func_in->name == "in") - { - ASTExpressionList * in_args = typeid_cast(func_in->arguments.get()); - if (in_args && in_args->children.size() == 2) - { - node = makeASTFunction("position", in_args->children[1], in_args->children[0]); - return true; - } - } - - if (pos->type != TokenType::Comma) - return false; - ++pos; - - ASTPtr second_arg_expr_node; - if (!expr_parser.parse(pos, second_arg_expr_node, expected)) - { - return false; - } - - ASTPtr start_pos_expr_node; - if (pos->type == TokenType::Comma) - { - ++pos; - - if (!expr_parser.parse(pos, start_pos_expr_node, expected)) - return false; - } - - auto arguments = std::make_shared(); - arguments->children.push_back(std::move(first_arg_expr_node)); - arguments->children.push_back(std::move(second_arg_expr_node)); - - if (start_pos_expr_node) - arguments->children.push_back(std::move(start_pos_expr_node)); - - auto res = 
std::make_shared(); - res->name = "position"; - res->arguments = std::move(arguments); - res->children.push_back(res->arguments); - node = std::move(res); - return true; - } - - bool parseDateAdd(const char * function_name, IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// First to match with function(unit, offset, timestamp) - /// Then with function(offset, timestamp) - - ASTPtr timestamp_node; - ASTPtr offset_node; - - IntervalKind interval_kind; - ASTPtr interval_func_node; - if (parseIntervalKind(pos, expected, interval_kind)) - { - if (pos->type != TokenType::Comma) - return false; - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, offset_node, expected)) - return false; - - if (pos->type != TokenType::Comma) - return false; - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, timestamp_node, expected)) - return false; - auto interval_expr_list_args = std::make_shared(); - interval_expr_list_args->children = {offset_node}; - - interval_func_node = std::make_shared(); - interval_func_node->as().name = interval_kind.toNameOfFunctionToIntervalDataType(); - interval_func_node->as().arguments = std::move(interval_expr_list_args); - interval_func_node->as().children.push_back(interval_func_node->as().arguments); - } - else - { - ASTPtr expr_list; - if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) - return false; - - auto res = std::make_shared(); - res->name = function_name; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); - return true; - } - - node = makeASTFunction(function_name, timestamp_node, interval_func_node); - return true; - } - - bool parseDateDiff(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// First to match with dateDiff(unit, startdate, enddate, [timezone]) - /// Then with dateDiff('unit', startdate, enddate, 
[timezone]) - - ASTPtr left_node; - ASTPtr right_node; - - IntervalKind interval_kind; - if (parseIntervalKind(pos, expected, interval_kind)) - { - if (pos->type != TokenType::Comma) - return false; - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, left_node, expected)) - return false; - - if (pos->type != TokenType::Comma) - return false; - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, right_node, expected)) - return false; - - ASTPtr timezone_node; - - if (pos->type == TokenType::Comma) - { - /// Optional timezone - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, timezone_node, expected)) - return false; - } - - auto interval_literal = std::make_shared(interval_kind.toDateDiffUnit()); - if (timezone_node) - node = makeASTFunction("dateDiff", std::move(interval_literal), std::move(left_node), std::move(right_node), std::move(timezone_node)); - else - node = makeASTFunction("dateDiff", std::move(interval_literal), std::move(left_node), std::move(right_node)); - - return true; - } - - ASTPtr expr_list; - if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) - return false; - - auto res = std::make_shared(); - res->name = "dateDiff"; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); - - return true; - } - - bool parseExists(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - if (!ParserSelectWithUnionQuery().parse(pos, node, expected)) - return false; - - auto subquery = std::make_shared(); - subquery->children.push_back(node); - node = makeASTFunction("exists", subquery); - return true; - } - - bool parseGrouping(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - ASTPtr expr_list; - if (!ParserExpressionList(false, false).parse(pos, expr_list, expected)) - return false; - - auto res = 
std::make_shared(); - res->name = "grouping"; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); - return true; - } -} - - -bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserIdentifier id_parser; - - bool has_all = false; - bool has_distinct = false; - - ASTPtr identifier; - ASTPtr query; - ASTPtr expr_list_args; - ASTPtr expr_list_params; - - if (is_table_function) - { - if (ParserTableFunctionView().parse(pos, node, expected)) - return true; - } - - if (!id_parser.parse(pos, identifier, expected)) - return false; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - - /// Avoid excessive backtracking. - //pos.putBarrier(); - - /// Special cases for expressions that look like functions but contain some syntax sugar: - - /// CAST, EXTRACT, POSITION, EXISTS - /// DATE_ADD, DATEADD, TIMESTAMPADD, DATE_SUB, DATESUB, TIMESTAMPSUB, - /// DATE_DIFF, DATEDIFF, TIMESTAMPDIFF, TIMESTAMP_DIFF, - /// SUBSTRING, TRIM, LTRIM, RTRIM, POSITION - - /// Can be parsed as a composition of functions, but the contents must be unwrapped: - /// POSITION(x IN y) -> POSITION(in(x, y)) -> POSITION(y, x) - - /// Can be parsed as a function, but not always: - /// CAST(x AS type) - alias has to be unwrapped - /// CAST(x AS type(params)) - - /// Can be parsed as a function, but some identifier arguments have special meanings. 
- /// DATE_ADD(MINUTE, x, y) -> addMinutes(x, y) - /// DATE_DIFF(MINUTE, x, y) - - /// Have keywords that have to processed explicitly: - /// EXTRACT(x FROM y) - /// TRIM(BOTH|LEADING|TRAILING x FROM y) - /// SUBSTRING(x FROM a) - /// SUBSTRING(x FROM a FOR b) - - String function_name = getIdentifierName(identifier); - String function_name_lowercase = Poco::toLower(function_name); - - std::optional parsed_special_function; - - if (function_name_lowercase == "cast") - parsed_special_function = parseCastAs(pos, node, expected); - else if (function_name_lowercase == "extract") - parsed_special_function = parseExtract(pos, node, expected); - else if (function_name_lowercase == "substring") - parsed_special_function = parseSubstring(pos, node, expected); - else if (function_name_lowercase == "position") - parsed_special_function = parsePosition(pos, node, expected); - else if (function_name_lowercase == "exists") - parsed_special_function = parseExists(pos, node, expected); - else if (function_name_lowercase == "trim") - parsed_special_function = parseTrim(false, false, pos, node, expected); - else if (function_name_lowercase == "ltrim") - parsed_special_function = parseTrim(true, false, pos, node, expected); - else if (function_name_lowercase == "rtrim") - parsed_special_function = parseTrim(false, true, pos, node, expected); - else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" - || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") - parsed_special_function = parseDateAdd("plus", pos, node, expected); - else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" - || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") - parsed_special_function = parseDateAdd("minus", pos, node, expected); - else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" - || function_name_lowercase == "timestampdiff" || 
function_name_lowercase == "timestamp_diff") - parsed_special_function = parseDateDiff(pos, node, expected); - else if (function_name_lowercase == "grouping") - parsed_special_function = parseGrouping(pos, node, expected); - - if (parsed_special_function.has_value()) - return parsed_special_function.value() && ParserToken(TokenType::ClosingRoundBracket).ignore(pos); - - auto pos_after_bracket = pos; - auto old_expected = expected; - - ParserKeyword all("ALL"); - ParserKeyword distinct("DISTINCT"); - - if (all.ignore(pos, expected)) - has_all = true; - - if (distinct.ignore(pos, expected)) - has_distinct = true; - - if (!has_all && all.ignore(pos, expected)) - has_all = true; - - if (has_all && has_distinct) - return false; - - if (has_all || has_distinct) - { - /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier - if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) - { - pos = pos_after_bracket; - expected = old_expected; - has_all = false; - has_distinct = false; - } - } - - ParserExpressionList contents(false, is_table_function); - - const char * contents_begin = pos->begin; - if (!contents.parse(pos, expr_list_args, expected)) - return false; - const char * contents_end = pos->begin; - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - - /** Check for a common error case - often due to the complexity of quoting command-line arguments, - * an expression of the form toDate(2014-01-01) appears in the query instead of toDate('2014-01-01'). - * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number, - * and the query silently returns an unexpected result. 
- */ - if (function_name == "toDate" - && contents_end - contents_begin == strlen("2014-01-01") - && contents_begin[0] >= '2' && contents_begin[0] <= '3' - && contents_begin[1] >= '0' && contents_begin[1] <= '9' - && contents_begin[2] >= '0' && contents_begin[2] <= '9' - && contents_begin[3] >= '0' && contents_begin[3] <= '9' - && contents_begin[4] == '-' - && contents_begin[5] >= '0' && contents_begin[5] <= '9' - && contents_begin[6] >= '0' && contents_begin[6] <= '9' - && contents_begin[7] == '-' - && contents_begin[8] >= '0' && contents_begin[8] <= '9' - && contents_begin[9] >= '0' && contents_begin[9] <= '9') - { - std::string contents_str(contents_begin, contents_end - contents_begin); - throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')" - , ErrorCodes::SYNTAX_ERROR); - } - - /// The parametric aggregate function has two lists (parameters and arguments) in parentheses. Example: quantile(0.9)(x). - if (allow_function_parameters && pos->type == TokenType::OpeningRoundBracket) - { - ++pos; - - /// Parametric aggregate functions cannot have DISTINCT in parameters list. 
- if (has_distinct) - return false; - - expr_list_params = expr_list_args; - expr_list_args = nullptr; - - pos_after_bracket = pos; - old_expected = expected; - - if (all.ignore(pos, expected)) - has_all = true; - - if (distinct.ignore(pos, expected)) - has_distinct = true; - - if (!has_all && all.ignore(pos, expected)) - has_all = true; - - if (has_all && has_distinct) - return false; - - if (has_all || has_distinct) - { - /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier - if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) - { - pos = pos_after_bracket; - expected = old_expected; - has_distinct = false; - } - } - - if (!contents.parse(pos, expr_list_args, expected)) - return false; - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - } - - auto function_node = std::make_shared(); - tryGetIdentifierNameInto(identifier, function_node->name); - - /// func(DISTINCT ...) is equivalent to funcDistinct(...) - if (has_distinct) - function_node->name += "Distinct"; - - function_node->arguments = expr_list_args; - function_node->children.push_back(function_node->arguments); - - if (expr_list_params) - { - function_node->parameters = expr_list_params; - function_node->children.push_back(function_node->parameters); - } - - ParserKeyword filter("FILTER"); - ParserKeyword over("OVER"); - - if (filter.ignore(pos, expected)) - { - // We are slightly breaking the parser interface by parsing the window - // definition into an existing ASTFunction. Normally it would take a - // reference to ASTPtr and assign it the new node. We only have a pointer - // of a different type, hence this workaround with a temporary pointer. 
- ASTPtr function_node_as_iast = function_node; - - ParserFilterClause filter_parser; - if (!filter_parser.parse(pos, function_node_as_iast, expected)) - return false; - } - - if (over.ignore(pos, expected)) - { - function_node->is_window_function = true; - - ASTPtr function_node_as_iast = function_node; - - ParserWindowReference window_reference; - if (!window_reference.parse(pos, function_node_as_iast, expected)) - return false; - } - - node = function_node; - return true; -} - bool ParserTableFunctionView::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserIdentifier id_parser; diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 3bdb9731d79..e4fef4852ac 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -121,26 +121,26 @@ protected: ColumnTransformers allowed_transformers; }; -/** A function, for example, f(x, y + 1, g(z)). - * Or an aggregate function: sum(x + f(y)), corr(x, y). The syntax is the same as the usual function. - * Or a parametric aggregate function: quantile(0.9)(x + y). - * Syntax - two pairs of parentheses instead of one. The first is for parameters, the second for arguments. - * For functions, the DISTINCT modifier can be specified, for example, count(DISTINCT x, y). - */ -class ParserFunction : public IParserBase -{ -public: - explicit ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false) - : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_) - { - } +// /** A function, for example, f(x, y + 1, g(z)). +// * Or an aggregate function: sum(x + f(y)), corr(x, y). The syntax is the same as the usual function. +// * Or a parametric aggregate function: quantile(0.9)(x + y). +// * Syntax - two pairs of parentheses instead of one. The first is for parameters, the second for arguments. 
+// * For functions, the DISTINCT modifier can be specified, for example, count(DISTINCT x, y). +// */ +// class ParserFunction : public IParserBase +// { +// public: +// explicit ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false) +// : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_) +// { +// } -protected: - const char * getName() const override { return "function"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - bool allow_function_parameters; - bool is_table_function; -}; +// protected: +// const char * getName() const override { return "function"; } +// bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +// bool allow_function_parameters; +// bool is_table_function; +// }; // A special function parser for view and viewIfPermitted table functions. // It parses an SELECT query as its argument and doesn't support getColumnName(). diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index e337324df01..6d343a2c407 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1007,13 +1007,12 @@ public: } }; -/// General function layer -class FunctionLayer : public Layer + +class OrdinaryFunctionLayer : public Layer { public: - explicit FunctionLayer(String function_name_) : function_name(function_name_) - { - } + explicit OrdinaryFunctionLayer(String function_name_, bool allow_function_parameters_ = true) + : function_name(function_name_), allow_function_parameters(allow_function_parameters_){} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -1102,7 +1101,7 @@ public: , ErrorCodes::SYNTAX_ERROR); } - if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + if (allow_function_parameters && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { parameters = std::make_shared(); 
std::swap(parameters->children, elements); @@ -1206,6 +1205,8 @@ private: String function_name; ASTPtr parameters; + + bool allow_function_parameters; }; /// Layer for priority brackets and tuple function @@ -2048,6 +2049,67 @@ private: }; +std::unique_ptr getFunctionLayer(ASTPtr identifier, bool allow_function_parameters_ = true) +{ + /// Special cases for expressions that look like functions but contain some syntax sugar: + + /// CAST, EXTRACT, POSITION, EXISTS + /// DATE_ADD, DATEADD, TIMESTAMPADD, DATE_SUB, DATESUB, TIMESTAMPSUB, + /// DATE_DIFF, DATEDIFF, TIMESTAMPDIFF, TIMESTAMP_DIFF, + /// SUBSTRING, TRIM, LTRIM, RTRIM, POSITION + + /// Can be parsed as a composition of functions, but the contents must be unwrapped: + /// POSITION(x IN y) -> POSITION(in(x, y)) -> POSITION(y, x) + + /// Can be parsed as a function, but not always: + /// CAST(x AS type) - alias has to be unwrapped + /// CAST(x AS type(params)) + + /// Can be parsed as a function, but some identifier arguments have special meanings. 
+ /// DATE_ADD(MINUTE, x, y) -> addMinutes(x, y) + /// DATE_DIFF(MINUTE, x, y) + + /// Have keywords that have to processed explicitly: + /// EXTRACT(x FROM y) + /// TRIM(BOTH|LEADING|TRAILING x FROM y) + /// SUBSTRING(x FROM a) + /// SUBSTRING(x FROM a FOR b) + + String function_name = getIdentifierName(identifier); + String function_name_lowercase = Poco::toLower(function_name); + + if (function_name_lowercase == "cast") + return std::make_unique(); + else if (function_name_lowercase == "extract") + return std::make_unique(); + else if (function_name_lowercase == "substring") + return std::make_unique(); + else if (function_name_lowercase == "position") + return std::make_unique(); + else if (function_name_lowercase == "exists") + return std::make_unique(); + else if (function_name_lowercase == "trim") + return std::make_unique(false, false); + else if (function_name_lowercase == "ltrim") + return std::make_unique(true, false); + else if (function_name_lowercase == "rtrim") + return std::make_unique(false, true); + else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" + || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") + return std::make_unique("plus"); + else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" + || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") + return std::make_unique("minus"); + else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" + || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") + return std::make_unique(); + else if (function_name_lowercase == "grouping") + return std::make_unique(function_name_lowercase, allow_function_parameters_); + else + return std::make_unique(function_name, allow_function_parameters_); +} + + bool ParseCastExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected) { IParser::Pos begin 
= pos; @@ -2105,7 +2167,6 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte return true; } -template struct ParserExpressionImpl { static std::vector> operators_table; @@ -2131,7 +2192,7 @@ struct ParserExpressionImpl ParserColumnsMatcher columns_matcher_parser; ParserSubquery subquery_parser; - bool parse(IParser::Pos & pos, ASTPtr & node, Expected & expected); + bool parse(std::unique_ptr start, IParser::Pos & pos, ASTPtr & node, Expected & expected); enum class ParseResult { @@ -2149,23 +2210,47 @@ struct ParserExpressionImpl bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - return ParserExpressionImpl().parse(pos, node, expected); + auto start = std::make_unique(); + return ParserExpressionImpl().parse(std::move(start), pos, node, expected); } bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + auto start = std::make_unique(); return ParserKeyword("INTERVAL").parse(pos, node, expected) - && ParserExpressionImpl().parse(pos, node, expected); + && ParserExpressionImpl().parse(std::move(start), pos, node, expected); } bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + auto start = std::make_unique(); return ParserToken(TokenType::OpeningSquareBracket).ignore(pos, expected) - && ParserExpressionImpl().parse(pos, node, expected); + && ParserExpressionImpl().parse(std::move(start), pos, node, expected); } -template -std::vector> ParserExpressionImpl::operators_table({ +bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (is_table_function) + { + if (ParserTableFunctionView().parse(pos, node, expected)) + return true; + } + + ASTPtr identifier; + + if (ParserIdentifier(true).parse(pos, identifier, expected) + && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + { + auto start = getFunctionLayer(identifier, allow_function_parameters); + return 
ParserExpressionImpl().parse(std::move(start), pos, node, expected); + } + else + { + return false; + } +} + +std::vector> ParserExpressionImpl::operators_table({ {"->", Operator("lambda", 1, 2, OperatorType::Lambda)}, {"?", Operator("", 2, 0, OperatorType::StartIf)}, {":", Operator("if", 3, 3, OperatorType::FinishIf)}, @@ -2204,29 +2289,25 @@ std::vector> ParserExpressionImpl::opera {"::", Operator("CAST", 14, 2, OperatorType::Cast)}, }); -template -std::vector> ParserExpressionImpl::unary_operators_table({ +std::vector> ParserExpressionImpl::unary_operators_table({ {"NOT", Operator("not", 5, 1)}, {"-", Operator("negate", 13, 1)} }); -template -Operator ParserExpressionImpl::finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); +Operator ParserExpressionImpl::finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween); -template -const char * ParserExpressionImpl::overlapping_operators_to_skip[] = +const char * ParserExpressionImpl::overlapping_operators_to_skip[] = { "IN PARTITION", nullptr }; -template -bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & node, Expected & expected) +bool ParserExpressionImpl::parse(std::unique_ptr start, IParser::Pos & pos, ASTPtr & node, Expected & expected) { Action next = Action::OPERAND; std::vector> layers; - layers.push_back(std::make_unique()); + layers.push_back(std::move(start)); while (true) { @@ -2292,8 +2373,7 @@ bool ParserExpressionImpl::parse(IParser::Pos & pos, ASTPtr & node, Expect } } -template -typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected) +typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected) { ASTPtr tmp; @@ -2382,40 +2462,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::try if (pos->type == TokenType::OpeningRoundBracket) { ++pos; - - String function_name = 
getIdentifierName(tmp); - String function_name_lowercase = Poco::toLower(function_name); - - if (function_name_lowercase == "cast") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "extract") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "substring") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "position") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "exists") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "trim") - layers.push_back(std::make_unique(false, false)); - else if (function_name_lowercase == "ltrim") - layers.push_back(std::make_unique(true, false)); - else if (function_name_lowercase == "rtrim") - layers.push_back(std::make_unique(false, true)); - else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" - || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") - layers.push_back(std::make_unique("plus")); - else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" - || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") - layers.push_back(std::make_unique("minus")); - else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" - || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") - layers.push_back(std::make_unique()); - else if (function_name_lowercase == "grouping") - layers.push_back(std::make_unique(function_name_lowercase)); - else - layers.push_back(std::make_unique(function_name)); - + layers.push_back(getFunctionLayer(tmp)); return ParseResult::OPERAND; } else @@ -2457,8 +2504,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::try return ParseResult::OPERATOR; } -template -typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperator(Layers & layers, 
IParser::Pos & pos, Expected & expected) +typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected) { ASTPtr tmp; @@ -2486,9 +2532,8 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::try if (!layers.back()->insertAlias(tmp)) return ParseResult::ERROR; - return ParseResult::OPERAND; + return ParseResult::OPERATOR; } - return ParseResult::END; } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index c235739d4ed..c79f19d9508 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -248,6 +248,27 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/** A function, for example, f(x, y + 1, g(z)). + * Or an aggregate function: sum(x + f(y)), corr(x, y). The syntax is the same as the usual function. + * Or a parametric aggregate function: quantile(0.9)(x + y). + * Syntax - two pairs of parentheses instead of one. The first is for parameters, the second for arguments. + * For functions, the DISTINCT modifier can be specified, for example, count(DISTINCT x, y). + */ +class ParserFunction : public IParserBase +{ +public: + explicit ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false) + : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_) + { + } + +protected: + const char * getName() const override { return "function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool allow_function_parameters; + bool is_table_function; +}; + // It's used to parse expressions in table function. 
class ParserTableFunctionExpression : public IParserBase diff --git a/src/Parsers/ParserExternalDDLQuery.cpp b/src/Parsers/ParserExternalDDLQuery.cpp index 87630777fa6..4839ce73614 100644 --- a/src/Parsers/ParserExternalDDLQuery.cpp +++ b/src/Parsers/ParserExternalDDLQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include From d42a751c1ff18306faffb9acc13aeba26b6a4d30 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Tue, 13 Sep 2022 20:31:08 +0000 Subject: [PATCH 079/173] Fix table function view & more cool stuff --- src/Parsers/ExpressionElementParsers.cpp | 85 +----- src/Parsers/ExpressionElementParsers.h | 30 -- src/Parsers/ExpressionListParsers.cpp | 353 ++++++++++------------- src/Parsers/ExpressionListParsers.h | 92 +----- 4 files changed, 157 insertions(+), 403 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 1bb1df351fd..c72125f4afa 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -199,81 +199,10 @@ ASTPtr createFunctionCast(const ASTPtr & expr_ast, const ASTPtr & type_ast) { /// Convert to canonical representation in functional form: CAST(expr, 'type') auto type_literal = std::make_shared(queryToString(type_ast)); - - auto expr_list_args = std::make_shared(); - expr_list_args->children.push_back(expr_ast); - expr_list_args->children.push_back(std::move(type_literal)); - - auto func_node = std::make_shared(); - func_node->name = "CAST"; - func_node->arguments = std::move(expr_list_args); - func_node->children.push_back(func_node->arguments); - - return func_node; + return makeASTFunction("CAST", expr_ast, type_literal); } -bool ParserTableFunctionView::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserIdentifier id_parser; - ParserSelectWithUnionQuery select; - - ASTPtr identifier; - ASTPtr query; - - bool if_permitted = false; - - if 
(ParserKeyword{"VIEWIFPERMITTED"}.ignore(pos, expected)) - if_permitted = true; - else if (!ParserKeyword{"VIEW"}.ignore(pos, expected)) - return false; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - - ++pos; - - bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; - - if (!select.parse(pos, query, expected)) - return false; - - auto & select_ast = query->as(); - if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) - { - // It's an subquery. Bail out. - return false; - } - - ASTPtr else_ast; - if (if_permitted) - { - if (!ParserKeyword{"ELSE"}.ignore(pos, expected)) - return false; - - if (!ParserWithOptionalAlias{std::make_unique(true, true), true}.parse(pos, else_ast, expected)) - return false; - } - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - - ++pos; - - auto expr_list = std::make_shared(); - expr_list->children.push_back(query); - if (if_permitted) - expr_list->children.push_back(else_ast); - - auto function_node = std::make_shared(); - tryGetIdentifierNameInto(identifier, function_node->name); - function_node->name = if_permitted ? 
"viewIfPermitted" : "view"; - function_node->arguments = expr_list; - function_node->children.push_back(function_node->arguments); - node = function_node; - return true; -} - bool ParserFilterClause::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { assert(node); @@ -1572,17 +1501,9 @@ bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & e } auto name_literal = std::make_shared(name); - - auto expr_list_args = std::make_shared(); - expr_list_args->children.push_back(std::move(name_literal)); - - auto function_node = std::make_shared(); - function_node->name = "globalVariable"; - function_node->arguments = expr_list_args; - function_node->children.push_back(expr_list_args); - - node = function_node; + node = makeASTFunction("globalVariable", name_literal); node->setAlias("@@" + name); + return true; } diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index e4fef4852ac..f538555f0c1 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -121,36 +121,6 @@ protected: ColumnTransformers allowed_transformers; }; -// /** A function, for example, f(x, y + 1, g(z)). -// * Or an aggregate function: sum(x + f(y)), corr(x, y). The syntax is the same as the usual function. -// * Or a parametric aggregate function: quantile(0.9)(x + y). -// * Syntax - two pairs of parentheses instead of one. The first is for parameters, the second for arguments. -// * For functions, the DISTINCT modifier can be specified, for example, count(DISTINCT x, y). 
-// */ -// class ParserFunction : public IParserBase -// { -// public: -// explicit ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false) -// : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_) -// { -// } - -// protected: -// const char * getName() const override { return "function"; } -// bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -// bool allow_function_parameters; -// bool is_table_function; -// }; - -// A special function parser for view and viewIfPermitted table functions. -// It parses an SELECT query as its argument and doesn't support getColumnName(). -class ParserTableFunctionView : public IParserBase -{ -protected: - const char * getName() const override { return "function"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - // Allows to make queries like SELECT SUM() FILTER(WHERE ) FROM ... class ParserFilterClause : public IParserBase { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 6d343a2c407..83f5d79717c 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -239,7 +239,7 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node if (first) { ASTPtr elem; - if (!first_elem_parser->parse(pos, elem, expected)) + if (!elem_parser->parse(pos, elem, expected)) return false; node = elem; @@ -248,16 +248,7 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node else { /// try to find any of the valid operators - const char ** it; - Expected stub; - for (it = overlapping_operators_to_skip; *it; ++it) - if (ParserKeyword{*it}.checkWithoutMoving(pos, stub)) - break; - - if (*it) - break; - for (it = operators; *it; it += 2) if (parseOperator(pos, *it, expected)) break; @@ -265,40 +256,13 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node if (!*it) 
break; - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - ASTPtr elem; - SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; - if (comparison_expression) - { - if (ParserKeyword("ANY").ignore(pos, expected)) - subquery_function_type = SubqueryFunctionType::ANY; - else if (ParserKeyword("ALL").ignore(pos, expected)) - subquery_function_type = SubqueryFunctionType::ALL; - } - - if (subquery_function_type != SubqueryFunctionType::NONE && !ParserSubquery().parse(pos, elem, expected)) - subquery_function_type = SubqueryFunctionType::NONE; - - if (subquery_function_type == SubqueryFunctionType::NONE - && !(remaining_elem_parser ? remaining_elem_parser : first_elem_parser)->parse(pos, elem, expected)) + if (!elem_parser->parse(pos, elem, expected)) return false; /// the first argument of the function is the previous element, the second is the next one - function->name = it[1]; - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(node); - exp_list->children.push_back(elem); - - if (comparison_expression && subquery_function_type != SubqueryFunctionType::NONE && !modifyAST(function, subquery_function_type)) - return false; + auto function = makeASTFunction(it[1], node, elem); /** special exception for the access operator to the element of the array `x[y]`, which * contains the infix part '[' and the suffix ''] '(specified as' [') @@ -322,35 +286,6 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node } -bool ParserVariableArityOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr arguments; - - if (!elem_parser->parse(pos, node, expected)) - return false; - - while (true) - { - if (!parseOperator(pos, infix, expected)) - break; - - if (!arguments) - { - node = makeASTFunction(function_name, node); - arguments = node->as().arguments; - } - 
- ASTPtr elem; - if (!elem_parser->parse(pos, elem, expected)) - return false; - - arguments->children.push_back(elem); - } - - return true; -} - - ASTPtr makeBetweenOperator(bool negative, ASTs arguments) { // subject = arguments[0], left = arguments[1], right = arguments[2] @@ -401,106 +336,6 @@ ASTPtr makeBetweenOperator(bool negative, ASTs arguments) } -bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - if (ParserTableFunctionView().parse(pos, node, expected)) - return true; - ParserKeyword s_settings("SETTINGS"); - if (s_settings.ignore(pos, expected)) - { - ParserSetQuery parser_settings(true); - if (parser_settings.parse(pos, node, expected)) - return true; - } - return elem_parser.parse(pos, node, expected); -} - - -bool ParserPrefixUnaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - /// try to find any of the valid operators - const char ** it; - for (it = operators; *it; it += 2) - { - if (parseOperator(pos, *it, expected)) - break; - } - - /// Let's parse chains of the form `NOT NOT x`. This is hack. - /** This is done, because among the unary operators there is only a minus and NOT. - * But for a minus the chain of unary operators does not need to be supported. 
- */ - size_t count = 1; - if (it[0] && 0 == strncmp(it[0], "NOT", 3)) - { - while (true) - { - const char ** jt; - for (jt = operators; *jt; jt += 2) - if (parseOperator(pos, *jt, expected)) - break; - - if (!*jt) - break; - - ++count; - } - } - - ASTPtr elem; - if (!elem_parser->parse(pos, elem, expected)) - return false; - - if (!*it) - node = elem; - else - { - for (size_t i = 0; i < count; ++i) - { - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - function->name = it[1]; - function->arguments = exp_list; - function->children.push_back(exp_list); - - if (node) - exp_list->children.push_back(node); - else - exp_list->children.push_back(elem); - - node = function; - } - } - - return true; -} - - -bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr expr_ast; - if (!elem_parser->parse(pos, expr_ast, expected)) - return false; - - ASTPtr type_ast; - if (ParserToken(TokenType::DoubleColon).ignore(pos, expected) - && ParserDataType().parse(pos, type_ast, expected)) - { - node = createFunctionCast(expr_ast, type_ast); - } - else - { - node = expr_ast; - } - - return true; -} - - ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function) : impl(std::make_unique( is_table_function ? ParserPtr(std::make_unique()) : ParserPtr(std::make_unique()), @@ -752,11 +587,11 @@ public: elements.push_back(std::move(op)); } - virtual bool getResult(ASTPtr & op) + virtual bool getResult(ASTPtr & node) { if (elements.size() == 1) { - op = std::move(elements[0]); + node = std::move(elements[0]); return true; } @@ -922,6 +757,8 @@ public: return true; } + bool is_table_function = false; + /// 'AND' in operator '... BETWEEN ... AND ...' mirrors logical operator 'AND'. /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. 
int between_counter = 0; @@ -949,7 +786,7 @@ public: { } - bool getResult(ASTPtr & op) override + bool getResult(ASTPtr & node) override { /// We can exit the main cycle outside the parse() function, /// so we need to merge the element here @@ -958,7 +795,7 @@ public: if (elements.size() == 1) { - op = std::move(elements[0]); + node = std::move(elements[0]); return true; } @@ -1213,13 +1050,13 @@ private: class RoundBracketsLayer : public Layer { public: - bool getResult(ASTPtr & op) override + bool getResult(ASTPtr & node) override { // Round brackets can mean priority operator as well as function tuple() if (!is_tuple && elements.size() == 1) - op = std::move(elements[0]); + node = std::move(elements[0]); else - op = makeASTFunction("tuple", std::move(elements)); + node = makeASTFunction("tuple", std::move(elements)); return true; } @@ -1261,9 +1098,9 @@ private: class ArrayLayer : public BaseLayer { public: - bool getResult(ASTPtr & op) override + bool getResult(ASTPtr & node) override { - op = makeASTFunction("array", std::move(elements)); + node = makeASTFunction("array", std::move(elements)); return true; } @@ -1386,18 +1223,18 @@ public: class ExtractLayer : public BaseLayer { public: - bool getResult(ASTPtr & op) override + bool getResult(ASTPtr & node) override { if (state == 2) { if (elements.empty()) return false; - op = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), elements[0]); + node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), elements[0]); } else { - op = makeASTFunction("extract", std::move(elements)); + node = makeASTFunction("extract", std::move(elements)); } return true; @@ -1455,9 +1292,9 @@ private: class SubstringLayer : public Layer { public: - bool getResult(ASTPtr & op) override + bool getResult(ASTPtr & node) override { - op = makeASTFunction("substring", std::move(elements)); + node = makeASTFunction("substring", std::move(elements)); return true; } @@ -1515,12 +1352,12 @@ public: class 
PositionLayer : public Layer { public: - bool getResult(ASTPtr & op) override + bool getResult(ASTPtr & node) override { if (state == 2) std::swap(elements[1], elements[0]); - op = makeASTFunction("position", std::move(elements)); + node = makeASTFunction("position", std::move(elements)); return true; } @@ -1612,9 +1449,9 @@ public: { } - bool getResult(ASTPtr & op) override + bool getResult(ASTPtr & node) override { - op = makeASTFunction(function_name, std::move(elements)); + node = makeASTFunction(function_name, std::move(elements)); return true; } @@ -1776,15 +1613,15 @@ public: { } - bool getResult(ASTPtr & op) override + bool getResult(ASTPtr & node) override { if (parsed_interval_kind) { elements[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), elements[0]); - op = makeASTFunction(function_name, elements[1], elements[0]); + node = makeASTFunction(function_name, elements[1], elements[0]); } else - op = makeASTFunction(function_name, std::move(elements)); + node = makeASTFunction(function_name, std::move(elements)); return true; } @@ -1829,20 +1666,20 @@ private: class DateDiffLayer : public BaseLayer { public: - bool getResult(ASTPtr & op) override + bool getResult(ASTPtr & node) override { if (parsed_interval_kind) { if (elements.size() == 2) - op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1]); + node = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1]); else if (elements.size() == 3) - op = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1], elements[2]); + node = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1], elements[2]); else return false; } else { - op = makeASTFunction("dateDiff", std::move(elements)); + node = makeASTFunction("dateDiff", std::move(elements)); } return true; } @@ -1956,6 +1793,89 @@ private: 
IntervalKind interval_kind; }; +/// Layer for table function 'view' and 'viewIfPermitted' +class ViewLayer : public Layer +{ +public: + ViewLayer(bool if_permitted_) : if_permitted(if_permitted_) {} + + bool getResult(ASTPtr & node) override + { + if (if_permitted) + node = makeASTFunction("viewIfPermitted", std::move(elements)); + else + node = makeASTFunction("view", std::move(elements)); + + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + { + /// view(SELECT ...) + /// viewIfPermitted(SELECT ... ELSE func(...)) + /// + /// 0. Parse the SELECT query and 'ELSE' keyword if needed + /// + + if (state == 0) + { + ASTPtr query; + + bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; + + if (!ParserSelectWithUnionQuery().parse(pos, query, expected)) + return false; + + auto & select_ast = query->as(); + if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) + { + // It's an subquery. Bail out. + return false; + } + + pushResult(query); + + if (!if_permitted) + { + if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + + finished = true; + return true; + } + + if (!ParserKeyword{"ELSE"}.ignore(pos, expected)) + return false; + + ///auto test_pos = pos; + + // Check whether next thing is a function + // if (!ParserIdentifier(true).parse(pos, )) + // if (!ParserWithOptionalAlias{std::make_unique(true, true), true}.parse(pos, else_ast, expected)) + // return false; + + state = 1; + return true; + } + + if (state == 1) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + finished = true; + } + } + + return true; + } + +private: + bool if_permitted; +}; + class CaseLayer : public Layer { @@ -2049,7 +1969,7 @@ private: }; -std::unique_ptr getFunctionLayer(ASTPtr identifier, bool allow_function_parameters_ = true) +std::unique_ptr getFunctionLayer(ASTPtr identifier, bool 
is_table_function, bool allow_function_parameters_ = true) { /// Special cases for expressions that look like functions but contain some syntax sugar: @@ -2078,6 +1998,14 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool allow_function_p String function_name = getIdentifierName(identifier); String function_name_lowercase = Poco::toLower(function_name); + if (is_table_function) + { + if (function_name_lowercase == "view") + return std::make_unique(false); + else if (function_name_lowercase == "viewifpermitted") + return std::make_unique(true); + } + if (function_name_lowercase == "cast") return std::make_unique(); else if (function_name_lowercase == "extract") @@ -2214,6 +2142,21 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return ParserExpressionImpl().parse(std::move(start), pos, node, expected); } +bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_settings("SETTINGS"); + if (s_settings.ignore(pos, expected)) + { + ParserSetQuery parser_settings(true); + if (parser_settings.parse(pos, node, expected)) + return true; + } + + auto start = std::make_unique(); + start->is_table_function = true; + return ParserExpressionImpl().parse(std::move(start), pos, node, expected); +} + bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto start = std::make_unique(); @@ -2230,18 +2173,13 @@ bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (is_table_function) - { - if (ParserTableFunctionView().parse(pos, node, expected)) - return true; - } - ASTPtr identifier; if (ParserIdentifier(true).parse(pos, identifier, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { - auto start = getFunctionLayer(identifier, allow_function_parameters); + auto start = getFunctionLayer(identifier, 
is_table_function, allow_function_parameters); + start->is_table_function = is_table_function; return ParserExpressionImpl().parse(std::move(start), pos, node, expected); } else @@ -2377,6 +2315,17 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand { ASTPtr tmp; + if (typeid_cast(layers.back().get())) + { + if (identifier_parser.parse(pos, tmp, expected) + && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + { + layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); + return ParseResult::OPERAND; + } + return ParseResult::ERROR; + } + /// Special case for cast expression if (layers.back()->previousType() != OperatorType::TupleElement && ParseCastExpression(pos, tmp, expected)) @@ -2462,7 +2411,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand if (pos->type == TokenType::OpeningRoundBracket) { ++pos; - layers.push_back(getFunctionLayer(tmp)); + layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); return ParseResult::OPERAND; } else diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index c79f19d9508..f12aabde45b 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -123,31 +123,13 @@ class ParserLeftAssociativeBinaryOperatorList : public IParserBase { private: Operators_t operators; - Operators_t overlapping_operators_to_skip = { (const char *[]){ nullptr } }; - ParserPtr first_elem_parser; - ParserPtr remaining_elem_parser; - /// =, !=, <, > ALL (subquery) / ANY (subquery) - bool comparison_expression = false; + ParserPtr elem_parser; public: /** `operators_` - allowed operators and their corresponding functions */ - ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && first_elem_parser_) - : operators(operators_), first_elem_parser(std::move(first_elem_parser_)) - { - } - - ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, 
- Operators_t overlapping_operators_to_skip_, ParserPtr && first_elem_parser_, bool comparison_expression_ = false) - : operators(operators_), overlapping_operators_to_skip(overlapping_operators_to_skip_), - first_elem_parser(std::move(first_elem_parser_)), comparison_expression(comparison_expression_) - { - } - - ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && first_elem_parser_, - ParserPtr && remaining_elem_parser_) - : operators(operators_), first_elem_parser(std::move(first_elem_parser_)), - remaining_elem_parser(std::move(remaining_elem_parser_)) + ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && elem_parser_) + : operators(operators_), elem_parser(std::move(elem_parser_)) { } @@ -158,71 +140,6 @@ protected: }; -/** Expression with an infix operator of arbitrary arity. - * For example, a AND b AND c AND d. - */ -class ParserVariableArityOperatorList : public IParserBase -{ -private: - const char * infix; - const char * function_name; - ParserPtr elem_parser; - -public: - ParserVariableArityOperatorList(const char * infix_, const char * function_, ParserPtr && elem_parser_) - : infix(infix_), function_name(function_), elem_parser(std::move(elem_parser_)) - { - } - -protected: - const char * getName() const override { return "list, delimited by operator of variable arity"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -/** An expression with a prefix unary operator. - * Example, NOT x. 
- */ -class ParserPrefixUnaryOperatorExpression : public IParserBase -{ -private: - Operators_t operators; - ParserPtr elem_parser; - -public: - /** `operators_` - allowed operators and their corresponding functions - */ - ParserPrefixUnaryOperatorExpression(Operators_t operators_, ParserPtr && elem_parser_) - : operators(operators_), elem_parser(std::move(elem_parser_)) - { - } - -protected: - const char * getName() const override { return "expression with prefix unary operator"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -/// CAST operator "::". This parser is used if left argument -/// of operator cannot be read as simple literal from text of query. -/// Example: "[1, 1 + 1, 1 + 2]::Array(UInt8)" -class ParserCastExpression : public IParserBase -{ -private: - ParserPtr elem_parser; - -public: - explicit ParserCastExpression(ParserPtr && elem_parser_) - : elem_parser(std::move(elem_parser_)) - { - } - -protected: - const char * getName() const override { return "CAST expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - /// Optional conversion to INTERVAL data type. Example: "INTERVAL x SECOND" parsed as "toIntervalSecond(x)". class ParserIntervalOperatorExpression : public IParserBase { @@ -273,9 +190,6 @@ protected: // It's used to parse expressions in table function. 
class ParserTableFunctionExpression : public IParserBase { -private: - ParserExpression elem_parser; - protected: const char * getName() const override { return "table function expression"; } From 48ec96149e223e6de127cadae0afbefeaab79670 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Tue, 13 Sep 2022 21:07:15 +0000 Subject: [PATCH 080/173] Fix style --- src/Parsers/ExpressionElementParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.cpp | 26 +++++++----------------- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index c72125f4afa..81d6f34aced 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -199,7 +199,7 @@ ASTPtr createFunctionCast(const ASTPtr & expr_ast, const ASTPtr & type_ast) { /// Convert to canonical representation in functional form: CAST(expr, 'type') auto type_literal = std::make_shared(queryToString(type_ast)); - return makeASTFunction("CAST", expr_ast, type_literal); + return makeASTFunction("CAST", expr_ast, std::move(type_literal)); } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 83f5d79717c..000307630b3 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -500,23 +500,18 @@ enum class OperatorType Lambda }; -/** Operator class stores parameters of the operator: +/** Operator struct stores parameters of the operator: * - function_name name of the function that operator will create * - priority priority of the operator relative to the other operators * - arity the amount of arguments that operator will consume * - type type of the operator that defines its behaviour */ -class Operator +struct Operator { -public: Operator() = default; - Operator(String function_name_, - Int32 priority_, - Int32 arity_ = 2, - OperatorType type_ = OperatorType::None) : type(type_), priority(priority_), 
arity(arity_), function_name(function_name_) - { - } + Operator(String function_name_, Int32 priority_, Int32 arity_ = 2, OperatorType type_ = OperatorType::None) + : type(type_), priority(priority_), arity(arity_), function_name(function_name_) {} OperatorType type; Int32 priority; @@ -1814,8 +1809,8 @@ public: /// view(SELECT ...) /// viewIfPermitted(SELECT ... ELSE func(...)) /// - /// 0. Parse the SELECT query and 'ELSE' keyword if needed - /// + /// 0. Parse the SELECT query and if 'if_permitted' parse 'ELSE' keyword (-> 1) else (finished) + /// 1. Parse closing token if (state == 0) { @@ -1847,13 +1842,6 @@ public: if (!ParserKeyword{"ELSE"}.ignore(pos, expected)) return false; - ///auto test_pos = pos; - - // Check whether next thing is a function - // if (!ParserIdentifier(true).parse(pos, )) - // if (!ParserWithOptionalAlias{std::make_unique(true, true), true}.parse(pos, else_ast, expected)) - // return false; - state = 1; return true; } @@ -2244,7 +2232,7 @@ bool ParserExpressionImpl::parse(std::unique_ptr start, IParser::Pos & po { Action next = Action::OPERAND; - std::vector> layers; + Layers layers; layers.push_back(std::move(start)); while (true) From ab4e3b831e0d25e134339dfe730c95cc97fe9081 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 14 Sep 2022 09:44:51 +0000 Subject: [PATCH 081/173] Fix TableFunction expressions --- src/Parsers/ExpressionListParsers.cpp | 52 +++++++++++++++++---------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 000307630b3..25eabf86f3c 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -510,13 +510,13 @@ struct Operator { Operator() = default; - Operator(String function_name_, Int32 priority_, Int32 arity_ = 2, OperatorType type_ = OperatorType::None) + Operator(const std::string & function_name_, int priority_, int arity_ = 2, OperatorType type_ = 
OperatorType::None) : type(type_), priority(priority_), arity(arity_), function_name(function_name_) {} OperatorType type; - Int32 priority; - Int32 arity; - String function_name; + int priority; + int arity; + std::string function_name; }; enum class Checkpoint @@ -2132,14 +2132,6 @@ bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKeyword s_settings("SETTINGS"); - if (s_settings.ignore(pos, expected)) - { - ParserSetQuery parser_settings(true); - if (parser_settings.parse(pos, node, expected)) - return true; - } - auto start = std::make_unique(); start->is_table_function = true; return ParserExpressionImpl().parse(std::move(start), pos, node, expected); @@ -2303,15 +2295,39 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand { ASTPtr tmp; - if (typeid_cast(layers.back().get())) + if (layers.front()->is_table_function) { - if (identifier_parser.parse(pos, tmp, expected) - && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + if (typeid_cast(layers.back().get())) { - layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); - return ParseResult::OPERAND; + if (identifier_parser.parse(pos, tmp, expected) + && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + { + layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); + return ParseResult::OPERAND; + } + return ParseResult::ERROR; + } + + /// Current element should be empty (there should be no other operands or operators) + /// to parse SETTINGS in table function + if (layers.back()->empty()) + { + auto old_pos = pos; + ParserKeyword s_settings("SETTINGS"); + if (s_settings.ignore(pos, expected)) + { + ParserSetQuery parser_settings(true); + if (parser_settings.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(tmp); + return ParseResult::OPERAND; + } + else + { + pos = 
old_pos; + } + } } - return ParseResult::ERROR; } /// Special case for cast expression From 6d01f92641a62f41b02da5f5b50d37e6e538b9aa Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 15 Sep 2022 00:48:14 +0000 Subject: [PATCH 082/173] Fix silly bug & little enhancement --- src/Parsers/ExpressionListParsers.cpp | 61 +++++++-------------------- 1 file changed, 15 insertions(+), 46 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 25eabf86f3c..9e936ba209e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -288,51 +288,20 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node ASTPtr makeBetweenOperator(bool negative, ASTs arguments) { - // subject = arguments[0], left = arguments[1], right = arguments[2] - auto f_combined_expression = std::make_shared(); - auto args_combined_expression = std::make_shared(); - - /// [NOT] BETWEEN left AND right - auto f_left_expr = std::make_shared(); - auto args_left_expr = std::make_shared(); - - auto f_right_expr = std::make_shared(); - auto args_right_expr = std::make_shared(); - - args_left_expr->children.emplace_back(arguments[0]); - args_left_expr->children.emplace_back(arguments[1]); - - args_right_expr->children.emplace_back(arguments[0]); - args_right_expr->children.emplace_back(arguments[2]); + // SUBJECT = arguments[0], LEFT = arguments[1], RIGHT = arguments[2] if (negative) { - /// NOT BETWEEN - f_left_expr->name = "less"; - f_right_expr->name = "greater"; - f_combined_expression->name = "or"; + auto f_left_expr = makeASTFunction("less", arguments[0], arguments[1]); + auto f_right_expr = makeASTFunction("greater", arguments[0], arguments[2]); + return makeASTFunction("or", f_left_expr, f_right_expr); } else { - /// BETWEEN - f_left_expr->name = "greaterOrEquals"; - f_right_expr->name = "lessOrEquals"; - f_combined_expression->name = "and"; + auto f_left_expr = 
makeASTFunction("greaterOrEquals", arguments[0], arguments[1]); + auto f_right_expr = makeASTFunction("lessOrEquals", arguments[0], arguments[2]); + return makeASTFunction("and", f_left_expr, f_right_expr); } - - f_left_expr->arguments = args_left_expr; - f_left_expr->children.emplace_back(f_left_expr->arguments); - - f_right_expr->arguments = args_right_expr; - f_right_expr->children.emplace_back(f_right_expr->arguments); - - args_combined_expression->children.emplace_back(f_left_expr); - args_combined_expression->children.emplace_back(f_right_expr); - - f_combined_expression->arguments = args_combined_expression; - f_combined_expression->children.emplace_back(f_combined_expression->arguments); - - return f_combined_expression; } @@ -616,7 +585,7 @@ public: return operators.back().type; } - int empty() const + int isCurrentElementEmpty() const { return operators.empty() && operands.empty(); } @@ -696,7 +665,7 @@ public: if (!popOperand(node)) return false; - bool res = empty(); + bool res = isCurrentElementEmpty(); if (push_to_elements) pushResult(node); @@ -709,7 +678,7 @@ public: bool parseLambda() { // 0. 
If empty - create function tuple with 0 args - if (empty()) + if (isCurrentElementEmpty()) { auto function = makeASTFunction("tuple"); pushOperand(function); @@ -762,7 +731,7 @@ public: bool allow_alias_without_as_keyword = true; std::optional> saved_checkpoint; - Checkpoint current_checkpoint; + Checkpoint current_checkpoint = Checkpoint::None; protected: std::vector operators; @@ -828,7 +797,7 @@ public: { action = Action::OPERATOR; - if (!empty() || !elements.empty()) + if (!isCurrentElementEmpty() || !elements.empty()) if (!mergeElement()) return false; @@ -904,7 +873,7 @@ public: { action = Action::OPERATOR; - if (!empty() || !elements.empty()) + if (!isCurrentElementEmpty() || !elements.empty()) if (!mergeElement()) return false; @@ -1070,7 +1039,7 @@ public: { action = Action::OPERATOR; - if (!empty()) + if (!isCurrentElementEmpty()) if (!mergeElement()) return false; @@ -2310,7 +2279,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand /// Current element should be empty (there should be no other operands or operators) /// to parse SETTINGS in table function - if (layers.back()->empty()) + if (layers.back()->isCurrentElementEmpty()) { auto old_pos = pos; ParserKeyword s_settings("SETTINGS"); From 77d0971efa8e09c08675457abefc4166cd826cb3 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 15 Sep 2022 14:46:55 +0800 Subject: [PATCH 083/173] Serialize tracing context --- src/Common/OpenTelemetryTraceContext.cpp | 31 +++++++++++++++++++++++- src/Common/OpenTelemetryTraceContext.h | 16 ++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index 7a1f94926d5..af443861bea 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { @@ -227,6 +227,35 @@ String TracingContext::composeTraceparentHeader() const 
static_cast(trace_flags)); } +void TracingContext::deserialize(ReadBuffer & buf) +{ + if (!buf.eof() && *buf.position() == 't') + { + buf >> "tracing: " + >> this->trace_id + >> " " + >> this->span_id + >> " " + >> this->tracestate + >> " " + >> this->trace_flags + >> "\n"; + } +} + +void TracingContext::serialize(WriteBuffer & buf) const +{ + buf << "tracing: " + << this->trace_id + << " " + << this->span_id + << " " + << this->tracestate + << " " + << this->trace_flags + << "\n"; +} + const TracingContextOnThread & CurrentContext() { return current_thread_trace_context; diff --git a/src/Common/OpenTelemetryTraceContext.h b/src/Common/OpenTelemetryTraceContext.h index 63136f8731d..20090960814 100644 --- a/src/Common/OpenTelemetryTraceContext.h +++ b/src/Common/OpenTelemetryTraceContext.h @@ -7,6 +7,8 @@ namespace DB struct Settings; class OpenTelemetrySpanLog; +class WriteBuffer; +class ReadBuffer; namespace OpenTelemetry { @@ -63,6 +65,9 @@ struct TracingContext { return trace_id != UUID(); } + + void deserialize(ReadBuffer & buf); + void serialize(WriteBuffer & buf) const; }; /// Tracing context kept on each thread @@ -155,7 +160,18 @@ struct SpanHolder : public Span void finish() noexcept; }; +} // End of namespace OpenTelemetry + +inline WriteBuffer & operator<<(WriteBuffer & buf, const OpenTelemetry::TracingContext & context) +{ + context.serialize(buf); + return buf; } +inline ReadBuffer & operator>> (ReadBuffer & buf, OpenTelemetry::TracingContext & context) +{ + context.deserialize(buf); + return buf; } +} // End of namespace DB From 52224875e2e78d3ddfb15ce93545cc09afa97ffc Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 15 Sep 2022 14:47:43 +0800 Subject: [PATCH 084/173] Serialize tracing context to DDL log entry --- src/Interpreters/DDLTask.cpp | 4 ++++ src/Interpreters/DDLTask.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 50876ed29af..459cfc3be6f 100644 --- 
a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -94,6 +94,8 @@ String DDLLogEntry::toString() const wb << "settings: " << serializeAST(ast) << "\n"; } + wb << this->tracing_context; + return wb.str(); } @@ -132,6 +134,8 @@ void DDLLogEntry::parse(const String & data) ASTPtr settings_ast = parseQuery(parser, settings_str, max_size, max_depth); settings.emplace(std::move(settings_ast->as()->changes)); } + + rb >> this->tracing_context; } assertEOF(rb); diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index d5990edd43f..fc85188a865 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -75,6 +76,7 @@ struct DDLLogEntry std::vector hosts; String initiator; // optional std::optional settings; + OpenTelemetry::TracingContext tracing_context; void setSettingsIfRequired(ContextPtr context); String toString() const; From 490089cc4f8baacff0113bd30d4977243b80f0f8 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 15 Sep 2022 14:48:24 +0800 Subject: [PATCH 085/173] Copy tracing context from current thread to DDLLogEntry as parent context --- src/Interpreters/executeDDLQueryOnCluster.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 7cc4efcb64d..06a6512e21b 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -164,6 +164,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, entry.query = queryToString(query_ptr); entry.initiator = ddl_worker.getCommonHostID(); entry.setSettingsIfRequired(context); + entry.tracing_context = OpenTelemetry::CurrentContext(); String node_path = ddl_worker.enqueueQuery(entry); return getDistributedDDLStatus(node_path, entry, context); From a4ef0c0281b13eac3ffee4523a08ea5f3a171da7 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 
15 Sep 2022 14:57:00 +0800 Subject: [PATCH 086/173] Set up tracing context for DDLWorker --- src/Interpreters/DDLWorker.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 6ec20ab5f5f..408fa2a28d3 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -515,6 +516,11 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query); chassert(!task.completely_processed); + /// Setup tracing context on current thread for current DDL + OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , + task.entry.tracing_context, + this->context->getOpenTelemetrySpanLog()); + String active_node_path = task.getActiveNodePath(); String finished_node_path = task.getFinishedNodePath(); From ac848727e5fc8aec4962d5fad25ad68c8501b1b6 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 15 Sep 2022 18:00:08 +0800 Subject: [PATCH 087/173] Read tracing context in right pos --- src/Interpreters/DDLTask.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 459cfc3be6f..b867b52ac20 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -134,10 +134,10 @@ void DDLLogEntry::parse(const String & data) ASTPtr settings_ast = parseQuery(parser, settings_str, max_size, max_depth); settings.emplace(std::move(settings_ast->as()->changes)); } - - rb >> this->tracing_context; } + rb >> this->tracing_context; + assertEOF(rb); if (!host_id_strings.empty()) From d62ba01e93661f454cac40cccc14a0f3dc135267 Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Thu, 15 Sep 2022 06:25:23 -0700 Subject: [PATCH 088/173] Fix SipHash Endian issue for s390x --- src/Common/SipHash.h | 27 ++++++++++++++++----------- 
1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 6162de48143..6e1138b6510 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -32,6 +32,11 @@ v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \ } while(0) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define CURRENT_BYTES_IDX(i) (7-i) +#else +#define CURRENT_BYTES_IDX(i) (i) +#endif class SipHash { @@ -55,7 +60,7 @@ private: ALWAYS_INLINE void finalize() { /// In the last free byte, we write the remainder of the division by 256. - current_bytes[7] = static_cast(cnt); + current_bytes[CURRENT_BYTES_IDX(7)] = static_cast(cnt); v3 ^= current_word; SIPROUND; @@ -92,7 +97,7 @@ public: { while (cnt & 7 && data < end) { - current_bytes[cnt & 7] = *data; + current_bytes[CURRENT_BYTES_IDX(cnt & 7)] = *data; ++data; ++cnt; } @@ -125,13 +130,13 @@ public: current_word = 0; switch (end - data) { - case 7: current_bytes[6] = data[6]; [[fallthrough]]; - case 6: current_bytes[5] = data[5]; [[fallthrough]]; - case 5: current_bytes[4] = data[4]; [[fallthrough]]; - case 4: current_bytes[3] = data[3]; [[fallthrough]]; - case 3: current_bytes[2] = data[2]; [[fallthrough]]; - case 2: current_bytes[1] = data[1]; [[fallthrough]]; - case 1: current_bytes[0] = data[0]; [[fallthrough]]; + case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]]; + case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]]; + case 5: current_bytes[CURRENT_BYTES_IDX(4)] = data[4]; [[fallthrough]]; + case 4: current_bytes[CURRENT_BYTES_IDX(3)] = data[3]; [[fallthrough]]; + case 3: current_bytes[CURRENT_BYTES_IDX(2)] = data[2]; [[fallthrough]]; + case 2: current_bytes[CURRENT_BYTES_IDX(1)] = data[1]; [[fallthrough]]; + case 1: current_bytes[CURRENT_BYTES_IDX(0)] = data[0]; [[fallthrough]]; case 0: break; } } @@ -157,8 +162,8 @@ public: void get128(char * out) { finalize(); - unalignedStoreLE(out, v0 ^ v1); - unalignedStoreLE(out + 8, v2 ^ v3); + 
unalignedStore(out, v0 ^ v1); + unalignedStore(out + 8, v2 ^ v3); } template From 2e2d804b90dc3071f3cff5539a47d644113d7577 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 15 Sep 2022 21:54:37 +0000 Subject: [PATCH 089/173] Better --- src/Parsers/ExpressionListParsers.cpp | 176 ++++++++++++++------------ src/Parsers/ExpressionListParsers.h | 86 ++++++------- src/Parsers/ParserCreateQuery.cpp | 4 +- 3 files changed, 135 insertions(+), 131 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 9e936ba209e..1f34d922cbe 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -304,24 +304,6 @@ ASTPtr makeBetweenOperator(bool negative, ASTs arguments) } } - -ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function) - : impl(std::make_unique( - is_table_function ? ParserPtr(std::make_unique()) : ParserPtr(std::make_unique()), - allow_alias_without_as_keyword)) -{ -} - - -bool ParserExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - return ParserList( - std::make_unique(allow_alias_without_as_keyword, is_table_function), - std::make_unique(TokenType::Comma)) - .parse(pos, node, expected); -} - - bool ParserNotEmptyExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return nested_parser.parse(pos, node, expected) && !node->children.empty(); @@ -444,6 +426,7 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp enum class Action { + NONE, OPERAND, OPERATOR }; @@ -742,12 +725,14 @@ protected: }; -class SingleElementLayer : public Layer +class ExpressionLayer : public Layer { public: - SingleElementLayer() : Layer(false, false) + ExpressionLayer(bool allow_alias_, bool allow_alias_without_as_keyword_, bool is_table_function_) + : Layer(allow_alias_, allow_alias_without_as_keyword_) { + is_table_function = 
is_table_function_; } bool getResult(ASTPtr & node) override @@ -776,6 +761,44 @@ public: }; +class ExpressionListLayer : public Layer +{ +public: + + ExpressionListLayer(bool allow_alias_without_as_keyword_, bool is_table_function_) + : Layer(true, allow_alias_without_as_keyword_) + { + is_table_function = is_table_function_; + } + + bool getResult(ASTPtr & node) override + { + /// We can exit the main cycle outside the parse() function, + /// so we need to merge the element here + if (!mergeElement()) + return false; + + node = std::make_shared(); + node->children = std::move(elements); + + return true; + } + + bool parse(IParser::Pos & pos, Expected & /*expected*/, Action & action) override + { + if (pos->type == TokenType::Comma) + { + if (!mergeElement()) + return false; + + ++pos; + action = Action::OPERAND; + } + + return true; + } +}; + /// Basic layer for a function with certain separator and end tokens: /// 1. If we parse a separator we should merge current operands and operators /// into one element and push in to 'elements' vector. 
@@ -1761,7 +1784,7 @@ private: class ViewLayer : public Layer { public: - ViewLayer(bool if_permitted_) : if_permitted(if_permitted_) {} + explicit ViewLayer(bool if_permitted_) : if_permitted(if_permitted_) {} bool getResult(ASTPtr & node) override { @@ -2079,38 +2102,36 @@ struct ParserExpressionImpl bool parse(std::unique_ptr start, IParser::Pos & pos, ASTPtr & node, Expected & expected); - enum class ParseResult - { - OPERAND, - OPERATOR, - ERROR, - END, - }; - using Layers = std::vector>; - ParseResult tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected); - static ParseResult tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected); + Action tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected); + static Action tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected); }; + bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - auto start = std::make_unique(); + auto start = std::make_unique(false, false, false); return ParserExpressionImpl().parse(std::move(start), pos, node, expected); } bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - auto start = std::make_unique(); + auto start = std::make_unique(false, false, true); start->is_table_function = true; return ParserExpressionImpl().parse(std::move(start), pos, node, expected); } -bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserExpressionWithOptionalAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - auto start = std::make_unique(); - return ParserKeyword("INTERVAL").parse(pos, node, expected) - && ParserExpressionImpl().parse(std::move(start), pos, node, expected); + auto start = std::make_unique(true, allow_alias_without_as_keyword, is_table_function); + return ParserExpressionImpl().parse(std::move(start), pos, node, expected); +} + +bool ParserExpressionList::parseImpl(Pos & pos, 
ASTPtr & node, Expected & expected) +{ + auto start = std::make_unique(allow_alias_without_as_keyword, is_table_function); + return ParserExpressionImpl().parse(std::move(start), pos, node, expected); } bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -2219,21 +2240,13 @@ bool ParserExpressionImpl::parse(std::unique_ptr start, IParser::Pos & po continue; } - ParseResult result; - if (next == Action::OPERAND) - result = tryParseOperand(layers, pos, expected); + next = tryParseOperand(layers, pos, expected); else - result = tryParseOperator(layers, pos, expected); + next = tryParseOperator(layers, pos, expected); - if (result == ParseResult::END) + if (next == Action::NONE) break; - else if (result == ParseResult::ERROR) - break; - else if (result == ParseResult::OPERATOR) - next = Action::OPERATOR; - else if (result == ParseResult::OPERAND) - next = Action::OPERAND; } /// When we exit the loop we should be on the 1st level @@ -2260,7 +2273,7 @@ bool ParserExpressionImpl::parse(std::unique_ptr start, IParser::Pos & po } } -typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected) +Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected) { ASTPtr tmp; @@ -2272,9 +2285,9 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); - return ParseResult::OPERAND; + return Action::OPERAND; } - return ParseResult::ERROR; + return Action::NONE; } /// Current element should be empty (there should be no other operands or operators) @@ -2289,7 +2302,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand if (parser_settings.parse(pos, tmp, expected)) { layers.back()->pushOperand(tmp); - return ParseResult::OPERAND; + return 
Action::OPERAND; } else { @@ -2304,7 +2317,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand ParseCastExpression(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); - return ParseResult::OPERATOR; + return Action::OPERATOR; } if (layers.back()->previousType() == OperatorType::Comparison) @@ -2322,17 +2335,17 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand ASTPtr function, argument; if (!layers.back()->popOperator(prev_op)) - return ParseResult::ERROR; + return Action::NONE; if (!layers.back()->popOperand(argument)) - return ParseResult::ERROR; + return Action::NONE; function = makeASTFunction(prev_op.function_name, argument, tmp); if (!modifyAST(function, subquery_function_type)) - return ParseResult::ERROR; + return Action::NONE; layers.back()->pushOperand(std::move(function)); - return ParseResult::OPERATOR; + return Action::OPERATOR; } } @@ -2347,7 +2360,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand if (cur_op != unary_operators_table.end()) { layers.back()->pushOperator(cur_op->second); - return ParseResult::OPERAND; + return Action::OPERAND; } auto old_pos = pos; @@ -2358,13 +2371,13 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand { layers.back()->saved_checkpoint = {old_pos, Checkpoint::Interval}; layers.push_back(std::make_unique()); - return ParseResult::OPERAND; + return Action::OPERAND; } else if (current_checkpoint != Checkpoint::Case && parseOperator(pos, "CASE", expected)) { layers.back()->saved_checkpoint = {old_pos, Checkpoint::Case}; layers.push_back(std::make_unique()); - return ParseResult::OPERAND; + return Action::OPERAND; } if (ParseDateOperatorExpression(pos, tmp, expected) || @@ -2385,7 +2398,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand { ++pos; layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); - return 
ParseResult::OPERAND; + return Action::OPERAND; } else { @@ -2401,18 +2414,18 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand if (subquery_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); - return ParseResult::OPERATOR; + return Action::OPERATOR; } ++pos; layers.push_back(std::make_unique()); - return ParseResult::OPERAND; + return Action::OPERAND; } else if (pos->type == TokenType::OpeningSquareBracket) { ++pos; layers.push_back(std::make_unique()); - return ParseResult::OPERAND; + return Action::OPERAND; } else if (mysql_global_variable_parser.parse(pos, tmp, expected)) { @@ -2420,13 +2433,13 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperand } else { - return ParseResult::END; + return Action::NONE; } - return ParseResult::OPERATOR; + return Action::OPERATOR; } -typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected) +Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected) { ASTPtr tmp; @@ -2437,7 +2450,7 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperato Expected stub; for (const char ** it = overlapping_operators_to_skip; *it; ++it) if (ParserKeyword{*it}.checkWithoutMoving(pos, stub)) - return ParseResult::END; + return Action::NONE; /// Try to find operators from 'operators_table' auto cur_op = operators_table.begin(); @@ -2452,11 +2465,11 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperato if (layers.back()->allow_alias && ParserAlias(layers.back()->allow_alias_without_as_keyword).parse(pos, tmp, expected)) { if (!layers.back()->insertAlias(tmp)) - return ParseResult::ERROR; + return Action::NONE; - return ParseResult::OPERATOR; + return Action::OPERATOR; } - return ParseResult::END; + return Action::NONE; } auto op = cur_op->second; @@ -2464,10 +2477,10 @@ typename 
ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperato if (op.type == OperatorType::Lambda) { if (!layers.back()->parseLambda()) - return ParseResult::ERROR; + return Action::NONE; layers.back()->pushOperator(op); - return ParseResult::OPERAND; + return Action::OPERAND; } /// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator @@ -2495,16 +2508,16 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperato { Operator tmp_op; if (!layers.back()->popOperator(tmp_op)) - return ParseResult::ERROR; + return Action::NONE; if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) - return ParseResult::ERROR; + return Action::NONE; bool negative = tmp_op.type == OperatorType::StartNotBetween; ASTs arguments; if (!layers.back()->popLastNOperands(arguments, 3)) - return ParseResult::ERROR; + return Action::NONE; function = makeBetweenOperator(negative, arguments); } @@ -2513,33 +2526,34 @@ typename ParserExpressionImpl::ParseResult ParserExpressionImpl::tryParseOperato function = makeASTFunction(prev_op.function_name); if (!layers.back()->popLastNOperands(function->children[0]->children, prev_op.arity)) - return ParseResult::ERROR; + return Action::NONE; } layers.back()->pushOperand(function); } + layers.back()->pushOperator(op); if (op.type == OperatorType::ArrayElement) layers.push_back(std::make_unique()); - ParseResult next = ParseResult::OPERAND; + Action next = Action::OPERAND; /// isNull & isNotNull are postfix unary operators if (op.type == OperatorType::IsNull) - next = ParseResult::OPERATOR; + next = Action::OPERATOR; if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween) layers.back()->between_counter++; if (op.type == OperatorType::Cast) { - next = ParseResult::OPERATOR; + next = Action::OPERATOR; ASTPtr type_ast; if (!ParserDataType().parse(pos, type_ast, expected)) - return ParseResult::ERROR; + return Action::NONE; 
layers.back()->pushOperand(std::make_shared(queryToString(type_ast))); } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index f12aabde45b..f16742b5854 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -116,48 +116,7 @@ private: SelectUnionModes union_modes; }; -/** An expression with an infix binary left-associative operator. - * For example, a + b - c + d. - */ -class ParserLeftAssociativeBinaryOperatorList : public IParserBase -{ -private: - Operators_t operators; - ParserPtr elem_parser; -public: - /** `operators_` - allowed operators and their corresponding functions - */ - ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && elem_parser_) - : operators(operators_), elem_parser(std::move(elem_parser_)) - { - } - -protected: - const char * getName() const override { return "list, delimited by binary operators"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -/// Optional conversion to INTERVAL data type. Example: "INTERVAL x SECOND" parsed as "toIntervalSecond(x)". -class ParserIntervalOperatorExpression : public IParserBase -{ -protected: - const char * getName() const override { return "INTERVAL operator expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -class ParserExpression : public IParserBase -{ -protected: - const char * getName() const override { return "lambda expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -/// TODO: ? class ParserArray : public IParserBase { protected: @@ -187,6 +146,39 @@ protected: }; +/** An expression with an infix binary left-associative operator. + * For example, a + b - c + d. 
+ */ +class ParserLeftAssociativeBinaryOperatorList : public IParserBase +{ +private: + Operators_t operators; + ParserPtr elem_parser; + +public: + /** `operators_` - allowed operators and their corresponding functions + */ + ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && elem_parser_) + : operators(operators_), elem_parser(std::move(elem_parser_)) + { + } + +protected: + const char * getName() const override { return "list, delimited by binary operators"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + +class ParserExpression : public IParserBase +{ +protected: + const char * getName() const override { return "lambda expression"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + // It's used to parse expressions in table function. class ParserTableFunctionExpression : public IParserBase { @@ -200,16 +192,14 @@ protected: class ParserExpressionWithOptionalAlias : public IParserBase { public: - explicit ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function = false); + explicit ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword_, bool is_table_function_ = false) + : allow_alias_without_as_keyword(allow_alias_without_as_keyword_), is_table_function(is_table_function_) {} protected: - ParserPtr impl; + bool allow_alias_without_as_keyword; + bool is_table_function; const char * getName() const override { return "expression with optional alias"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return impl->parse(pos, node, expected); - } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 2349b781259..fc90f9ce3ed 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -858,8 +858,8 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos 
& pos, ASTPtr & node, Expected & ParserStorage storage_p; ParserStorage storage_inner; ParserTablePropertiesDeclarationList table_properties_p; - ParserIntervalOperatorExpression watermark_p; - ParserIntervalOperatorExpression lateness_p; + ParserExpression watermark_p; + ParserExpression lateness_p; ParserSelectWithUnionQuery select_p; ASTPtr table; From 36c3a18035013753a457b9394516b54c938fba8a Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 16 Sep 2022 02:36:11 +0000 Subject: [PATCH 090/173] Fix tests --- src/Parsers/ExpressionListParsers.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 1f34d922cbe..42bf8f2053b 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -697,11 +697,15 @@ public: return false; if (auto * ast_with_alias = dynamic_cast(operands.back().get())) - tryGetIdentifierNameInto(node, ast_with_alias->alias); - else - return false; + { + if (ast_with_alias->alias.empty()) + { + tryGetIdentifierNameInto(node, ast_with_alias->alias); + return true; + } + } - return true; + return false; } bool is_table_function = false; @@ -775,8 +779,9 @@ public: { /// We can exit the main cycle outside the parse() function, /// so we need to merge the element here - if (!mergeElement()) - return false; + if (!isCurrentElementEmpty() || !elements.empty()) + if (!mergeElement()) + return false; node = std::make_shared(); node->children = std::move(elements); @@ -2462,13 +2467,13 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po if (cur_op == operators_table.end()) { + auto old_pos = pos; if (layers.back()->allow_alias && ParserAlias(layers.back()->allow_alias_without_as_keyword).parse(pos, tmp, expected)) { - if (!layers.back()->insertAlias(tmp)) - return Action::NONE; - - return Action::OPERATOR; + if (layers.back()->insertAlias(tmp)) + return 
Action::OPERATOR; } + pos = old_pos; return Action::NONE; } From f63b2d4b9f232b85af11f753092ba5fd3147617f Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 16 Sep 2022 03:29:34 +0000 Subject: [PATCH 091/173] Revert some breaking changes --- src/Parsers/ExpressionListParsers.cpp | 82 +++++++-------------------- src/Parsers/ExpressionListParsers.h | 12 ++-- 2 files changed, 28 insertions(+), 66 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 42bf8f2053b..0a5f6f98725 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -304,6 +304,22 @@ ASTPtr makeBetweenOperator(bool negative, ASTs arguments) } } +ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function) + : impl(std::make_unique( + is_table_function ? ParserPtr(std::make_unique()) : ParserPtr(std::make_unique()), + allow_alias_without_as_keyword)) +{ +} + + +bool ParserExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return ParserList( + std::make_unique(allow_alias_without_as_keyword, is_table_function), + std::make_unique(TokenType::Comma)) + .parse(pos, node, expected); +} + bool ParserNotEmptyExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return nested_parser.parse(pos, node, expected) && !node->children.empty(); @@ -698,11 +714,8 @@ public: if (auto * ast_with_alias = dynamic_cast(operands.back().get())) { - if (ast_with_alias->alias.empty()) - { - tryGetIdentifierNameInto(node, ast_with_alias->alias); - return true; - } + tryGetIdentifierNameInto(node, ast_with_alias->alias); + return true; } return false; @@ -733,8 +746,7 @@ class ExpressionLayer : public Layer { public: - ExpressionLayer(bool allow_alias_, bool allow_alias_without_as_keyword_, bool is_table_function_) - : Layer(allow_alias_, allow_alias_without_as_keyword_) + ExpressionLayer(bool 
is_table_function_) : Layer(false, false) { is_table_function = is_table_function_; } @@ -765,45 +777,6 @@ public: }; -class ExpressionListLayer : public Layer -{ -public: - - ExpressionListLayer(bool allow_alias_without_as_keyword_, bool is_table_function_) - : Layer(true, allow_alias_without_as_keyword_) - { - is_table_function = is_table_function_; - } - - bool getResult(ASTPtr & node) override - { - /// We can exit the main cycle outside the parse() function, - /// so we need to merge the element here - if (!isCurrentElementEmpty() || !elements.empty()) - if (!mergeElement()) - return false; - - node = std::make_shared(); - node->children = std::move(elements); - - return true; - } - - bool parse(IParser::Pos & pos, Expected & /*expected*/, Action & action) override - { - if (pos->type == TokenType::Comma) - { - if (!mergeElement()) - return false; - - ++pos; - action = Action::OPERAND; - } - - return true; - } -}; - /// Basic layer for a function with certain separator and end tokens: /// 1. If we parse a separator we should merge current operands and operators /// into one element and push in to 'elements' vector. 
@@ -2116,26 +2089,13 @@ struct ParserExpressionImpl bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - auto start = std::make_unique(false, false, false); + auto start = std::make_unique(false); return ParserExpressionImpl().parse(std::move(start), pos, node, expected); } bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - auto start = std::make_unique(false, false, true); - start->is_table_function = true; - return ParserExpressionImpl().parse(std::move(start), pos, node, expected); -} - -bool ParserExpressionWithOptionalAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto start = std::make_unique(true, allow_alias_without_as_keyword, is_table_function); - return ParserExpressionImpl().parse(std::move(start), pos, node, expected); -} - -bool ParserExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto start = std::make_unique(allow_alias_without_as_keyword, is_table_function); + auto start = std::make_unique(true); return ParserExpressionImpl().parse(std::move(start), pos, node, expected); } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index f16742b5854..653654e5a33 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -192,14 +192,16 @@ protected: class ParserExpressionWithOptionalAlias : public IParserBase { public: - explicit ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword_, bool is_table_function_ = false) - : allow_alias_without_as_keyword(allow_alias_without_as_keyword_), is_table_function(is_table_function_) {} + explicit ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword_, bool is_table_function_ = false); protected: - bool allow_alias_without_as_keyword; - bool is_table_function; + ParserPtr impl; const char * getName() const override { return "expression with optional alias"; } - bool parseImpl(Pos & pos, 
ASTPtr & node, Expected & expected) override; + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + return impl->parse(pos, node, expected); + } }; From db85f97a3fca56d0b909da624660ba6d1c84e393 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 16 Sep 2022 08:55:58 +0000 Subject: [PATCH 092/173] Fix --- src/Parsers/ExpressionListParsers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 0a5f6f98725..f7a016a59e4 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -746,7 +746,7 @@ class ExpressionLayer : public Layer { public: - ExpressionLayer(bool is_table_function_) : Layer(false, false) + explicit ExpressionLayer(bool is_table_function_) : Layer(false, false) { is_table_function = is_table_function_; } From f3e8738145b6505a8cc2f48f01bb5767a6d9ea9c Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Sat, 17 Sep 2022 19:48:08 -0700 Subject: [PATCH 093/173] Fixed issues in code review --- src/Common/SipHash.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 6e1138b6510..281a65ca36a 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -32,8 +32,10 @@ v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \ } while(0) +/// Define macro CURRENT_BYTES_IDX for building index used in current_bytes array +/// to ensure correct byte order on different endian machines #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define CURRENT_BYTES_IDX(i) (7-i) +#define CURRENT_BYTES_IDX(i) (7 - i) #else #define CURRENT_BYTES_IDX(i) (i) #endif @@ -230,3 +232,5 @@ inline UInt64 sipHash64(const std::string & s) { return sipHash64(s.data(), s.size()); } + +#undef CURRENT_BYTES_IDX From 2ae43bb4e8a8890629f36a0bbc3d5a0229f463ab Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 19 Sep 2022 11:11:27 +0800 Subject: [PATCH 094/173] 
Add test case Signed-off-by: Frank Chen --- src/Common/OpenTelemetryTraceContext.cpp | 12 +-- .../02423_ddl_for_opentelemetry.reference | 8 ++ .../02423_ddl_for_opentelemetry.sh | 92 +++++++++++++++++++ 3 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference create mode 100755 tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index 314118201bf..0a64900db9b 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -232,11 +232,11 @@ void TracingContext::deserialize(ReadBuffer & buf) { buf >> "tracing: " >> this->trace_id - >> " " + >> "\n" >> this->span_id - >> " " + >> "\n" >> this->tracestate - >> " " + >> "\n" >> this->trace_flags >> "\n"; } @@ -246,11 +246,11 @@ void TracingContext::serialize(WriteBuffer & buf) const { buf << "tracing: " << this->trace_id - << " " + << "\n" << this->span_id - << " " + << "\n" << this->tracestate - << " " + << "\n" << this->trace_flags << "\n"; } diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference new file mode 100644 index 00000000000..19b2fe09a20 --- /dev/null +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -0,0 +1,8 @@ +1 +1 +2 +===case 2==== +1 +1 +exception_code=60 +exception_code=60 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh new file mode 100755 index 00000000000..272eaf4e345 --- /dev/null +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# Tags: distributed + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# This function takes following arguments: +# $1 - OpenTelemetry Trace Id +# $2 - Query +# $3 - Query Settings +# $4 - Output device, default is stdout +function execute_query() +{ + if [ -n "${4}" ]; then + output=$4 + else + output="/dev/stdout" + fi + + echo $2 | ${CLICKHOUSE_CURL} \ + -X POST \ + -H "traceparent: 00-$1-5150000000000515-01" \ + -H "tracestate: a\nb cd" \ + "${CLICKHOUSE_URL}?${3}" \ + --data @- \ + > $output +} + +# This function takes 3 argument: +# $1 - OpenTelemetry Trace Id +# $2 - Fields +# $3 - operation_name pattern +function check_span() +{ +${CLICKHOUSE_CLIENT} -nq " + SYSTEM FLUSH LOGS; + + SELECT ${2} + FROM system.opentelemetry_span_log + WHERE finish_date >= yesterday() + AND lower(hex(trace_id)) = '${1}' + AND operation_name like '${3}' + ;" +} + +# +# Set up +# +${CLICKHOUSE_CLIENT} -q " +DROP TABLE IF EXISTS ddl_test_for_opentelemetry; +" + +# +# Case 1, a normal case +# +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); +execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none" + +check_span $trace_id "count()" "HTTPHandler" +check_span $trace_id "count()" "%DDLWorker::processTask%" + +# There should be two 'query' spans, +# one is for the HTTPHandler, the other is for the DDL executing in DDLWorker +check_span $trace_id "count()" "query" + +# Echo a separator so that the reference file is more clear for reading +echo "===case 2====" + +# +# Case 2, an exceptional case, DROP a non-exist table +# +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); + +# Since this query is supposed to fail, we redirect the error message to /dev/null to discard the error message so that it won't pollute the reference file. 
+# The exception will be checked in the span log +execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER test_shard_localhost" "distributed_ddl_output_mode=none" "/dev/null" + +check_span $trace_id "count()" "HTTPHandler" +check_span $trace_id "count()" "%DDLWorker::processTask%" + +# There should be two 'query' spans, +# one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. +# Both of these two spans contain exception +check_span $trace_id "concat('exception_code=', attribute['clickhouse.exception_code'])" "query" + +# +# Tear down +# +${CLICKHOUSE_CLIENT} -q " +DROP TABLE IF EXISTS ddl_test_for_opentelemetry; +" \ No newline at end of file From 06ae2fb2b581d6a5eb14c639c8cfbb1fe73353be Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 19 Sep 2022 11:20:58 +0800 Subject: [PATCH 095/173] Remove assertEOF to improve the compability Signed-off-by: Frank Chen --- src/Interpreters/DDLTask.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index b867b52ac20..e33617d59f5 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -138,8 +138,6 @@ void DDLLogEntry::parse(const String & data) rb >> this->tracing_context; - assertEOF(rb); - if (!host_id_strings.empty()) { hosts.resize(host_id_strings.size()); From 37ae7a8cca56ebbbda0802b2c411ac0fb571687b Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 18 Sep 2022 20:25:27 -0700 Subject: [PATCH 096/173] Kusto-phase1 : apply parser comments to kusto, remove unused variable --- src/Interpreters/executeQuery.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index e61494792b0..1a7c5032b02 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -389,12 +389,11 @@ static std::tuple executeQueryImpl( String query_table; try { - const Dialect & dialect = 
settings.dialect; - - if (dialect == Dialect::kusto && !internal) + if (settings.dialect == Dialect::kusto && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } else From ec852b3faa418765dc3201b893e3ae265663d144 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 18 Sep 2022 20:38:07 -0700 Subject: [PATCH 097/173] Kusto-phase1 : change the parser in ClientBase from shared_ptr to unique_ptr --- src/Client/ClientBase.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f87487dff7c..f407fab68f1 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -292,7 +292,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - std::shared_ptr parser; + std::unique_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -304,9 +304,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const Dialect & dialect = settings.dialect; if (dialect == Dialect::kusto) - parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); else - parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); if (is_interactive || ignore_error) { From e478079f076f9e17240a906e1a3a8c156ac0afbb Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 19 Sep 2022 11:46:09 +0800 Subject: [PATCH 098/173] Add test cases for different distributed_ddl_entry_format_version 
Signed-off-by: Frank Chen --- .../02423_ddl_for_opentelemetry.reference | 9 ++++ .../02423_ddl_for_opentelemetry.sh | 41 ++++++++++++------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index 19b2fe09a20..68152d602cf 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,8 +1,17 @@ +===case 1==== 1 1 2 ===case 2==== 1 1 +2 +===case 3==== +1 +1 +2 +===case 4==== +1 +1 exception_code=60 exception_code=60 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 272eaf4e345..551e8b3c723 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -51,29 +51,42 @@ ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ddl_test_for_opentelemetry; " +case_no=1; + # -# Case 1, a normal case +# normal cases for ALL distributed_ddl_entry_format_version # -trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); -execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none" +for ddl_version in 1 2 3; do + # Echo a separator so that the reference file is more clear for reading + echo "===case ${case_no}====" -check_span $trace_id "count()" "HTTPHandler" -check_span $trace_id "count()" "%DDLWorker::processTask%" + trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); + execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" -# There should be two 'query' spans, -# one is for the 
HTTPHandler, the other is for the DDL executing in DDLWorker -check_span $trace_id "count()" "query" + check_span $trace_id "count()" "HTTPHandler" + check_span $trace_id "count()" "%DDLWorker::processTask%" + # There should be two 'query' spans, + # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker + check_span $trace_id "count()" "query" + + # Remove table + ${CLICKHOUSE_CLIENT} -q " + DROP TABLE IF EXISTS ddl_test_for_opentelemetry; + " + + case_no=$(($case_no + 1)) +done + +# +# an exceptional case, DROP a non-exist table +# # Echo a separator so that the reference file is more clear for reading -echo "===case 2====" - -# -# Case 2, an exceptional case, DROP a non-exist table -# -trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); +echo "===case ${case_no}====" # Since this query is supposed to fail, we redirect the error message to /dev/null to discard the error message so that it won't pollute the reference file. # The exception will be checked in the span log +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER test_shard_localhost" "distributed_ddl_output_mode=none" "/dev/null" check_span $trace_id "count()" "HTTPHandler" From b056bc1021f3c3f2e7a6ad79690be7afb8f8c955 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 19 Sep 2022 13:43:40 +0800 Subject: [PATCH 099/173] Add span for executeDDLQueryOnCluster Signed-off-by: Frank Chen --- src/Interpreters/executeDDLQueryOnCluster.cpp | 4 ++++ .../queries/0_stateless/02423_ddl_for_opentelemetry.reference | 4 ++++ tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh | 2 ++ 3 files changed, 10 insertions(+) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 06a6512e21b..016a740a7bc 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ 
-55,6 +55,8 @@ bool isSupportedAlterType(int type) BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, const DDLQueryOnClusterParams & params) { + OpenTelemetry::SpanHolder span(__FUNCTION__); + if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ON CLUSTER queries inside transactions are not supported"); @@ -88,6 +90,8 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, cluster = context->getCluster(query->cluster); } + span.addAttribute("clickhouse.cluster", query->cluster); + /// TODO: support per-cluster grant context->checkAccess(AccessType::CLUSTER); diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index 68152d602cf..09c15e5098e 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,17 +1,21 @@ ===case 1==== 1 1 +test_shard_localhost 2 ===case 2==== 1 1 +test_shard_localhost 2 ===case 3==== 1 1 +test_shard_localhost 2 ===case 4==== 1 1 +test_shard_localhost exception_code=60 exception_code=60 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 551e8b3c723..043a968104d 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -65,6 +65,7 @@ for ddl_version in 1 2 3; do check_span $trace_id "count()" "HTTPHandler" check_span $trace_id "count()" "%DDLWorker::processTask%" + check_span $trace_id "attribute['clickhouse.cluster']" "%executeDDLQueryOnCluster%" # There should be two 'query' spans, # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker @@ -91,6 +92,7 @@ execute_query $trace_id "DROP TABLE 
ddl_test_for_opentelemetry_non_exist ON CLUS check_span $trace_id "count()" "HTTPHandler" check_span $trace_id "count()" "%DDLWorker::processTask%" +check_span $trace_id "attribute['clickhouse.cluster']" "%executeDDLQueryOnCluster%" # There should be two 'query' spans, # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. From 6f956329d5a96ee786a1d3aa34d902534b5ab424 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Mon, 19 Sep 2022 15:26:11 +0200 Subject: [PATCH 100/173] Remove obsolete comment from the config.xml Remove obsolete comment, see commit c059d0a0ee1e13c73cdefb821cb40aa01f6981c1 --- programs/server/config.xml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index a1e139d9e76..fef45c19d37 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1106,10 +1106,6 @@ system asynchronous_metric_log
- 7000
From a89140ae98dc187c354471d91ded302da31e1d9c Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Tue, 20 Sep 2022 10:37:54 +0800 Subject: [PATCH 101/173] Fix style Signed-off-by: Frank Chen --- src/Interpreters/DDLWorker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 3dc390785ef..8873d851de1 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -517,8 +517,8 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) chassert(!task.completely_processed); /// Setup tracing context on current thread for current DDL - OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , - task.entry.tracing_context, + OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , + task.entry.tracing_context, this->context->getOpenTelemetrySpanLog()); String active_node_path = task.getActiveNodePath(); From a852644de2bda4eea4d90c6d30f0391da846f51b Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 20 Sep 2022 11:33:06 +0200 Subject: [PATCH 102/173] Update runners to a recent version to install on 22.04 --- tests/ci/worker/ubuntu_ami_for_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/worker/ubuntu_ami_for_ci.sh b/tests/ci/worker/ubuntu_ami_for_ci.sh index c5bc090d8d8..f2d0a7f0300 100644 --- a/tests/ci/worker/ubuntu_ami_for_ci.sh +++ b/tests/ci/worker/ubuntu_ami_for_ci.sh @@ -3,7 +3,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.293.0 +export RUNNER_VERSION=2.296.2 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { From 21afe65e8e10cd0a2f916ca0562fd3152c3042dc Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 21 Sep 2022 10:54:40 +0800 Subject: [PATCH 103/173] Print content if diff says inputs are binary files Signed-off-by: Frank Chen --- tests/clickhouse-test | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 14cf4d0674a..f59ce0fa046 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -864,7 +864,13 @@ class TestCase: ], stdout=PIPE, universal_newlines=True, - ).communicate()[0] + ).communicate()[0] + if diff.startswith("Binary files "): + diff += "Content of stdout:\n===================\n" + file = open(self.stdout_file, "r") + diff += str(file.read()) + file.close() + diff += "===================" description += f"\n{diff}\n" if debug_log: description += "\n" From 4c1a062375367a64e306204a721ac42d10e1c62f Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 21 Sep 2022 14:11:10 +0800 Subject: [PATCH 104/173] Fix style Signed-off-by: Frank Chen --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f59ce0fa046..79428d74c47 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -864,7 +864,7 @@ class TestCase: ], stdout=PIPE, 
universal_newlines=True, - ).communicate()[0] + ).communicate()[0] if diff.startswith("Binary files "): diff += "Content of stdout:\n===================\n" file = open(self.stdout_file, "r") From 971cef8bd214d4cbe521d0e318d750307ff0cac8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Sep 2022 08:39:32 +0000 Subject: [PATCH 105/173] Provide x86 SIMD flag options only on x86 --- cmake/cpu_features.cmake | 81 +++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 218b4deedce..9c986bff7c3 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -11,49 +11,68 @@ cmake_push_check_state () # All of them are unrelated to the instruction set at the host machine # (you can compile for newer instruction set on old machines and vice versa). -option (ENABLE_SSSE3 "Use SSSE3 instructions on x86_64" 1) -option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1) -option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1) -option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1) -option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1) -option (ENABLE_AVX "Use AVX instructions on x86_64" 0) -option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0) -option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) -option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0) -option (ENABLE_BMI "Use BMI instructions on x86_64" 0) -option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) -option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) - -# X86: Allow compilation for a SSE2-only target machine. Done by a special build in CI for embedded or very old hardware. 
-option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64" 0) -if (NO_SSE3_OR_HIGHER) - SET(ENABLE_SSSE3 0) - SET(ENABLE_SSE41 0) - SET(ENABLE_SSE42 0) - SET(ENABLE_PCLMULQDQ 0) - SET(ENABLE_POPCNT 0) - SET(ENABLE_AVX 0) - SET(ENABLE_AVX2 0) - SET(ENABLE_AVX512 0) - SET(ENABLE_AVX512_VBMI 0) - SET(ENABLE_BMI 0) - SET(ENABLE_AVX2_FOR_SPEC_OP 0) - SET(ENABLE_AVX512_FOR_SPEC_OP 0) -endif() - option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 0) if (ARCH_NATIVE) set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native") elseif (ARCH_AARCH64) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8-a+crc+simd+crypto+dotprod+ssbs") + # ARM publishes almost every year a new revision of it's ISA [1]. Each revision comes with new mandatory and optional features from + # which CPU vendors can pick and choose. This creates a lot of variability ... We provide two build "profiles", one for maximum + # compatibility intended to run on all 64-bit ARM hardware released after 2013 (e.g. Raspberry Pi 4), and one for modern ARM server + # CPUs, (e.g. Graviton). + # + # [1] https://en.wikipedia.org/wiki/AArch64 + option (NO_ARMV81_OR_HIGHER "Disable ARMv8.1 or higher on Aarch64 for maximum compatibility with older/embedded hardware." 0) + + if (NO_ARMV81_OR_HIGHER) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8") + else () + # ARMv8.2 is ancient but the baseline for Graviton 2 and 3 processors [1]. In particular, it includes LSE (first made mandatory with + # ARMv8.1) which provides nice speedups without having to fall back to v8.0 "-moutline-atomics" compat flag [2, 3, 4] that imposes + # a recent glibc with runtime dispatch helper, limiting our ability to run on old OSs. 
+ # + # [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md + # [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10 + # [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/ + # [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+crc+simd+crypto+dotprod+ssbs") + endif () elseif (ARCH_PPC64LE) # Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") elseif (ARCH_AMD64) + option (ENABLE_SSSE3 "Use SSSE3 instructions on x86_64" 1) + option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1) + option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1) + option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1) + option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1) + option (ENABLE_AVX "Use AVX instructions on x86_64" 0) + option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0) + option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) + option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0) + option (ENABLE_BMI "Use BMI instructions on x86_64" 0) + option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) + option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) + + option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64 for maximum compatibility with older/embedded hardware." 
0) + if (NO_SSE3_OR_HIGHER) + SET(ENABLE_SSSE3 0) + SET(ENABLE_SSE41 0) + SET(ENABLE_SSE42 0) + SET(ENABLE_PCLMULQDQ 0) + SET(ENABLE_POPCNT 0) + SET(ENABLE_AVX 0) + SET(ENABLE_AVX2 0) + SET(ENABLE_AVX512 0) + SET(ENABLE_AVX512_VBMI 0) + SET(ENABLE_BMI 0) + SET(ENABLE_AVX2_FOR_SPEC_OP 0) + SET(ENABLE_AVX512_FOR_SPEC_OP 0) + endif() + set (TEST_FLAG "-mssse3") set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" From 020f30950f14de51e4b2b7579444b49f1aef3097 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 21 Sep 2022 19:56:32 +0800 Subject: [PATCH 106/173] Suppress the output to see if the test passes Signed-off-by: Frank Chen --- tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 043a968104d..84aa747fc56 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -61,7 +61,7 @@ for ddl_version in 1 2 3; do echo "===case ${case_no}====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); - execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" + execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" "/dev/null" check_span $trace_id "count()" "HTTPHandler" check_span $trace_id "count()" "%DDLWorker::processTask%" From bcaa66c80460bc32409c66acb56df7104c2d5c00 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Sep 2022 13:09:24 +0000 Subject: [PATCH 107/173] Add arm-v80compat builds --- 
.github/workflows/master.yml | 46 ++++++++++++++++++++++++++++++ .github/workflows/pull_request.yml | 44 ++++++++++++++++++++++++++++ docker/packager/packager | 10 +++++++ tests/ci/ci_config.py | 10 +++++++ 4 files changed, 110 insertions(+) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6f2fd5d678d..3d22cb984dd 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -887,6 +887,51 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinAarch64V80Compat: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_aarch64_v80compat + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker 
kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -972,6 +1017,7 @@ jobs: # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinAmd64SSE2 + - BuilderBinAarch64V80Compat - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 24a1c6bb714..2795dc62d6d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -940,6 +940,49 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinAarch64V80Compat: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_aarch64_v80compat + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 
build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1025,6 +1068,7 @@ jobs: # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinAmd64SSE2 + - BuilderBinAarch64V80Compat - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] diff --git a/docker/packager/packager b/docker/packager/packager index 9da787e9006..b4aa4ebdd91 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -128,6 +128,7 @@ def parse_env_variables( DARWIN_SUFFIX = "-darwin" DARWIN_ARM_SUFFIX = "-darwin-aarch64" ARM_SUFFIX = "-aarch64" + ARM_V80COMPAT_SUFFIX = "-aarch64-v80compat" FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" AMD64_SSE2_SUFFIX = "-amd64sse2" @@ -140,6 +141,7 @@ def parse_env_variables( is_cross_darwin = compiler.endswith(DARWIN_SUFFIX) is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX) is_cross_arm = compiler.endswith(ARM_SUFFIX) + is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX) @@ -178,6 +180,13 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake" ) result.append("DEB_ARCH=arm64") + elif is_cross_arm_v80compat: + cc = compiler[: -len(ARM_V80COMPAT_SUFFIX)] + 
cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake" + ) + cmake_flags.append("-DNO_ARMV81_OR_HIGHER=1") + result.append("DEB_ARCH=arm64") elif is_cross_freebsd: cc = compiler[: -len(FREEBSD_SUFFIX)] cmake_flags.append( @@ -343,6 +352,7 @@ if __name__ == "__main__": "clang-15-darwin", "clang-15-darwin-aarch64", "clang-15-aarch64", + "clang-15-aarch64-v80compat", "clang-15-ppc64le", "clang-15-amd64sse2", "clang-15-freebsd", diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a31f2298a58..19513491b1e 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -131,6 +131,15 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, }, + "binary_aarch64_v80compat": { + "compiler": "clang-15-aarch64-v80compat", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "libraries": "static", + "tidy": "disable", + "with_coverage": False, + }, "binary_freebsd": { "compiler": "clang-15-freebsd", "build_type": "", @@ -189,6 +198,7 @@ CI_CONFIG = { "binary_shared", "binary_darwin", "binary_aarch64", + "binary_aarch64_v80compat", "binary_freebsd", "binary_darwin_aarch64", "binary_ppc64le", From cfd8d4e1f12bef0103387008b9487e8237e27510 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Sep 2022 14:49:38 +0000 Subject: [PATCH 108/173] Add CRC32 to compat build --- cmake/cpu_features.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 9c986bff7c3..2660244055e 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -26,7 +26,9 @@ elseif (ARCH_AARCH64) option (NO_ARMV81_OR_HIGHER "Disable ARMv8.1 or higher on Aarch64 for maximum compatibility with older/embedded hardware." 0) if (NO_ARMV81_OR_HIGHER) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8") + # In v8.0, crc32 is optional, in v8.1 it's mandatory. 
Enable it regardless as __crc32()* is used in lot's of places and even very + # old ARM CPUs support it. + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc") else () # ARMv8.2 is ancient but the baseline for Graviton 2 and 3 processors [1]. In particular, it includes LSE (first made mandatory with # ARMv8.1) which provides nice speedups without having to fall back to v8.0 "-moutline-atomics" compat flag [2, 3, 4] that imposes From f0ffc785c022976ef93bf029c9387c7aa23aa5b9 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Sep 2022 16:59:52 +0200 Subject: [PATCH 109/173] Remove assertion --- src/Common/ThreadStatus.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 5309aad6ebb..b62a7af6c71 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -145,8 +145,6 @@ ThreadStatus::ThreadStatus() ThreadStatus::~ThreadStatus() { - chassert(current_thread == this); - memory_tracker.adjustWithUntrackedMemory(untracked_memory); if (thread_group) From 6798b500e9e05cdfbc22ac86830833248890e8df Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 21 Sep 2022 15:12:16 +0000 Subject: [PATCH 110/173] Wait on startup for Keeper --- programs/server/Server.cpp | 12 +++++++++++- src/Coordination/KeeperServer.cpp | 2 +- .../test_keeper_and_access_storage/test.py | 2 -- tests/integration/test_keeper_auth/test.py | 3 --- tests/integration/test_keeper_back_to_back/test.py | 2 -- .../integration/test_keeper_incorrect_config/test.py | 2 -- .../integration/test_keeper_internal_secure/test.py | 3 --- tests/integration/test_keeper_mntr_pressure/test.py | 1 - .../test_keeper_multinode_blocade_leader/test.py | 7 ++++++- .../integration/test_keeper_multinode_simple/test.py | 10 +++++++++- tests/integration/test_keeper_nodes_move/test.py | 2 -- tests/integration/test_keeper_nodes_remove/test.py | 2 -- tests/integration/test_keeper_persistent_log/test.py | 4 ---- 
.../test_keeper_restore_from_snapshot/test.py | 1 - tests/integration/test_keeper_secure_client/test.py | 1 - .../integration/test_keeper_snapshot_on_exit/test.py | 2 -- tests/integration/test_keeper_snapshots/test.py | 1 - .../test_keeper_snapshots_multinode/test.py | 1 - .../test_keeper_three_nodes_start/test.py | 2 -- .../test_keeper_three_nodes_two_alive/test.py | 9 ++++----- .../test_keeper_two_nodes_cluster/test.py | 7 ++++++- .../test_keeper_zookeeper_converter/test.py | 1 - 22 files changed, 37 insertions(+), 40 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 40b4b646b6e..8a0ce75ca70 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1282,8 +1282,18 @@ int Server::main(const std::vector & /*args*/) if (config().has("keeper_server")) { #if USE_NURAFT + //// If we don't have configured connection probably someone trying to use clickhouse-server instead + //// of clickhouse-keeper, so start synchronously. + bool can_initialize_keeper_async = false; + + if (has_zookeeper) /// We have configured connection to some zookeeper cluster + { + /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start + /// synchronously. + can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); + } /// Initialize keeper RAFT. 
- global_context->initializeKeeperDispatcher(/* start_async */ true); + global_context->initializeKeeperDispatcher(can_initialize_keeper_async); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 42d7d967b1f..08092cf68f1 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -705,7 +705,7 @@ void KeeperServer::waitInit() int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds(); if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); })) - throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization"); + LOG_WARNING(log, "Failed to wait for RAFT initialization in {}ms, will continue in background", timeout); } std::vector KeeperServer::getDeadSessions() diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index 72e3582979b..6ec307f7082 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -3,7 +3,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) @@ -16,7 +15,6 @@ node1 = cluster.add_instance( def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node1) yield cluster finally: diff --git a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py index e1331c35eeb..364d93dfc53 100644 --- a/tests/integration/test_keeper_auth/test.py +++ b/tests/integration/test_keeper_auth/test.py @@ -1,6 +1,5 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, 
KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -26,7 +25,6 @@ SUPERAUTH = "super:admin" def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) yield cluster @@ -457,7 +455,6 @@ def test_auth_snapshot(started_cluster): ) node.restart_clickhouse() - keeper_utils.wait_until_connected(cluster, node) connection = get_fake_zk() diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 5ae71841004..73fface02b4 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -1,6 +1,5 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -62,7 +61,6 @@ def stop_zk(zk): def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) yield cluster diff --git a/tests/integration/test_keeper_incorrect_config/test.py b/tests/integration/test_keeper_incorrect_config/test.py index ec8b14a01e9..95482745b31 100644 --- a/tests/integration/test_keeper_incorrect_config/test.py +++ b/tests/integration/test_keeper_incorrect_config/test.py @@ -2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( @@ -225,6 +224,5 @@ def test_invalid_configs(started_cluster): "/etc/clickhouse-server/config.d/enable_keeper1.xml", NORMAL_CONFIG ) node1.start_clickhouse() - keeper_utils.wait_until_connected(cluster, node1) assert node1.query("SELECT 1") == "1\n" diff --git a/tests/integration/test_keeper_internal_secure/test.py b/tests/integration/test_keeper_internal_secure/test.py index 2448a426fe2..2d45e95e4ff 100644 --- a/tests/integration/test_keeper_internal_secure/test.py +++ b/tests/integration/test_keeper_internal_secure/test.py @@ 
-2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -48,8 +47,6 @@ def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) - yield cluster finally: diff --git a/tests/integration/test_keeper_mntr_pressure/test.py b/tests/integration/test_keeper_mntr_pressure/test.py index 1468aa01896..d351b238ead 100644 --- a/tests/integration/test_keeper_mntr_pressure/test.py +++ b/tests/integration/test_keeper_mntr_pressure/test.py @@ -31,7 +31,6 @@ NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving request def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster diff --git a/tests/integration/test_keeper_multinode_blocade_leader/test.py b/tests/integration/test_keeper_multinode_blocade_leader/test.py index 06a5cd8dc5a..a7a80d90a58 100644 --- a/tests/integration/test_keeper_multinode_blocade_leader/test.py +++ b/tests/integration/test_keeper_multinode_blocade_leader/test.py @@ -45,7 +45,6 @@ TODO remove this when jepsen tests will be written. 
def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -65,10 +64,15 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + # in extremely rare case it can take more than 5 minutes in debug build with sanitizer @pytest.mark.timeout(600) def test_blocade_leader(started_cluster): for i in range(100): + wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( @@ -272,6 +276,7 @@ def restart_replica_for_sure(node, table_name, zk_replica_path): @pytest.mark.timeout(600) def test_blocade_leader_twice(started_cluster): for i in range(100): + wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index b8bdb098c0d..1dcbb290fa8 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -33,7 +33,6 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -45,6 +44,10 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -55,6 +58,7 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_multinode(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -96,6 +100,7 @@ def test_read_write_multinode(started_cluster): def test_watch_on_follower(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") 
node3_zk = get_fake_zk("node3") @@ -152,6 +157,7 @@ def test_watch_on_follower(started_cluster): def test_session_expiration(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3", timeout=3.0) @@ -193,6 +199,7 @@ def test_session_expiration(started_cluster): def test_follower_restart(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node1_zk.create("/test_restart_node", b"hello") @@ -217,6 +224,7 @@ def test_follower_restart(started_cluster): def test_simple_replicated_table(started_cluster): + wait_nodes() for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format( diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index c816d69e2d1..1e3bd95c5e7 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -34,8 +34,6 @@ def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) - yield cluster finally: diff --git a/tests/integration/test_keeper_nodes_remove/test.py b/tests/integration/test_keeper_nodes_remove/test.py index 03536f07064..59bdaadf2e2 100644 --- a/tests/integration/test_keeper_nodes_remove/test.py +++ b/tests/integration/test_keeper_nodes_remove/test.py @@ -2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import time import os from kazoo.client import KazooClient, KazooState @@ -25,7 +24,6 @@ node3 = cluster.add_instance( def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index d7cc79836a7..70cc14fe26d 100644 --- 
a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -33,8 +32,6 @@ def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) - yield cluster finally: @@ -51,7 +48,6 @@ def get_connection_zk(nodename, timeout=30.0): def restart_clickhouse(): node.restart_clickhouse(kill=True) - keeper_utils.wait_until_connected(cluster, node) def test_state_after_restart(started_cluster): diff --git a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index 7f2c2e89703..bc33689dd20 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -25,7 +25,6 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster diff --git a/tests/integration/test_keeper_secure_client/test.py b/tests/integration/test_keeper_secure_client/test.py index 81584129052..2a17afac75b 100644 --- a/tests/integration/test_keeper_secure_client/test.py +++ b/tests/integration/test_keeper_secure_client/test.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import string import os import time diff --git a/tests/integration/test_keeper_snapshot_on_exit/test.py b/tests/integration/test_keeper_snapshot_on_exit/test.py index 933e83414a4..1ca5888ab4d 100644 --- a/tests/integration/test_keeper_snapshot_on_exit/test.py +++ b/tests/integration/test_keeper_snapshot_on_exit/test.py @@ -1,6 +1,5 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as 
keeper_utils import os from kazoo.client import KazooClient @@ -28,7 +27,6 @@ def get_fake_zk(node, timeout=30.0): def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index a27ca6f92a5..ce57a852dca 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -36,7 +36,6 @@ def create_random_path(prefix="", depth=1): def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) yield cluster diff --git a/tests/integration/test_keeper_snapshots_multinode/test.py b/tests/integration/test_keeper_snapshots_multinode/test.py index 52d4ae71e33..a68a34dae2e 100644 --- a/tests/integration/test_keeper_snapshots_multinode/test.py +++ b/tests/integration/test_keeper_snapshots_multinode/test.py @@ -29,7 +29,6 @@ def wait_nodes(): def started_cluster(): try: cluster.start() - wait_nodes() yield cluster diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py index c8476568786..e451f969b37 100644 --- a/tests/integration/test_keeper_three_nodes_start/test.py +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -3,7 +3,6 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -32,7 +31,6 @@ def get_fake_zk(nodename, timeout=30.0): def test_smoke(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2]) node1_zk = get_fake_zk("node1") node1_zk.create("/test_alive", b"aaaa") diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index 591dde6a70a..bd29ded357f 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ 
b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -40,7 +40,6 @@ def get_fake_zk(nodename, timeout=30.0): def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -77,10 +76,10 @@ def test_start_offline(started_cluster): p.map(start, [node2, node3]) assert node2.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) assert node3.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") @@ -113,10 +112,10 @@ def test_start_non_existing(started_cluster): p.map(start, [node2, node1]) assert node1.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) assert node2.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index b87dcf6e758..c6bc0ebd33a 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -30,7 +30,6 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster @@ -42,6 +41,10 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2]) + + def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -52,6 +55,7 @@ def get_fake_zk(nodename, 
timeout=30.0): def test_read_write_two_nodes(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") @@ -83,6 +87,7 @@ def test_read_write_two_nodes(started_cluster): def test_read_write_two_nodes_with_blocade(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1", timeout=5.0) node2_zk = get_fake_zk("node2", timeout=5.0) diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index e459078f8ef..af8d1ca4bf9 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -12,7 +12,6 @@ from kazoo.exceptions import ( ) import os import time -import socket cluster = ClickHouseCluster(__file__) From 49c4f1f9c6821d4c4f273c69a233b750d9699d3d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Sep 2022 21:02:48 +0000 Subject: [PATCH 111/173] Document flags --- cmake/cpu_features.cmake | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 2660244055e..f9b2f103f49 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -17,7 +17,7 @@ if (ARCH_NATIVE) set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native") elseif (ARCH_AARCH64) - # ARM publishes almost every year a new revision of it's ISA [1]. Each revision comes with new mandatory and optional features from + # ARM publishes almost every year a new revision of it's ISA [1]. Each version comes with new mandatory and optional features from # which CPU vendors can pick and choose. This creates a lot of variability ... We provide two build "profiles", one for maximum # compatibility intended to run on all 64-bit ARM hardware released after 2013 (e.g. Raspberry Pi 4), and one for modern ARM server # CPUs, (e.g. Graviton). 
@@ -26,19 +26,35 @@ elseif (ARCH_AARCH64) option (NO_ARMV81_OR_HIGHER "Disable ARMv8.1 or higher on Aarch64 for maximum compatibility with older/embedded hardware." 0) if (NO_ARMV81_OR_HIGHER) - # In v8.0, crc32 is optional, in v8.1 it's mandatory. Enable it regardless as __crc32()* is used in lot's of places and even very - # old ARM CPUs support it. + # crc32 is optional in v8.0 and mandatory in v8.1. Enable it as __crc32()* is used in lot's of places and even very old ARM CPUs + # support it. set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc") else () - # ARMv8.2 is ancient but the baseline for Graviton 2 and 3 processors [1]. In particular, it includes LSE (first made mandatory with - # ARMv8.1) which provides nice speedups without having to fall back to v8.0 "-moutline-atomics" compat flag [2, 3, 4] that imposes - # a recent glibc with runtime dispatch helper, limiting our ability to run on old OSs. + # ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1]. In particular, it + # includes LSE (made mandatory with ARMv8.1) which provides nice speedups without having to fall back to compat flag + # "-moutline-atomics" for v8.0 [2, 3, 4] that requires a recent glibc with runtime dispatch helper, limiting our ability to run on + # old OSs. + # + # simd: NEON, introduced as optional in v8.0, A few extensions were added with v8.1 but it's still not mandatory. Enables the + # compiler to auto-vectorize. + # sve: Scalable Vector Extensions, introduced as optional in v8.2. Available in Graviton 3 but not in Graviton 2, and most likely + # also not in CI machines. Compiler support for autovectorization is rudimentary at the time of writing, see [5]. Can be + # enabled one-fine-day (TM) but not now. + # ssbs: "Speculative Store Bypass Safe". Optional in v8.0, mandatory in v8.5. Meltdown/spectre countermeasure. + # crypto: SHA1, SHA256, AES. Optional in v8.0. 
In v8.4, further algorithms were added but it's still optional, see [6]. + # dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag with doubtful performance benefits + # but it has been activated since always, so why not enable it. It's not 100% clear in which revision this flag was + # introduced as optional, either in v8.2 [7] or in v8.4 [8]. # # [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md # [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10 # [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/ # [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+crc+simd+crypto+dotprod+ssbs") + # [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support + # [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en + # [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html + # [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions- + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs") endif () elseif (ARCH_PPC64LE) From 558aed814295f836e3df89d0dc1dd8bd0a7b7109 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 22 Sep 2022 14:30:38 +0800 Subject: [PATCH 112/173] Tag test case not executed under replicated database Signed-off-by: Frank Chen --- tests/queries/0_stateless/01455_opentelemetry_distributed.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index b2b5ae89105..50248cf01a1 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -1,5 
+1,5 @@ #!/usr/bin/env bash -# Tags: distributed +# Tags: distributed, no-replicated-database set -ue From 223c1230b6260faba055329f611a5ed03cbf6cee Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Thu, 22 Sep 2022 15:48:22 +0800 Subject: [PATCH 113/173] Optimize the lock contentions for ThreadGroupStatus::mutex The release of ThreadGroupStatus::finished_threads_counters_memory via the getProfileEventsCountersAndMemoryForThreads method brings lots of lock contentions for ThreadGroupStatus::mutex and lowers the overall performance. This commit optimizes this performance issue by replacing the method call with an equivalent but more lightweight code block. --- src/Interpreters/ThreadStatusExt.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index b1f5749da25..9a4152415af 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -350,7 +350,10 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) /// Avoid leaking of ThreadGroupStatus::finished_threads_counters_memory /// (this is in case someone uses system thread but did not call getProfileEventsCountersAndMemoryForThreads()) - thread_group->getProfileEventsCountersAndMemoryForThreads(); + { + std::lock_guard guard(thread_group->mutex); + auto stats = std::move(thread_group->finished_threads_counters_memory); + } thread_group.reset(); From 60a5ff91e5a9ddbe577657659e415b1e321f9353 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Sep 2022 13:19:45 +0000 Subject: [PATCH 114/173] Add test for setting output_format_json_validate_utf8 --- .../02452_json_utf8_validation.reference | 174 ++++++++++++++++++ .../02452_json_utf8_validation.sql | 42 +++++ 2 files changed, 216 insertions(+) create mode 100644 tests/queries/0_stateless/02452_json_utf8_validation.reference create mode 100644 tests/queries/0_stateless/02452_json_utf8_validation.sql diff --git 
a/tests/queries/0_stateless/02452_json_utf8_validation.reference b/tests/queries/0_stateless/02452_json_utf8_validation.reference new file mode 100644 index 00000000000..c7155832e1e --- /dev/null +++ b/tests/queries/0_stateless/02452_json_utf8_validation.reference @@ -0,0 +1,174 @@ +JSONCompact +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + [ + ["� �"] + ], + + "rows": 1 +} +JSON +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + [ + { + "s": "� �" + } + ], + + "rows": 1 +} +XML + + + + + + s + String + + + + + + � � + + + 1 + +JSONColumnsWithMetadata +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + { + "s": ["� �"] + }, + + "rows": 1 +} +JSONEachRow +{"s":"� �"} +JSONCompactEachRow +["� �"] +JSONColumns +{ + "s": ["� �"] +} +JSONCompactColumns +[ + ["� �"] +] +JSONObjectEachRow +{ + "row_1": {"s":"� �"} +} +JSONCompact +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + [ + ["� �"] + ], + + "rows": 1 +} +JSON +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + [ + { + "s": "� �" + } + ], + + "rows": 1 +} +XML + + + + + + s + String + + + + + + � � + + + 1 + +JSONColumnsWithMetadata +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + { + "s": ["� �"] + }, + + "rows": 1 +} +JSONEachRow +{"s":" "} +JSONCompactEachRow +[" "] +JSONColumns +{ + "s": [" "] +} +JSONCompactColumns +[ + [" "] +] +JSONObjectEachRow +{ + "row_1": {"s":" "} +} diff --git a/tests/queries/0_stateless/02452_json_utf8_validation.sql b/tests/queries/0_stateless/02452_json_utf8_validation.sql new file mode 100644 index 00000000000..e0ddbcdc919 --- /dev/null +++ b/tests/queries/0_stateless/02452_json_utf8_validation.sql @@ -0,0 +1,42 @@ +SET output_format_write_statistics = 0; +SET output_format_json_validate_utf8 = 1; +SELECT 'JSONCompact'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompact; +SELECT 'JSON'; +SELECT '\xED\x20\xA8' AS s FORMAT 
JSON; +SELECT 'XML'; +SELECT '\xED\x20\xA8' AS s FORMAT XML; +SELECT 'JSONColumnsWithMetadata'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONColumnsWithMetadata; +SELECT 'JSONEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONEachRow; +SELECT 'JSONCompactEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompactEachRow; +SELECT 'JSONColumns'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONColumns; +SELECT 'JSONCompactColumns'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompactColumns; +SELECT 'JSONObjectEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONObjectEachRow; + +SET output_format_json_validate_utf8 = 0; +SELECT 'JSONCompact'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompact; +SELECT 'JSON'; +SELECT '\xED\x20\xA8' AS s FORMAT JSON; +SELECT 'XML'; +SELECT '\xED\x20\xA8' AS s FORMAT XML; +SELECT 'JSONColumnsWithMetadata'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONColumnsWithMetadata; +SELECT 'JSONEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONEachRow; +SELECT 'JSONCompactEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompactEachRow; +SELECT 'JSONColumns'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONColumns; +SELECT 'JSONCompactColumns'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompactColumns; +SELECT 'JSONObjectEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONObjectEachRow; + + From cf97827b81173c3a678ae8bfec3b2aeececb596f Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 22 Sep 2022 23:03:28 +0800 Subject: [PATCH 115/173] Revert "Tag test case not executed under replicated database" This reverts commit 558aed814295f836e3df89d0dc1dd8bd0a7b7109. 
--- tests/queries/0_stateless/01455_opentelemetry_distributed.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index 50248cf01a1..b2b5ae89105 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: distributed, no-replicated-database +# Tags: distributed set -ue From 40f9e0b69a13ecacc97e35e83b4bf29573d7102a Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 23 Sep 2022 10:08:42 +0800 Subject: [PATCH 116/173] Address review comments Signed-off-by: Frank Chen --- src/Common/OpenTelemetryTraceContext.cpp | 21 +++++++++------------ src/Common/OpenTelemetryTraceContext.h | 4 ++-- src/Interpreters/DDLTask.cpp | 3 ++- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index 0a64900db9b..3e7a172bdb2 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -228,18 +228,15 @@ String TracingContext::composeTraceparentHeader() const void TracingContext::deserialize(ReadBuffer & buf) { - if (!buf.eof() && *buf.position() == 't') - { - buf >> "tracing: " - >> this->trace_id - >> "\n" - >> this->span_id - >> "\n" - >> this->tracestate - >> "\n" - >> this->trace_flags - >> "\n"; - } + buf >> "tracing: " + >> this->trace_id + >> "\n" + >> this->span_id + >> "\n" + >> this->tracestate + >> "\n" + >> this->trace_flags + >> "\n"; } void TracingContext::serialize(WriteBuffer & buf) const diff --git a/src/Common/OpenTelemetryTraceContext.h b/src/Common/OpenTelemetryTraceContext.h index 20090960814..03bac2891fc 100644 --- a/src/Common/OpenTelemetryTraceContext.h +++ b/src/Common/OpenTelemetryTraceContext.h @@ -160,7 +160,7 @@ struct SpanHolder : public Span void finish() 
noexcept; }; -} // End of namespace OpenTelemetry +} inline WriteBuffer & operator<<(WriteBuffer & buf, const OpenTelemetry::TracingContext & context) { @@ -174,4 +174,4 @@ inline ReadBuffer & operator>> (ReadBuffer & buf, OpenTelemetry::TracingContext return buf; } -} // End of namespace DB +} diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index e33617d59f5..aff47db8242 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -136,7 +136,8 @@ void DDLLogEntry::parse(const String & data) } } - rb >> this->tracing_context; + if (!rb.eof() && *rb.position() == 't') + rb >> this->tracing_context; if (!host_id_strings.empty()) { From 2344e0738e9ed65061273103f791dca56d9f42ab Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 23 Sep 2022 11:32:21 +0800 Subject: [PATCH 117/173] Keep compatibility during upgrading --- src/Common/OpenTelemetryTraceContext.cpp | 6 ++--- src/Interpreters/DDLTask.cpp | 32 ++++++++++++++++-------- src/Interpreters/DDLTask.h | 5 ++++ 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index 3e7a172bdb2..515060803d6 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -228,8 +228,7 @@ String TracingContext::composeTraceparentHeader() const void TracingContext::deserialize(ReadBuffer & buf) { - buf >> "tracing: " - >> this->trace_id + buf >> this->trace_id >> "\n" >> this->span_id >> "\n" @@ -241,8 +240,7 @@ void TracingContext::deserialize(ReadBuffer & buf) void TracingContext::serialize(WriteBuffer & buf) const { - buf << "tracing: " - << this->trace_id + buf << this->trace_id << "\n" << this->span_id << "\n" diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index aff47db8242..73105ae003e 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -50,21 +50,26 @@ bool HostID::isLocalAddress(UInt16 
clickhouse_port) const void DDLLogEntry::assertVersion() const { - constexpr UInt64 max_version = 2; - if (version == 0 || max_version < version) + if (version == 0 + /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses versioin 2, so there shouldn't be version 3 + || version == NORMALIZE_CREATE_ON_INITIATOR_VERSION + || version > MAX_VERSION) throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}." - "Maximum supported version is {}", version, max_version); + "Maximum supported version is {}", version, MAX_VERSION); } void DDLLogEntry::setSettingsIfRequired(ContextPtr context) { - version = context->getSettingsRef().distributed_ddl_entry_format_version; + version = context->getSettingsRef(). ; + if (version <= 0 || version > MAX_VERSION) + throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown distributed_ddl_entry_format_version: {}." + "Maximum supported version is {}.", version, MAX_VERSION); /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not affect entry format in ZooKeeper if (version == NORMALIZE_CREATE_ON_INITIATOR_VERSION) version = SETTINGS_IN_ZK_VERSION; - if (version == SETTINGS_IN_ZK_VERSION) + if (version >= SETTINGS_IN_ZK_VERSION) settings.emplace(context->getSettingsRef().changes()); } @@ -94,7 +99,8 @@ String DDLLogEntry::toString() const wb << "settings: " << serializeAST(ast) << "\n"; } - wb << this->tracing_context; + if (version >= OPENTELEMETRY_ENABLED_VERSION) + wb << "tracing: " << this->tracing_context; return wb.str(); } @@ -108,7 +114,7 @@ void DDLLogEntry::parse(const String & data) Strings host_id_strings; rb >> "query: " >> escape >> query >> "\n"; - if (version == 1) + if (version == OLDEST_VERSION) { rb >> "hosts: " >> host_id_strings >> "\n"; @@ -117,9 +123,8 @@ void DDLLogEntry::parse(const String & data) else initiator.clear(); } - else if (version == 2) + else if (version >= SETTINGS_IN_ZK_VERSION) { - if (!rb.eof() && *rb.position() == 'h') rb >> "hosts: " 
>> host_id_strings >> "\n"; if (!rb.eof() && *rb.position() == 'i') @@ -136,8 +141,13 @@ void DDLLogEntry::parse(const String & data) } } - if (!rb.eof() && *rb.position() == 't') - rb >> this->tracing_context; + if (version >= OPENTELEMETRY_ENABLED_VERSION) + { + if (!rb.eof() && *rb.position() == 't') + rb >> "tracing: " >> this->tracing_context; + } + + assertEOF(rb); if (!host_id_strings.empty()) { diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index fc85188a865..7217ee2b98b 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -70,6 +70,11 @@ struct DDLLogEntry static constexpr const UInt64 OLDEST_VERSION = 1; static constexpr const UInt64 SETTINGS_IN_ZK_VERSION = 2; static constexpr const UInt64 NORMALIZE_CREATE_ON_INITIATOR_VERSION = 3; + static constexpr const UInt64 OPENTELEMETRY_ENABLED_VERSION = 4; + /// Add new version here + + /// Remember to update the value below once new version is added + static constexpr const UInt64 MAX_VERSION = 4; UInt64 version = 1; String query; From 7489a95f0be86a0ba26236a99744ee42ed3d5e91 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 23 Sep 2022 11:33:00 +0800 Subject: [PATCH 118/173] Update test case to satisfy the ddl_format_version --- .../02423_ddl_for_opentelemetry.reference | 47 +++++++++-------- .../02423_ddl_for_opentelemetry.sh | 50 ++++++++++++------- 2 files changed, 57 insertions(+), 40 deletions(-) diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index 09c15e5098e..b6fb5738337 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,21 +1,26 @@ -===case 1==== -1 -1 -test_shard_localhost -2 -===case 2==== -1 -1 -test_shard_localhost -2 -===case 3==== -1 -1 -test_shard_localhost -2 -===case 4==== -1 -1 -test_shard_localhost -exception_code=60 -exception_code=60 
+===ddl_format_version 1==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=0 +query=1 +===ddl_format_version 2==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=0 +query=1 +===ddl_format_version 3==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=0 +query=1 +===ddl_format_version 4==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=1 +query=2 +===exception==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=1 +exceptionCode=60 +exceptionCode=60 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 84aa747fc56..3f1dc53a20b 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -31,8 +31,15 @@ function execute_query() # $1 - OpenTelemetry Trace Id # $2 - Fields # $3 - operation_name pattern +# $4 - extra condition function check_span() { + if [ -n "$4" ]; then + extra_condition=" AND ${4}" + else + extra_condition="" + fi + ${CLICKHOUSE_CLIENT} -nq " SYSTEM FLUSH LOGS; @@ -41,6 +48,8 @@ ${CLICKHOUSE_CLIENT} -nq " WHERE finish_date >= yesterday() AND lower(hex(trace_id)) = '${1}' AND operation_name like '${3}' + ${extra_condition} + Format TSKV ;" } @@ -51,53 +60,56 @@ ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ddl_test_for_opentelemetry; " -case_no=1; +# Support Replicated database engine +cluster_name=$($CLICKHOUSE_CLIENT -q "select if(engine = 'Replicated', name, 'test_shard_localhost') from system.databases where name='$CLICKHOUSE_DATABASE'") # -# normal cases for ALL distributed_ddl_entry_format_version +# Normal cases for ALL distributed_ddl_entry_format_version. 
+# Only format_version 4 enables the tracing # -for ddl_version in 1 2 3; do +for ddl_version in 1 2 3 4; do # Echo a separator so that the reference file is more clear for reading - echo "===case ${case_no}====" + echo "===ddl_format_version ${ddl_version}====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); - execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" "/dev/null" + execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" "/dev/null" - check_span $trace_id "count()" "HTTPHandler" - check_span $trace_id "count()" "%DDLWorker::processTask%" - check_span $trace_id "attribute['clickhouse.cluster']" "%executeDDLQueryOnCluster%" + check_span $trace_id "count() AS httpHandler" "HTTPHandler" + check_span $trace_id "count() AS executeDDLQueryOnCluster" "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" + check_span $trace_id "count() AS processTask" "%DDLWorker::processTask%" - # There should be two 'query' spans, - # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker - check_span $trace_id "count()" "query" + # For format_version 4, there should be two 'query' spans, + # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. 
+ # + # For other format, there should be only one 'query' span + # + check_span $trace_id "count() AS query" "query" # Remove table ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ddl_test_for_opentelemetry; " - - case_no=$(($case_no + 1)) done # # an exceptional case, DROP a non-exist table # # Echo a separator so that the reference file is more clear for reading -echo "===case ${case_no}====" +echo "===exception====" # Since this query is supposed to fail, we redirect the error message to /dev/null to discard the error message so that it won't pollute the reference file. # The exception will be checked in the span log trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); -execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER test_shard_localhost" "distributed_ddl_output_mode=none" "/dev/null" +execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" "/dev/null" -check_span $trace_id "count()" "HTTPHandler" -check_span $trace_id "count()" "%DDLWorker::processTask%" -check_span $trace_id "attribute['clickhouse.cluster']" "%executeDDLQueryOnCluster%" +check_span $trace_id "count() AS httpHandler" "HTTPHandler" +check_span $trace_id "count() AS executeDDLQueryOnCluster" "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" +check_span $trace_id "count() AS processTask" "%DDLWorker::processTask%" # There should be two 'query' spans, # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. 
# Both of these two spans contain exception -check_span $trace_id "concat('exception_code=', attribute['clickhouse.exception_code'])" "query" +check_span $trace_id "attribute['clickhouse.exception_code'] AS exceptionCode" "query" # # Tear down From 45e3d7d7889c50ce4ee6910974100d6c26e54e13 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 23 Sep 2022 11:36:06 +0800 Subject: [PATCH 119/173] Update name/comments --- src/Interpreters/DDLTask.cpp | 12 ++++++------ src/Interpreters/DDLTask.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 73105ae003e..c4c8ceae454 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -51,19 +51,19 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const void DDLLogEntry::assertVersion() const { if (version == 0 - /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses versioin 2, so there shouldn't be version 3 + /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses versioin 2, so there shouldn't be such version || version == NORMALIZE_CREATE_ON_INITIATOR_VERSION - || version > MAX_VERSION) + || version > DDL_ENTRY_FORMAT_MAX_VERSION) throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}." - "Maximum supported version is {}", version, MAX_VERSION); + "Maximum supported version is {}", version, DDL_ENTRY_FORMAT_MAX_VERSION); } void DDLLogEntry::setSettingsIfRequired(ContextPtr context) { - version = context->getSettingsRef(). ; - if (version <= 0 || version > MAX_VERSION) + version = context->getSettingsRef().distributed_ddl_entry_format_version; + if (version <= 0 || version > DDL_ENTRY_FORMAT_MAX_VERSION) throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown distributed_ddl_entry_format_version: {}." 
- "Maximum supported version is {}.", version, MAX_VERSION); + "Maximum supported version is {}.", version, DDL_ENTRY_FORMAT_MAX_VERSION); /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not affect entry format in ZooKeeper if (version == NORMALIZE_CREATE_ON_INITIATOR_VERSION) diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index 7217ee2b98b..661cee84a45 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -74,7 +74,7 @@ struct DDLLogEntry /// Add new version here /// Remember to update the value below once new version is added - static constexpr const UInt64 MAX_VERSION = 4; + static constexpr const UInt64 DDL_ENTRY_FORMAT_MAX_VERSION = 4; UInt64 version = 1; String query; From 34bcb6a82bbe11a9e57f36984dd29b82cdb57cde Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 23 Sep 2022 11:48:50 +0800 Subject: [PATCH 120/173] Fix style Signed-off-by: Frank Chen --- src/Interpreters/DDLTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index c4c8ceae454..2d609c00406 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -50,7 +50,7 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const void DDLLogEntry::assertVersion() const { - if (version == 0 + if (version == 0 /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses versioin 2, so there shouldn't be such version || version == NORMALIZE_CREATE_ON_INITIATOR_VERSION || version > DDL_ENTRY_FORMAT_MAX_VERSION) From a17a3e1de13dedb33586c994a360628b167c34f0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 23 Sep 2022 08:23:57 +0000 Subject: [PATCH 121/173] Ignore Keeper hardware errors --- docker/test/stress/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 25bc64261f3..bb3312bd369 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -454,6 
+454,7 @@ else -e "This engine is deprecated and is not supported in transactions" \ -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ -e "The set of parts restored in place of" \ + -e "ReplicatedMergeTreeAttachThread.*Initialization failed. Error" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv From 1d93c56d1a83f80a46b79ae3937963585dab6d2d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 23 Sep 2022 10:54:16 +0000 Subject: [PATCH 122/173] Collect logs using clickhouse-local --- docker/test/stress/run.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 25bc64261f3..990f168f788 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -508,12 +508,9 @@ grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: mv /var/log/clickhouse-server/stderr.log /test_output/ -# Replace the engine with Ordinary to avoid extra symlinks stuff in artifacts. -# (so that clickhouse-local --path can read it w/o extra care). 
-sed -i -e "s/ATTACH DATABASE _ UUID '[^']*'/ATTACH DATABASE system/" -e "s/Atomic/Ordinary/" /var/lib/clickhouse/metadata/system.sql -for table in query_log trace_log; do - sed -i "s/ATTACH TABLE _ UUID '[^']*'/ATTACH TABLE $table/" /var/lib/clickhouse/metadata/system/${table}.sql - tar -chf /test_output/${table}_dump.tar /var/lib/clickhouse/metadata/system.sql /var/lib/clickhouse/metadata/system/${table}.sql /var/lib/clickhouse/data/system/${table} ||: +for table in query_log trace_log +do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: done # Write check result into check_status.tsv From 2c16232b02e6b0ee2ca5b2e3bf713133e4a0a11e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 23 Sep 2022 14:39:05 +0200 Subject: [PATCH 123/173] fix part removal retries --- .../MergeTree/DataPartStorageOnDisk.cpp | 87 +++++++++++-------- 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 5245bc89e0c..7f04d8b85d4 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -223,54 +223,69 @@ void DataPartStorageOnDisk::remove( /// NOTE relative_path can contain not only part name itself, but also some prefix like /// "moving/all_1_1_1" or "detached/all_2_3_5". We should handle this case more properly. - if (part_dir_without_slash.has_parent_path()) - { - auto parent_path = part_dir_without_slash.parent_path(); - if (parent_path == "detached") - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to remove detached part {} with path {} in remove function. 
It shouldn't happen", part_dir, root_path); - - part_dir_without_slash = parent_path / ("delete_tmp_" + std::string{part_dir_without_slash.filename()}); - } - else - { - part_dir_without_slash = ("delete_tmp_" + std::string{part_dir_without_slash.filename()}); - } + /// File might be already renamed on previous try + bool has_delete_prefix = part_dir_without_slash.filename().string().starts_with("delete_tmp_"); + std::optional can_remove_description; + auto disk = volume->getDisk(); fs::path to = fs::path(root_path) / part_dir_without_slash; - std::optional can_remove_description; - - auto disk = volume->getDisk(); - if (disk->exists(to)) + if (!has_delete_prefix) { - LOG_WARNING(log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart or race condition. Removing it.", fullPath(disk, to)); + if (part_dir_without_slash.has_parent_path()) + { + auto parent_path = part_dir_without_slash.parent_path(); + if (parent_path == "detached") + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Trying to remove detached part {} with path {} in remove function. It shouldn't happen", + part_dir, + root_path); + + part_dir_without_slash = parent_path / ("delete_tmp_" + std::string{part_dir_without_slash.filename()}); + } + else + { + part_dir_without_slash = ("delete_tmp_" + std::string{part_dir_without_slash.filename()}); + } + + to = fs::path(root_path) / part_dir_without_slash; + + if (disk->exists(to)) + { + LOG_WARNING(log, "Directory {} (to which part must be renamed before removing) already exists. " + "Most likely this is due to unclean restart or race condition. Removing it.", fullPath(disk, to)); + try + { + can_remove_description.emplace(can_remove_callback()); + disk->removeSharedRecursive( + fs::path(to) / "", !can_remove_description->can_remove_anything, can_remove_description->files_not_to_remove); + } + catch (...) + { + LOG_ERROR( + log, "Cannot recursively remove directory {}. 
Exception: {}", fullPath(disk, to), getCurrentExceptionMessage(false)); + throw; + } + } + try { - can_remove_description.emplace(can_remove_callback()); - disk->removeSharedRecursive(fs::path(to) / "", !can_remove_description->can_remove_anything, can_remove_description->files_not_to_remove); + disk->moveDirectory(from, to); + onRename(root_path, part_dir_without_slash); } - catch (...) + catch (const fs::filesystem_error & e) { - LOG_ERROR(log, "Cannot recursively remove directory {}. Exception: {}", fullPath(disk, to), getCurrentExceptionMessage(false)); + if (e.code() == std::errc::no_such_file_or_directory) + { + LOG_ERROR(log, "Directory {} (part to remove) doesn't exist or one of nested files has gone. " + "Most likely this is due to manual removing. This should be discouraged. Ignoring.", fullPath(disk, to)); + return; + } throw; } } - try - { - disk->moveDirectory(from, to); - onRename(root_path, part_dir_without_slash); - } - catch (const fs::filesystem_error & e) - { - if (e.code() == std::errc::no_such_file_or_directory) - { - LOG_ERROR(log, "Directory {} (part to remove) doesn't exist or one of nested files has gone. Most likely this is due to manual removing. This should be discouraged. 
Ignoring.", fullPath(disk, to)); - return; - } - throw; - } - if (!can_remove_description) can_remove_description.emplace(can_remove_callback()); From 372539704043a9a205f9796fcc39559224481a9c Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 23 Sep 2022 15:24:10 +0200 Subject: [PATCH 124/173] Try fix azure tests --- .../IO/WriteBufferFromAzureBlobStorage.cpp | 70 +++++++++++-------- .../IO/WriteBufferFromAzureBlobStorage.h | 24 ++++--- .../test.py | 3 - 3 files changed, 55 insertions(+), 42 deletions(-) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 8130d742ee5..fcede7d17e9 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -16,14 +16,13 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( const String & blob_path_, size_t max_single_part_upload_size_, size_t buf_size_, - const WriteSettings & write_settings_, - std::optional> attributes_) + const WriteSettings & write_settings_) : BufferWithOwnMemory(buf_size_, nullptr, 0) - , blob_container_client(blob_container_client_) + , log(&Poco::Logger::get("WriteBufferFromAzureBlobStorage")) , max_single_part_upload_size(max_single_part_upload_size_) , blob_path(blob_path_) , write_settings(write_settings_) - , attributes(attributes_) + , blob_container_client(blob_container_client_) { } @@ -33,63 +32,72 @@ WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() finalize(); } -void WriteBufferFromAzureBlobStorage::finalizeImpl() +void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, size_t num_tries) { - if (attributes.has_value()) + auto can_retry_exception = [&](const Exception & e, size_t i) -> bool { - auto blob_client = blob_container_client->GetBlobClient(blob_path); - Azure::Storage::Metadata metadata; - for (const auto & [key, value] : *attributes) - metadata[key] = value; - blob_client.SetMetadata(metadata); - } + if (i == 
num_tries - 1) + return false; - const size_t max_tries = 3; - for (size_t i = 0; i < max_tries; ++i) + LOG_DEBUG(log, "Write at attempt {} for blob `{}` failed: {}", i + 1, blob_path, e.Message); + return true; + }; + + for (size_t i = 0; i < num_tries; ++i) { try { - next(); + func(); break; } + catch (const Azure::Core::Http::TransportException & e) + { + if (!can_retry_exception(e, i)) + throw; + } catch (const Azure::Core::RequestFailedException & e) { - if (i == max_tries - 1) + if (!can_retry_exception(e, i)) throw; - LOG_INFO(&Poco::Logger::get("WriteBufferFromAzureBlobStorage"), - "Exception caught during finalizing azure storage write at attempt {}: {}", i + 1, e.Message); } } } +void WriteBufferFromAzureBlobStorage::finalizeImpl() +{ + execWithRetry([this](){ next(); }, 3); +} + void WriteBufferFromAzureBlobStorage::nextImpl() { if (!offset()) return; - auto * buffer_begin = working_buffer.begin(); - auto len = offset(); + char * buffer_begin = working_buffer.begin(); + size_t total_size = offset(); + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); - size_t read = 0; + size_t current_size = 0; std::vector block_ids; - while (read < len) + + while (current_size < total_size) { - auto part_len = std::min(len - read, max_single_part_upload_size); + size_t part_len = std::min(total_size - current_size, max_single_part_upload_size); + const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64)); - auto block_id = getRandomASCIIString(64); - block_ids.push_back(block_id); + Azure::Core::IO::MemoryBodyStream tmp_buffer(reinterpret_cast(buffer_begin + current_size), part_len); + execWithRetry([&block_blob_client, &block_id, &tmp_buffer](){ block_blob_client.StageBlock(block_id, tmp_buffer); }, 3); - Azure::Core::IO::MemoryBodyStream tmp_buffer(reinterpret_cast(buffer_begin + read), part_len); - block_blob_client.StageBlock(block_id, tmp_buffer); - - read += part_len; + current_size += part_len; + LOG_TRACE(log, 
"Staged block (id: {}) of size {} (written {}/{}, blob path: {}).", block_id, part_len, current_size, total_size, blob_path); } - block_blob_client.CommitBlockList(block_ids); + execWithRetry([&block_blob_client, &block_ids](){ block_blob_client.CommitBlockList(block_ids); }, 3); + LOG_TRACE(log, "Commited {} blocks for blob `{}`", block_ids.size(), blob_path); if (write_settings.remote_throttler) - write_settings.remote_throttler->add(read); + write_settings.remote_throttler->add(total_size); } } diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 0005705e68c..8bfd23a6379 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -13,20 +13,25 @@ #include +namespace Poco +{ +class Logger; +} + namespace DB { class WriteBufferFromAzureBlobStorage : public BufferWithOwnMemory { public: + using AzureClientPtr = std::shared_ptr; WriteBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + AzureClientPtr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, size_t buf_size_, - const WriteSettings & write_settings_, - std::optional> attributes_ = {}); + const WriteSettings & write_settings_); ~WriteBufferFromAzureBlobStorage() override; @@ -34,12 +39,15 @@ public: private: void finalizeImpl() override; + void execWithRetry(std::function func, size_t num_tries); - std::shared_ptr blob_container_client; - size_t max_single_part_upload_size; - const String blob_path; - WriteSettings write_settings; - std::optional> attributes; + Poco::Logger * log; + + const size_t max_single_part_upload_size; + const std::string blob_path; + const WriteSettings write_settings; + + AzureClientPtr blob_container_client; }; } diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 4df47ec036b..bc755220c2f 100644 --- 
a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -4,9 +4,6 @@ import os import pytest -# FIXME Test is temporarily disabled due to flakyness -# https://github.com/ClickHouse/ClickHouse/issues/39700 - pytestmark = pytest.mark.skip from helpers.cluster import ClickHouseCluster From 81aa9b9199a5d6d97d3de3cee8e6262a6796d611 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 23 Sep 2022 15:34:39 +0200 Subject: [PATCH 125/173] Update WriteBufferFromAzureBlobStorage.cpp --- src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index fcede7d17e9..b57e12d842d 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -94,7 +94,7 @@ void WriteBufferFromAzureBlobStorage::nextImpl() } execWithRetry([&block_blob_client, &block_ids](){ block_blob_client.CommitBlockList(block_ids); }, 3); - LOG_TRACE(log, "Commited {} blocks for blob `{}`", block_ids.size(), blob_path); + LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); if (write_settings.remote_throttler) write_settings.remote_throttler->add(total_size); From f2cbe9722334e9238e8ef31d8c306aa8b90e99fa Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 23 Sep 2022 13:28:29 -0400 Subject: [PATCH 126/173] reformat cell tower example doc --- .../example-datasets/cell-towers.md | 83 +++++++++++++++---- 1 file changed, 69 insertions(+), 14 deletions(-) diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index e31ce3de5ce..eaa9cdfde88 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -16,19 
+16,21 @@ OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4 1. Download the snapshot of the dataset from February 2021: [cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB). 2. Validate the integrity (optional step): -``` +```bash md5sum cell_towers.csv.xz +``` +```response 8cf986f4a0d9f12c6f384a0e9192c908 cell_towers.csv.xz ``` 3. Decompress it with the following command: -``` +```bash xz -d cell_towers.csv.xz ``` 4. Create a table: -``` +```sql CREATE TABLE cell_towers ( radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5), @@ -50,7 +52,7 @@ ENGINE = MergeTree ORDER BY (radio, mcc, net, created); ``` 5. Insert the dataset: -``` +```bash clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv ``` @@ -58,9 +60,10 @@ clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_t 1. A number of cell towers by type: -``` +```sql SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC - +``` +```response ┌─radio─┬────────c─┐ │ UMTS │ 20686487 │ │ LTE │ 12101148 │ @@ -74,9 +77,10 @@ SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC 2. Cell towers by [mobile country code (MCC)](https://en.wikipedia.org/wiki/Mobile_country_code): -``` +```sql SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 - +``` +```response ┌─mcc─┬─count()─┐ │ 310 │ 5024650 │ │ 262 │ 2622423 │ @@ -104,21 +108,72 @@ Using `pointInPolygon` function. 1. Create a table where we will store polygons: -``` -CREATE TEMPORARY TABLE moscow (polygon Array(Tuple(Float64, Float64))); +```sql +CREATE TEMPORARY TABLE +moscow (polygon Array(Tuple(Float64, Float64))); ``` 2. 
This is a rough shape of Moscow (without "new Moscow"): -``` -INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266), (37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554), (37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), (37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413), (37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372), (37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784), (37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089), (37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608), (37.83058993121339, 55.70041317726053), (37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335), (37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), 
(37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639), (37.838960677246064, 55.66316274139358), (37.83926093121332, 55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552), (37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), (37.83515216004943, 55.65155951234079), (37.83312418518067, 55.64979413590619), (37.82801726983639, 55.64640836412121), (37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455), (37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279), (37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446), (37.746329965606634, 55.59920577639331), (37.73939925396728, 55.59631430313617), (37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373), (37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915), (37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 
55.5757737568051), (37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785), (37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 55.58226289171689), (37.57290393054962, 55.582393529795155), (37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229), (37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064), (37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), (37.46050283203121, 55.640794546982576), (37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014), (37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), (37.418709037048295, 55.6811141674414), (37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686), (37.413344899475064, 55.690896881757396), 
(37.41171432275391, 55.69264232162232), (37.40948282275393, 55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811), (37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 55.730845879211614), (37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725), (37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 55.76823419316575), (37.370214730163575, 55.782312184391266), (37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804), (37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979), (37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975), (37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751), (37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 
55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635), (37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249), (37.41862266137694, 55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802), (37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586), (37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106), (37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566), (37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865), (37.52421672750851, 55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505), (37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 
55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554), (37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488), (37.66294133862307, 55.895295577183965), (37.66874564418033, 55.89505457604897), (37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), (37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761), (37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134), (37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492), (37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685), (37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368), (37.84172564285271, 55.78000432402266)]); +```sql +INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266), +(37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554), +(37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), 
(37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413), +(37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372), +(37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784), +(37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089), +(37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608), +(37.83058993121339, 55.70041317726053), (37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335), +(37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), (37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639), +(37.838960677246064, 55.66316274139358), (37.83926093121332, 55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552), +(37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), 
(37.83515216004943, 55.65155951234079), (37.83312418518067, 55.64979413590619), (37.82801726983639, 55.64640836412121), +(37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455), +(37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279), +(37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446), +(37.746329965606634, 55.59920577639331), (37.73939925396728, 55.59631430313617), (37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373), +(37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915), +(37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 55.5757737568051), +(37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785), +(37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 
55.58226289171689), (37.57290393054962, 55.582393529795155), +(37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229), +(37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064), +(37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), (37.46050283203121, 55.640794546982576), +(37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014), +(37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), (37.418709037048295, 55.6811141674414), +(37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686), +(37.413344899475064, 55.690896881757396), (37.41171432275391, 55.69264232162232), (37.40948282275393, 55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811), +(37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 
55.730845879211614), +(37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725), +(37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 55.76823419316575), (37.370214730163575, 55.782312184391266), +(37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804), +(37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979), +(37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975), +(37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751), +(37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635), +(37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249), +(37.41862266137694, 
55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802), +(37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586), +(37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106), +(37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566), +(37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865), +(37.52421672750851, 55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505), +(37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554), +(37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488), +(37.66294133862307, 55.895295577183965), (37.66874564418033, 
55.89505457604897), (37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), (37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761), +(37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134), +(37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492), +(37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685), +(37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368), +(37.84172564285271, 55.78000432402266)]); ``` 3. 
Check how many cell towers are in Moscow: +```sql +SELECT count() FROM cell_towers +WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) ``` -SELECT count() FROM cell_towers WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) - +```response ┌─count()─┐ │ 310463 │ └─────────┘ From 30726721ad341d6738013c31b66593a4152c975c Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 23 Sep 2022 19:35:16 +0200 Subject: [PATCH 127/173] Fix threadpool reader (for local fs) --- src/Disks/IO/createReadBufferFromFileBase.cpp | 8 +- src/Disks/ObjectStorages/IObjectStorage.cpp | 2 +- src/IO/AsynchronousReadBufferFromFile.cpp | 9 +- src/IO/AsynchronousReadBufferFromFile.h | 10 +-- ...ynchronousReadBufferFromFileDescriptor.cpp | 6 +- ...AsynchronousReadBufferFromFileDescriptor.h | 4 +- src/Interpreters/Context.cpp | 84 ++++++++++++++++--- src/Interpreters/Context.h | 10 ++- 8 files changed, 101 insertions(+), 32 deletions(-) diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index d87144dee55..f42194b9052 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -63,6 +63,10 @@ std::unique_ptr createReadBufferFromFileBase( } } + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); + auto create = [&](size_t buffer_size, int actual_flags) { std::unique_ptr res; @@ -77,13 +81,13 @@ std::unique_ptr createReadBufferFromFileBase( } else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async) { - static AsynchronousReaderPtr reader = std::make_shared(); + auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER); res = std::make_unique( reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool) { - static 
AsynchronousReaderPtr reader = std::make_shared(16, 1000000); + auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER); res = std::make_unique( reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index 41eb56559e0..9d6610ee326 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -20,7 +20,7 @@ IAsynchronousReader & IObjectStorage::getThreadPoolReader() if (!context) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); - return context->getThreadPoolReader(); + return context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); } ThreadPool & IObjectStorage::getThreadPoolWriter() diff --git a/src/IO/AsynchronousReadBufferFromFile.cpp b/src/IO/AsynchronousReadBufferFromFile.cpp index f22001cdddf..8310d80b461 100644 --- a/src/IO/AsynchronousReadBufferFromFile.cpp +++ b/src/IO/AsynchronousReadBufferFromFile.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, const std::string & file_name_, size_t buf_size, @@ -32,7 +32,7 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( char * existing_memory, size_t alignment, std::optional file_size_) - : AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_) + : AsynchronousReadBufferFromFileDescriptor(reader_, priority_, -1, buf_size, existing_memory, alignment, file_size_) , file_name(file_name_) { ProfileEvents::increment(ProfileEvents::FileOpen); @@ -58,7 +58,7 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( - 
AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, int & fd_, const std::string & original_file_name, @@ -66,7 +66,7 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( char * existing_memory, size_t alignment, std::optional file_size_) - : AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, fd_, buf_size, existing_memory, alignment, file_size_) + : AsynchronousReadBufferFromFileDescriptor(reader_, priority_, fd_, buf_size, existing_memory, alignment, file_size_) , file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) { fd_ = -1; @@ -105,4 +105,3 @@ AsynchronousReadBufferFromFileWithDescriptorsCache::~AsynchronousReadBufferFromF } - diff --git a/src/IO/AsynchronousReadBufferFromFile.h b/src/IO/AsynchronousReadBufferFromFile.h index 96834350bab..1b7eeec4f19 100644 --- a/src/IO/AsynchronousReadBufferFromFile.h +++ b/src/IO/AsynchronousReadBufferFromFile.h @@ -14,7 +14,7 @@ protected: public: explicit AsynchronousReadBufferFromFile( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, @@ -25,7 +25,7 @@ public: /// Use pre-opened file descriptor. explicit AsynchronousReadBufferFromFile( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. const std::string & original_file_name = {}, @@ -45,7 +45,6 @@ public: } }; - /** Similar to AsynchronousReadBufferFromFile but also transparently shares open file descriptors. 
*/ class AsynchronousReadBufferFromFileWithDescriptorsCache : public AsynchronousReadBufferFromFileDescriptor @@ -56,7 +55,7 @@ private: public: AsynchronousReadBufferFromFileWithDescriptorsCache( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, @@ -64,7 +63,7 @@ public: char * existing_memory = nullptr, size_t alignment = 0, std::optional file_size_ = std::nullopt) - : AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_) + : AsynchronousReadBufferFromFileDescriptor(reader_, priority_, -1, buf_size, existing_memory, alignment, file_size_) , file_name(file_name_) { file = OpenedFileCache::instance().get(file_name, flags); @@ -80,4 +79,3 @@ public: }; } - diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp index 1bf889540eb..c7e2f9b0c41 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp @@ -54,7 +54,7 @@ std::future AsynchronousReadBufferFromFileDescripto return std::async(std::launch::deferred, [] { return IAsynchronousReader::Result{.size = 0, .offset = 0}; }); } - return reader->submit(request); + return reader.submit(request); } @@ -140,7 +140,7 @@ void AsynchronousReadBufferFromFileDescriptor::finalize() AsynchronousReadBufferFromFileDescriptor::AsynchronousReadBufferFromFileDescriptor( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, int fd_, size_t buf_size, @@ -148,7 +148,7 @@ AsynchronousReadBufferFromFileDescriptor::AsynchronousReadBufferFromFileDescript size_t alignment, std::optional file_size_) : ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_) - , reader(std::move(reader_)) + , reader(reader_) , priority(priority_) , required_alignment(alignment) , fd(fd_) diff --git 
a/src/IO/AsynchronousReadBufferFromFileDescriptor.h b/src/IO/AsynchronousReadBufferFromFileDescriptor.h index 7ba842997f4..0659bb203a8 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.h +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.h @@ -16,7 +16,7 @@ namespace DB class AsynchronousReadBufferFromFileDescriptor : public ReadBufferFromFileBase { protected: - AsynchronousReaderPtr reader; + IAsynchronousReader & reader; Int32 priority; Memory<> prefetch_buffer; @@ -36,7 +36,7 @@ protected: public: AsynchronousReadBufferFromFileDescriptor( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 401b823e6c6..1294920ce30 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include #include @@ -228,7 +230,10 @@ struct ContextSharedPart : boost::noncopyable mutable std::unique_ptr distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) mutable std::unique_ptr message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka) - mutable std::unique_ptr threadpool_reader; + mutable std::unique_ptr asynchronous_remote_fs_reader; + mutable std::unique_ptr asynchronous_local_fs_reader; + mutable std::unique_ptr synchronous_local_fs_reader; + mutable std::unique_ptr threadpool_writer; mutable ThrottlerPtr replicated_fetches_throttler; /// A server-wide throttler for replicated fetches @@ -316,13 +321,41 @@ struct ContextSharedPart : boost::noncopyable { /// Wait for thread pool for background reads and writes, /// since it may use per-user MemoryTracker which will be destroyed here. 
- if (threadpool_reader) + if (asynchronous_remote_fs_reader) { try { - LOG_DEBUG(log, "Desctructing threadpool reader"); - threadpool_reader->wait(); - threadpool_reader.reset(); + LOG_DEBUG(log, "Desctructing remote fs threadpool reader"); + asynchronous_remote_fs_reader->wait(); + asynchronous_remote_fs_reader.reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + if (asynchronous_local_fs_reader) + { + try + { + LOG_DEBUG(log, "Desctructing local fs threadpool reader"); + asynchronous_local_fs_reader->wait(); + asynchronous_local_fs_reader.reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + if (synchronous_local_fs_reader) + { + try + { + LOG_DEBUG(log, "Desctructing local fs threadpool reader"); + synchronous_local_fs_reader->wait(); + synchronous_local_fs_reader.reset(); } catch (...) { @@ -3405,21 +3438,48 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const return shared->common_executor; } -IAsynchronousReader & Context::getThreadPoolReader() const +IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const { const auto & config = getConfigRef(); auto lock = getLock(); - if (!shared->threadpool_reader) + switch (type) { - auto pool_size = config.getUInt(".threadpool_reader_pool_size", 100); - auto queue_size = config.getUInt(".threadpool_reader_queue_size", 1000000); + case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER: + { + if (!shared->asynchronous_remote_fs_reader) + { + auto pool_size = config.getUInt(".threadpool_remote_fs_reader_pool_size", 100); + auto queue_size = config.getUInt(".threadpool_remote_fs_reader_queue_size", 1000000); - shared->threadpool_reader = std::make_unique(pool_size, queue_size); + shared->asynchronous_remote_fs_reader = std::make_unique(pool_size, queue_size); + } + + return *shared->asynchronous_remote_fs_reader; + } + case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER: + { + if 
(!shared->asynchronous_local_fs_reader) + { + auto pool_size = config.getUInt(".threadpool_local_fs_reader_pool_size", 100); + auto queue_size = config.getUInt(".threadpool_local_fs_reader_queue_size", 1000000); + + shared->asynchronous_local_fs_reader = std::make_unique(pool_size, queue_size); + } + + return *shared->asynchronous_local_fs_reader; + } + case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER: + { + if (!shared->synchronous_local_fs_reader) + { + shared->synchronous_local_fs_reader = std::make_unique(); + } + + return *shared->synchronous_local_fs_reader; + } } - - return *shared->threadpool_reader; } ThreadPool & Context::getThreadPoolWriter() const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 76d34eff597..fa9c56018a9 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1011,7 +1011,15 @@ public: OrdinaryBackgroundExecutorPtr getFetchesExecutor() const; OrdinaryBackgroundExecutorPtr getCommonExecutor() const; - IAsynchronousReader & getThreadPoolReader() const; + enum class FilesystemReaderType + { + SYNCHRONOUS_LOCAL_FS_READER, + ASYNCHRONOUS_LOCAL_FS_READER, + ASYNCHRONOUS_REMOTE_FS_READER, + }; + + IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; + ThreadPool & getThreadPoolWriter() const; /** Get settings for reading from filesystem. 
*/ From a999212082600c8d78d7466215c97fa8393bd78a Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Sat, 24 Sep 2022 11:34:42 +0800 Subject: [PATCH 128/173] Update test cases to support both Replicated and non-Replicated database engine Signed-off-by: Frank Chen --- src/Databases/DatabaseReplicated.cpp | 2 + .../02423_ddl_for_opentelemetry.reference | 41 +++-- .../02423_ddl_for_opentelemetry.sh | 141 ++++++++++++------ 3 files changed, 114 insertions(+), 70 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index f1bf56e2beb..507320fffde 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -642,6 +643,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex entry.query = queryToString(query); entry.initiator = ddl_worker->getCommonHostID(); entry.setSettingsIfRequired(query_context); + entry.tracing_context = OpenTelemetry::CurrentContext(); String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry, query_context); Strings hosts_to_wait = getZooKeeper()->getChildren(zookeeper_path + "/replicas"); diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index b6fb5738337..348dc062885 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,26 +1,25 @@ ===ddl_format_version 1==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=0 -query=1 +1 +1 +1 +1 ===ddl_format_version 2==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=0 -query=1 +1 +1 +1 +1 ===ddl_format_version 3==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=0 -query=1 +1 +1 +1 +1 ===ddl_format_version 4==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=1 -query=2 +1 +1 +1 +1 
===exception==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=1 -exceptionCode=60 -exceptionCode=60 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 3f1dc53a20b..e313da78354 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -5,31 +5,28 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# The test cases in this file cover DDLs running on both Replicated database engine and non-Replicated database engine. +# Since the processing flow is a little bit different from each other, in order to share same reference file, +# we compare the expected result and actual result by ourselves. See check_span method below for more detail. + # This function takes following arguments: # $1 - OpenTelemetry Trace Id # $2 - Query # $3 - Query Settings -# $4 - Output device, default is stdout function execute_query() { - if [ -n "${4}" ]; then - output=$4 - else - output="/dev/stdout" - fi - - echo $2 | ${CLICKHOUSE_CURL} \ + # Some queries are supposed to fail, use -f to suppress error messages + echo $2 | ${CLICKHOUSE_CURL} -f \ -X POST \ -H "traceparent: 00-$1-5150000000000515-01" \ -H "tracestate: a\nb cd" \ "${CLICKHOUSE_URL}?${3}" \ - --data @- \ - > $output + --data @- } -# This function takes 3 argument: -# $1 - OpenTelemetry Trace Id -# $2 - Fields +# This function takes following argument: +# $1 - expected +# $2 - OpenTelemetry Trace Id # $3 - operation_name pattern # $4 - extra condition function check_span() @@ -40,24 +37,38 @@ function check_span() extra_condition="" fi -${CLICKHOUSE_CLIENT} -nq " - SYSTEM FLUSH LOGS; + ret=$(${CLICKHOUSE_CLIENT} -nq " + SYSTEM FLUSH LOGS; - SELECT ${2} - FROM system.opentelemetry_span_log - WHERE finish_date >= yesterday() - AND lower(hex(trace_id)) = '${1}' - AND 
operation_name like '${3}' - ${extra_condition} - Format TSKV - ;" + ret=$(${CLICKHOUSE_CLIENT} -nq " + SYSTEM FLUSH LOGS; + + SELECT count() + FROM system.opentelemetry_span_log + WHERE finish_date >= yesterday() + AND lower(hex(trace_id)) = '${2}' + AND operation_name like '${3}' + ${extra_condition};") + + if [ $ret = $1 ]; then + echo 1 + else + echo "[operation_name like '${3}' ${extra_condition}]=$ret, expected: ${1}" + + # echo the span logs to help analyze + ${CLICKHOUSE_CLIENT} -q " + SELECT operation_name, attribute + FROM system.opentelemetry_span_log + WHERE finish_date >= yesterday() + AND lower(hex(trace_id)) ='${2}' + ORDER BY start_time_us + Format PrettyCompact + " + fi } # # Set up # ${CLICKHOUSE_CLIENT} -q " -DROP TABLE IF EXISTS ddl_test_for_opentelemetry; +DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry; " # Support Replicated database engine @@ -72,22 +83,50 @@ for ddl_version in 1 2 3 4; do echo "===ddl_format_version ${ddl_version}====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); - execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" "/dev/null" + execute_query $trace_id "CREATE TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" - check_span $trace_id "count() AS httpHandler" "HTTPHandler" - check_span $trace_id "count() AS executeDDLQueryOnCluster" "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" - check_span $trace_id "count() AS processTask" "%DDLWorker::processTask%" + check_span 1 $trace_id "HTTPHandler" - # For format_version 4, there should be two 'query' spans, + # For Replicated database engine, it does not call 'executeDDLQueryOnCluster' method, we don't need to check it + if [
$cluster_name = "test_shard_localhost" ]; then + check_span 1 $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" + else + # Only echo a value so that comparison of reference is correct + echo 1 + fi + + if [ $cluster_name = "test_shard_localhost" ]; then + # The tracing is only enabled when entry format version is 4 + if [ $ddl_version = "4" ]; then + expected=1 + else + expected=0 + fi + else + # For Replicated database engine, the tracing is always enabled because it calls DDLWorker::processTask directly + expected=1 + fi + check_span $expected $trace_id "%DDLWorker::processTask%" + + # For queries that tracing are enabled(format version is 4 or Replicated database engine), there should be two 'query' spans, # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. # # For other format, there should be only one 'query' span - # - check_span $trace_id "count() AS query" "query" + if [ $cluster_name = "test_shard_localhost" ]; then + if [ $ddl_version = "4" ]; then + expected=2 + else + expected=1 + fi + else + expected=2 + fi + check_span $expected $trace_id "query" # Remove table - ${CLICKHOUSE_CLIENT} -q " - DROP TABLE IF EXISTS ddl_test_for_opentelemetry; + # Under Replicated database engine, the DDL is executed as ON CLUSTER DDL, so distributed_ddl_output_mode is needed to suppress output + ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode none -q " + DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry; " done @@ -97,23 +136,27 @@ done # Echo a separator so that the reference file is more clear for reading echo "===exception====" -# Since this query is supposed to fail, we redirect the error message to /dev/null to discard the error message so that it won't pollute the reference file.
-# The exception will be checked in the span log trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); -execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" "/dev/null" +execute_query $trace_id "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" -check_span $trace_id "count() AS httpHandler" "HTTPHandler" -check_span $trace_id "count() AS executeDDLQueryOnCluster" "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" -check_span $trace_id "count() AS processTask" "%DDLWorker::processTask%" +check_span 1 $trace_id "HTTPHandler" -# There should be two 'query' spans, -# one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. -# Both of these two spans contain exception -check_span $trace_id "attribute['clickhouse.exception_code'] AS exceptionCode" "query" +if [ $cluster_name = "test_shard_localhost" ]; then + expected=1 +else + # For Replicated database, executeDDLQueryOnCluster is not called + expected=0 +fi +check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" +check_span $expected $trace_id "%DDLWorker::processTask%" -# -# Tear down -# -${CLICKHOUSE_CLIENT} -q " -DROP TABLE IF EXISTS ddl_test_for_opentelemetry; -" \ No newline at end of file +if [ $cluster_name = "test_shard_localhost" ]; then + # There should be two 'query' spans, one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. + # Both of these two spans contain exception + expected=2 +else + # For Replicated database, there should be only one query span + expected=1 +fi +# We don't care about the exact value of exception_code, just check it's there.
+check_span $expected $trace_id "query" "attribute['clickhouse.exception_code']<>''" From 5b72de031aabf1a0f975181e8b3447f48c947250 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Sat, 24 Sep 2022 13:53:02 +0800 Subject: [PATCH 129/173] Update test case Signed-off-by: Frank Chen --- tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index e313da78354..b055a155acf 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -20,7 +20,7 @@ function execute_query() -X POST \ -H "traceparent: 00-$1-5150000000000515-01" \ -H "tracestate: a\nb cd" \ - "${CLICKHOUSE_URL}?${3}" \ + "${CLICKHOUSE_URL}&${3}" \ --data @- } From e8978165722603b3f902430552abdc3453e096c6 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Sun, 25 Sep 2022 14:06:13 +0200 Subject: [PATCH 130/173] Evict stale DNS entries from cache in case of network errors (#41707) --- base/base/CachedFn.h | 75 --------------------------- src/Client/Connection.cpp | 6 +++ src/Common/DNSResolver.cpp | 76 ++++++++++++++++++++-------- src/Common/DNSResolver.h | 4 ++ src/Common/StackTrace.cpp | 27 ++++++++-- src/Common/tests/gtest_cached_fn.cpp | 54 -------------------- src/IO/S3/PocoHTTPClient.cpp | 8 +++ 7 files changed, 94 insertions(+), 156 deletions(-) delete mode 100644 base/base/CachedFn.h delete mode 100644 src/Common/tests/gtest_cached_fn.cpp diff --git a/base/base/CachedFn.h b/base/base/CachedFn.h deleted file mode 100644 index 19b2a8ce2c0..00000000000 --- a/base/base/CachedFn.h +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once - -#include -#include -#include -#include "FnTraits.h" - -/** - * Caching proxy for a functor that decays to a pointer-to-function. - * Saves pairs (func args, func result on args). - * Cache size is unlimited. 
Cache items are evicted only on manual drop. - * Invocation/update is O(log(saved cache values)). - * - * See Common/tests/cached_fn.cpp for examples. - */ -template -struct CachedFn -{ -private: - using Traits = FnTraits; - using DecayedArgs = TypeListMap; - using Key = TypeListChangeRoot; - using Result = typename Traits::Ret; - - std::map cache; // Can't use hashmap as tuples are unhashable by default - mutable std::mutex mutex; - -public: - template - Result operator()(Args && ...args) - { - Key key{std::forward(args)...}; - - { - std::lock_guard lock(mutex); - - if (auto it = cache.find(key); it != cache.end()) - return it->second; - } - - Result res = std::apply(Func, key); - - { - std::lock_guard lock(mutex); - cache.emplace(std::move(key), res); - } - - return res; - } - - template - void update(Args && ...args) - { - Key key{std::forward(args)...}; - Result res = std::apply(Func, key); - - { - std::lock_guard lock(mutex); - // TODO Can't use emplace(std::move(key), ..), causes test_host_ip_change errors. - cache[key] = std::move(res); - } - } - - size_t size() const - { - std::lock_guard lock(mutex); - return cache.size(); - } - - void drop() - { - std::lock_guard lock(mutex); - cache.clear(); - } -}; diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 7a663195655..a9795e75b28 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -179,6 +179,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts) { disconnect(); + /// Remove this possible stale entry from cache + DNSResolver::instance().removeHostFromCache(host); + /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. 
throw NetException(e.displayText() + " (" + getDescription() + ")", ErrorCodes::NETWORK_ERROR); } @@ -186,6 +189,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts) { disconnect(); + /// Remove this possible stale entry from cache + DNSResolver::instance().removeHostFromCache(host); + /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. /// This exception can only be thrown from socket->connect(), so add information about connection timeout. const auto & connection_timeout = static_cast(secure) ? timeouts.secure_connection_timeout : timeouts.connection_timeout; diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 67d87f757c7..1e5ec09f262 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -1,7 +1,8 @@ #include "DNSResolver.h" -#include +#include #include #include +#include #include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include "DNSPTRResolverProvider.h" namespace ProfileEvents @@ -41,9 +43,11 @@ namespace ErrorCodes extern const int DNS_ERROR; } +namespace +{ /// Slightly altered implementation from https://github.com/pocoproject/poco/blob/poco-1.6.1/Net/src/SocketAddress.cpp#L86 -static void splitHostAndPort(const std::string & host_and_port, std::string & out_host, UInt16 & out_port) +void splitHostAndPort(const std::string & host_and_port, std::string & out_host, UInt16 & out_port) { String port_str; out_host.clear(); @@ -84,7 +88,7 @@ static void splitHostAndPort(const std::string & host_and_port, std::string & ou throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS); } -static DNSResolver::IPAddresses hostByName(const std::string & host) +DNSResolver::IPAddresses hostByName(const std::string & host) { /// Do not resolve IPv6 (or IPv4) if no local IPv6 (or IPv4) addresses are configured. 
/// It should not affect client address checking, since client cannot connect from IPv6 address @@ -112,7 +116,7 @@ static DNSResolver::IPAddresses hostByName(const std::string & host) return addresses; } -static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) +DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) { Poco::Net::IPAddress ip; @@ -136,7 +140,13 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } -static std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress & address) +DNSResolver::IPAddresses resolveIPAddressWithCache(CacheBase & cache, const std::string & host) +{ + auto [result, _ ] = cache.getOrSet(host, [&host]() { return std::make_shared(resolveIPAddressImpl(host)); }); + return *result; +} + +std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress & address) { auto ptr_resolver = DB::DNSPTRResolverProvider::get(); @@ -149,13 +159,27 @@ static std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress } } +std::unordered_set reverseResolveWithCache( + CacheBase> & cache, const Poco::Net::IPAddress & address) +{ + auto [result, _ ] = cache.getOrSet(address, [&address]() { return std::make_shared>(reverseResolveImpl(address)); }); + return *result; +} + +Poco::Net::IPAddress pickAddress(const DNSResolver::IPAddresses & addresses) +{ + return addresses.front(); +} + +} + struct DNSResolver::Impl { using HostWithConsecutiveFailures = std::unordered_map; using AddressWithConsecutiveFailures = std::unordered_map; - CachedFn<&resolveIPAddressImpl> cache_host; - CachedFn<&reverseResolveImpl> cache_address; + CacheBase cache_host{100}; + CacheBase> cache_address{100}; std::mutex drop_mutex; std::mutex update_mutex; @@ -180,7 +204,7 @@ DNSResolver::DNSResolver() : impl(std::make_unique()), log(&P Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { - return resolveHostAll(host).front(); + return 
pickAddress(resolveHostAll(host)); } DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) @@ -189,7 +213,7 @@ DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) return resolveIPAddressImpl(host); addToNewHosts(host); - return impl->cache_host(host); + return resolveIPAddressWithCache(impl->cache_host, host); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port) @@ -202,7 +226,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_an splitHostAndPort(host_and_port, host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) @@ -211,7 +235,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); } std::vector DNSResolver::resolveAddressList(const std::string & host, UInt16 port) @@ -224,7 +248,7 @@ std::vector DNSResolver::resolveAddressList(const std: if (!impl->disable_cache) addToNewHosts(host); - std::vector ips = impl->disable_cache ? hostByName(host) : impl->cache_host(host); + std::vector ips = impl->disable_cache ? 
hostByName(host) : resolveIPAddressWithCache(impl->cache_host, host); auto ips_end = std::unique(ips.begin(), ips.end()); addresses.reserve(ips_end - ips.begin()); @@ -240,13 +264,13 @@ std::unordered_set DNSResolver::reverseResolve(const Poco::Net::IPAddres return reverseResolveImpl(address); addToNewAddresses(address); - return impl->cache_address(address); + return reverseResolveWithCache(impl->cache_address, address); } void DNSResolver::dropCache() { - impl->cache_host.drop(); - impl->cache_address.drop(); + impl->cache_host.reset(); + impl->cache_address.reset(); std::scoped_lock lock(impl->update_mutex, impl->drop_mutex); @@ -257,6 +281,11 @@ void DNSResolver::dropCache() impl->host_name.reset(); } +void DNSResolver::removeHostFromCache(const std::string & host) +{ + impl->cache_host.remove(host); +} + void DNSResolver::setDisableCacheFlag(bool is_disabled) { impl->disable_cache = is_disabled; @@ -378,17 +407,20 @@ bool DNSResolver::updateCache(UInt32 max_consecutive_failures) bool DNSResolver::updateHost(const String & host) { - /// Usage of updateHost implies that host is already in cache and there is no extra computations - auto old_value = impl->cache_host(host); - impl->cache_host.update(host); - return old_value != impl->cache_host(host); + const auto old_value = resolveIPAddressWithCache(impl->cache_host, host); + auto new_value = resolveIPAddressImpl(host); + const bool result = old_value != new_value; + impl->cache_host.set(host, std::make_shared(std::move(new_value))); + return result; } bool DNSResolver::updateAddress(const Poco::Net::IPAddress & address) { - auto old_value = impl->cache_address(address); - impl->cache_address.update(address); - return old_value == impl->cache_address(address); + const auto old_value = reverseResolveWithCache(impl->cache_address, address); + auto new_value = reverseResolveImpl(address); + const bool result = old_value != new_value; + impl->cache_address.set(address, std::make_shared>(std::move(new_value))); + 
return result; } void DNSResolver::addToNewHosts(const String & host) diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 83de616d81a..a05456d3de8 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -18,6 +18,7 @@ class DNSResolver : private boost::noncopyable { public: using IPAddresses = std::vector; + using IPAddressesPtr = std::shared_ptr; static DNSResolver & instance(); @@ -48,6 +49,9 @@ public: /// Drops all caches void dropCache(); + /// Removes an entry from cache or does nothing + void removeHostFromCache(const std::string & host); + /// Updates all known hosts in cache. /// Returns true if IP of any host has been changed or an element was dropped (too many failures) bool updateCache(UInt32 max_consecutive_failures); diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 70f80b62868..37ce3a03cd8 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -4,14 +4,15 @@ #include #include #include -#include #include #include #include #include +#include #include #include +#include #include @@ -462,20 +463,36 @@ std::string StackTrace::toString(void ** frame_pointers_, size_t offset, size_t return toStringStatic(frame_pointers_copy, offset, size); } -static CachedFn<&toStringImpl> & cacheInstance() +using StackTraceRepresentation = std::tuple; +using StackTraceCache = std::map; + +static StackTraceCache & cacheInstance() { - static CachedFn<&toStringImpl> cache; + static StackTraceCache cache; return cache; } +static std::mutex stacktrace_cache_mutex; + std::string StackTrace::toStringStatic(const StackTrace::FramePointers & frame_pointers, size_t offset, size_t size) { /// Calculation of stack trace text is extremely slow. /// We use simple cache because otherwise the server could be overloaded by trash queries. - return cacheInstance()(frame_pointers, offset, size); + /// Note that this cache can grow unconditionally, but practically it should be small. 
+ std::lock_guard lock{stacktrace_cache_mutex}; + + StackTraceRepresentation key{frame_pointers, offset, size}; + auto & cache = cacheInstance(); + if (cache.contains(key)) + return cache[key]; + + auto result = toStringImpl(frame_pointers, offset, size); + cache[key] = result; + return result; } void StackTrace::dropCache() { - cacheInstance().drop(); + std::lock_guard lock{stacktrace_cache_mutex}; + cacheInstance().clear(); } diff --git a/src/Common/tests/gtest_cached_fn.cpp b/src/Common/tests/gtest_cached_fn.cpp deleted file mode 100644 index ab15a1ee5e1..00000000000 --- a/src/Common/tests/gtest_cached_fn.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include -#include -#include - -using namespace std::chrono_literals; - -constexpr int add(int x, int y) -{ - return x + y; -} - -int longFunction(int x, int y) -{ - std::this_thread::sleep_for(1s); - return x + y; -} - -auto f = [](int x, int y) { return x - y; }; - -TEST(CachedFn, Basic) -{ - CachedFn<&add> fn; - - const int res = fn(1, 2); - EXPECT_EQ(fn(1, 2), res); - - /// In GCC, lambda can't be placed in TEST, producing " has no linkage". - /// Assuming http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4268.html, - /// this is a GCC bug. 
- CachedFn<+f> fn2; - - const int res2 = fn2(1, 2); - EXPECT_EQ(fn2(1, 2), res2); -} - -TEST(CachedFn, CachingResults) -{ - CachedFn<&longFunction> fn; - - for (int x = 0; x < 2; ++x) - { - for (int y = 0; y < 2; ++y) - { - const int res = fn(x, y); - const time_t start = time(nullptr); - - for (int count = 0; count < 1000; ++count) - EXPECT_EQ(fn(x, y), res); - - EXPECT_LT(time(nullptr) - start, 10); - } - } -} diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 9fe10aecda5..30373816eca 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,3 +1,4 @@ +#include "Common/DNSResolver.h" #include #if USE_AWS_S3 @@ -257,6 +258,9 @@ void PocoHTTPClient::makeRequestInternal( if (!request_configuration.proxy_host.empty()) { + if (enable_s3_requests_logging) + LOG_TEST(log, "Due to reverse proxy host name ({}) won't be resolved on ClickHouse side", uri); + /// Reverse proxy can replace host header with resolved ip address instead of host name. /// This can lead to request signature difference on S3 side. 
session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); @@ -443,6 +447,10 @@ void PocoHTTPClient::makeRequestInternal( response->SetClientErrorMessage(getCurrentExceptionMessage(false)); addMetric(request, S3MetricType::Errors); + + /// Probably this is socket timeout or something more or less related to DNS + /// Let's just remove this host from DNS cache to be more safe + DNSResolver::instance().removeHostFromCache(Poco::URI(uri).getHost()); } } From c53f463d2d1cb06d4c7a9e0e5b5e657415296c04 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 25 Sep 2022 15:15:59 +0200 Subject: [PATCH 131/173] Update S3ObjectStorage.cpp --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 15e5fed1fff..213f744d84f 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -230,9 +230,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); auto settings_ptr = s3_settings.get(); - ThreadPoolCallbackRunner scheduler; - if (write_settings.s3_allow_parallel_part_upload) - scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); + auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); auto s3_buffer = std::make_unique( client.get(), From e3a6f2381b24280575a652136bb40ee2d05646d3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Sep 2022 17:03:52 +0200 Subject: [PATCH 132/173] Revert "Merge pull request #40033 from ClickHouse/reenable-avx512-vbmi-columnvector-filter" This reverts commit 70f63d2aae2fc3d04cb33e638569a46fb5a31f39, reversing changes made to a0693c3a845e219a3d8e82efde3439701e8469d4. 
--- src/Columns/ColumnVector.cpp | 160 ++++------------------ src/Columns/tests/gtest_column_vector.cpp | 158 --------------------- src/Common/CpuId.h | 6 - src/Common/TargetSpecific.cpp | 7 +- src/Common/TargetSpecific.h | 33 +---- 5 files changed, 32 insertions(+), 332 deletions(-) delete mode 100644 src/Columns/tests/gtest_column_vector.cpp diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 74bcdfa1768..cb570c87498 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -12,14 +12,12 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include @@ -27,10 +25,6 @@ # include #endif -#if USE_MULTITARGET_CODE -# include -#endif - #if USE_EMBEDDED_COMPILER #include #include @@ -477,128 +471,6 @@ void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0])); } -static inline UInt64 blsr(UInt64 mask) -{ -#ifdef __BMI__ - return _blsr_u64(mask); -#else - return mask & (mask-1); -#endif -} - -DECLARE_DEFAULT_CODE( -template -inline void doFilterAligned(const UInt8 *& filt_pos, const UInt8 *& filt_end_aligned, const T *& data_pos, Container & res_data) -{ - while (filt_pos < filt_end_aligned) - { - UInt64 mask = bytes64MaskToBits64Mask(filt_pos); - - if (0xffffffffffffffff == mask) - { - res_data.insert(data_pos, data_pos + SIMD_BYTES); - } - else - { - while (mask) - { - size_t index = std::countr_zero(mask); - res_data.push_back(data_pos[index]); - mask = blsr(mask); - } - } - - filt_pos += SIMD_BYTES; - data_pos += SIMD_BYTES; - } -} -) - -namespace -{ -template -void resize(Container & res_data, size_t reserve_size) -{ -#if defined(MEMORY_SANITIZER) - res_data.resize_fill(reserve_size, static_cast(0)); // MSan doesn't recognize that all allocated memory is written by AVX-512 intrinsics. 
-#else - res_data.resize(reserve_size); -#endif -} -} - -DECLARE_AVX512VBMI2_SPECIFIC_CODE( -template -inline void compressStoreAVX512(const void *src, void *dst, const UInt64 mask) -{ - __m512i vsrc = _mm512_loadu_si512(src); - if constexpr (ELEMENT_WIDTH == 1) - _mm512_mask_compressstoreu_epi8(dst, static_cast<__mmask64>(mask), vsrc); - else if constexpr (ELEMENT_WIDTH == 2) - _mm512_mask_compressstoreu_epi16(dst, static_cast<__mmask32>(mask), vsrc); - else if constexpr (ELEMENT_WIDTH == 4) - _mm512_mask_compressstoreu_epi32(dst, static_cast<__mmask16>(mask), vsrc); - else if constexpr (ELEMENT_WIDTH == 8) - _mm512_mask_compressstoreu_epi64(dst, static_cast<__mmask8>(mask), vsrc); -} - -template -inline void doFilterAligned(const UInt8 *& filt_pos, const UInt8 *& filt_end_aligned, const T *& data_pos, Container & res_data) -{ - static constexpr size_t VEC_LEN = 64; /// AVX512 vector length - 64 bytes - static constexpr size_t ELEMENT_WIDTH = sizeof(T); - static constexpr size_t ELEMENTS_PER_VEC = VEC_LEN / ELEMENT_WIDTH; - static constexpr UInt64 KMASK = 0xffffffffffffffff >> (64 - ELEMENTS_PER_VEC); - - size_t current_offset = res_data.size(); - size_t reserve_size = res_data.size(); - size_t alloc_size = SIMD_BYTES * 2; - - while (filt_pos < filt_end_aligned) - { - /// to avoid calling resize too frequently, resize to reserve buffer. 
- if (reserve_size - current_offset < SIMD_BYTES) - { - reserve_size += alloc_size; - resize(res_data, reserve_size); - alloc_size *= 2; - } - - UInt64 mask = bytes64MaskToBits64Mask(filt_pos); - - if (0xffffffffffffffff == mask) - { - for (size_t i = 0; i < SIMD_BYTES; i += ELEMENTS_PER_VEC) - _mm512_storeu_si512(reinterpret_cast(&res_data[current_offset + i]), - _mm512_loadu_si512(reinterpret_cast(data_pos + i))); - current_offset += SIMD_BYTES; - } - else - { - if (mask) - { - for (size_t i = 0; i < SIMD_BYTES; i += ELEMENTS_PER_VEC) - { - compressStoreAVX512(reinterpret_cast(data_pos + i), - reinterpret_cast(&res_data[current_offset]), mask & KMASK); - current_offset += std::popcount(mask & KMASK); - /// prepare mask for next iter, if ELEMENTS_PER_VEC = 64, no next iter - if (ELEMENTS_PER_VEC < 64) - { - mask >>= ELEMENTS_PER_VEC; - } - } - } - } - - filt_pos += SIMD_BYTES; - data_pos += SIMD_BYTES; - } - /// resize to the real size. - res_data.resize(current_offset); -} -) - template ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const { @@ -624,13 +496,31 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s static constexpr size_t SIMD_BYTES = 64; const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; -#if USE_MULTITARGET_CODE - static constexpr bool VBMI2_CAPABLE = sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8; - if (VBMI2_CAPABLE && isArchSupported(TargetArch::AVX512VBMI2)) - TargetSpecific::AVX512VBMI2::doFilterAligned(filt_pos, filt_end_aligned, data_pos, res_data); - else -#endif - TargetSpecific::Default::doFilterAligned(filt_pos, filt_end_aligned, data_pos, res_data); + while (filt_pos < filt_end_aligned) + { + UInt64 mask = bytes64MaskToBits64Mask(filt_pos); + + if (0xffffffffffffffff == mask) + { + res_data.insert(data_pos, data_pos + SIMD_BYTES); + } + else + { + while (mask) + { + size_t index = std::countr_zero(mask); + 
res_data.push_back(data_pos[index]); + #ifdef __BMI__ + mask = _blsr_u64(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } while (filt_pos < filt_end) { diff --git a/src/Columns/tests/gtest_column_vector.cpp b/src/Columns/tests/gtest_column_vector.cpp deleted file mode 100644 index 9dfb8c5aeb6..00000000000 --- a/src/Columns/tests/gtest_column_vector.cpp +++ /dev/null @@ -1,158 +0,0 @@ -#include -#include -#include -#include -#include -#include - -using namespace DB; - -static pcg64 rng(randomSeed()); -static constexpr int error_code = 12345; -static constexpr size_t TEST_RUNS = 500; -static constexpr size_t MAX_ROWS = 10000; -static const std::vector filter_ratios = {1, 2, 5, 11, 32, 64, 100, 1000}; -static const size_t K = filter_ratios.size(); - -template -static MutableColumnPtr createColumn(size_t n) -{ - auto column = ColumnVector::create(); - auto & values = column->getData(); - - for (size_t i = 0; i < n; ++i) - { - values.push_back(i); - } - - return column; -} - -bool checkFilter(const PaddedPODArray &flit, const IColumn & src, const IColumn & dst) -{ - size_t n = flit.size(); - size_t dst_size = dst.size(); - size_t j = 0; /// index of dest - for (size_t i = 0; i < n; ++i) - { - if (flit[i] != 0) - { - if ((dst_size <= j) || (src.compareAt(i, j, dst, 0) != 0)) - return false; - j++; - } - } - return dst_size == j; /// filtered size check -} - -template -static void testFilter() -{ - auto test_case = [&](size_t rows, size_t filter_ratio) - { - auto vector_column = createColumn(rows); - PaddedPODArray flit(rows); - for (size_t i = 0; i < rows; ++i) - flit[i] = rng() % filter_ratio == 0; - auto res_column = vector_column->filter(flit, -1); - - if (!checkFilter(flit, *vector_column, *res_column)) - throw Exception(error_code, "VectorColumn filter failure, type: {}", typeid(T).name()); - }; - - try - { - for (size_t i = 0; i < TEST_RUNS; ++i) - { - size_t rows = rng() % MAX_ROWS + 1; - size_t 
filter_ratio = filter_ratios[rng() % K]; - - test_case(rows, filter_ratio); - } - } - catch (const Exception & e) - { - FAIL() << e.displayText(); - } -} - -TEST(ColumnVector, Filter) -{ - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); -} - -template -static MutableColumnPtr createIndexColumn(size_t limit, size_t rows) -{ - auto column = ColumnVector::create(); - auto & values = column->getData(); - auto max = std::numeric_limits::max(); - limit = limit > max ? max : limit; - - for (size_t i = 0; i < rows; ++i) - { - T val = rng() % limit; - values.push_back(val); - } - - return column; -} - -template -static void testIndex() -{ - static const std::vector column_sizes = {64, 128, 196, 256, 512}; - - auto test_case = [&](size_t rows, size_t index_rows, size_t limit) - { - auto vector_column = createColumn(rows); - auto index_column = createIndexColumn(rows, index_rows); - auto res_column = vector_column->index(*index_column, limit); - if (limit == 0) - limit = index_column->size(); - - /// check results - if (limit != res_column->size()) - throw Exception(error_code, "ColumnVector index size not match to limit: {} {}", typeid(T).name(), typeid(IndexType).name()); - for (size_t i = 0; i < limit; ++i) - { - /// vector_column data is the same as index, so indexed column's value will equals to index_column. - if (res_column->get64(i) != index_column->get64(i)) - throw Exception(error_code, "ColumnVector index fail: {} {}", typeid(T).name(), typeid(IndexType).name()); - } - }; - - try - { - for (size_t i = 0; i < TEST_RUNS; ++i) - { - /// make sure rows distribute in (column_sizes[r-1], colulmn_sizes[r]] - size_t row_idx = rng() % column_sizes.size(); - size_t row_base = row_idx > 0 ? 
column_sizes[row_idx - 1] : 0; - size_t rows = row_base + (rng() % (column_sizes[row_idx] - row_base) + 1); - size_t index_rows = rng() % MAX_ROWS + 1; - - test_case(rows, index_rows, 0); - test_case(rows, index_rows, static_cast(0.5 * index_rows)); - } - } - catch (const Exception & e) - { - FAIL() << e.displayText(); - } -} - -TEST(ColumnVector, Index) -{ - testIndex(); - testIndex(); - testIndex(); -} diff --git a/src/Common/CpuId.h b/src/Common/CpuId.h index 1e54ccf62b3..167fa22faf6 100644 --- a/src/Common/CpuId.h +++ b/src/Common/CpuId.h @@ -82,7 +82,6 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT OP(AVX512BW) \ OP(AVX512VL) \ OP(AVX512VBMI) \ - OP(AVX512VBMI2) \ OP(PREFETCHWT1) \ OP(SHA) \ OP(ADX) \ @@ -303,11 +302,6 @@ bool haveAVX512VBMI() noexcept return haveAVX512F() && ((CpuInfo(0x7, 0).registers.ecx >> 1) & 1u); } -bool haveAVX512VBMI2() noexcept -{ - return haveAVX512F() && ((CpuInfo(0x7, 0).registers.ecx >> 6) & 1u); -} - bool haveRDRAND() noexcept { return CpuInfo(0x0).registers.eax >= 0x7 && ((CpuInfo(0x1).registers.ecx >> 30) & 1u); diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index a5fbe7de078..70b03833775 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -20,8 +20,6 @@ UInt32 getSupportedArchs() result |= static_cast(TargetArch::AVX512BW); if (Cpu::CpuFlagsCache::have_AVX512VBMI) result |= static_cast(TargetArch::AVX512VBMI); - if (Cpu::CpuFlagsCache::have_AVX512VBMI2) - result |= static_cast(TargetArch::AVX512VBMI2); return result; } @@ -40,9 +38,8 @@ String toString(TargetArch arch) case TargetArch::AVX: return "avx"; case TargetArch::AVX2: return "avx2"; case TargetArch::AVX512F: return "avx512f"; - case TargetArch::AVX512BW: return "avx512bw"; - case TargetArch::AVX512VBMI: return "avx512vbmi"; - case TargetArch::AVX512VBMI2: return "avx512vbmi"; + case TargetArch::AVX512BW: return "avx512bw"; + case TargetArch::AVX512VBMI: return "avx512vbmi"; } 
__builtin_unreachable(); diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index 250642f6ee4..f078c0e3ffc 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -31,7 +31,7 @@ * int funcImpl() { * return 2; * } - * ) // DECLARE_AVX2_SPECIFIC_CODE + * ) // DECLARE_DEFAULT_CODE * * int func() { * #if USE_MULTITARGET_CODE @@ -80,9 +80,8 @@ enum class TargetArch : UInt32 AVX = (1 << 1), AVX2 = (1 << 2), AVX512F = (1 << 3), - AVX512BW = (1 << 4), - AVX512VBMI = (1 << 5), - AVX512VBMI2 = (1 << 6), + AVX512BW = (1 << 4), + AVX512VBMI = (1 << 5), }; /// Runtime detection. @@ -101,7 +100,6 @@ String toString(TargetArch arch); #if defined(__clang__) -#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2"))) #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw"))) #define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f"))) @@ -110,8 +108,6 @@ String toString(TargetArch arch); #define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt"))) #define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE -# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \ - _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2\"))),apply_to=function)") # define BEGIN_AVX512VBMI_SPECIFIC_CODE \ _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi\"))),apply_to=function)") # define BEGIN_AVX512BW_SPECIFIC_CODE \ @@ -133,7 +129,6 @@ String toString(TargetArch arch); # define 
DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition(); #else -#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native"))) #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native"))) #define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native"))) @@ -142,9 +137,6 @@ String toString(TargetArch arch); #define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native))) #define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE -# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native\")") # define BEGIN_AVX512VBMI_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native\")") @@ -225,16 +217,6 @@ namespace TargetSpecific::AVX512VBMI { \ } \ END_TARGET_SPECIFIC_CODE -#define DECLARE_AVX512VBMI2_SPECIFIC_CODE(...) \ -BEGIN_AVX512VBMI2_SPECIFIC_CODE \ -namespace TargetSpecific::AVX512VBMI2 { \ - DUMMY_FUNCTION_DEFINITION \ - using namespace DB::TargetSpecific::AVX512VBMI2; \ - __VA_ARGS__ \ -} \ -END_TARGET_SPECIFIC_CODE - - #else #define USE_MULTITARGET_CODE 0 @@ -247,7 +229,6 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX512F_SPECIFIC_CODE(...) #define DECLARE_AVX512BW_SPECIFIC_CODE(...) #define DECLARE_AVX512VBMI_SPECIFIC_CODE(...) -#define DECLARE_AVX512VBMI2_SPECIFIC_CODE(...) 
#endif @@ -264,9 +245,8 @@ DECLARE_SSE42_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX512F_SPECIFIC_CODE(__VA_ARGS__) \ -DECLARE_AVX512BW_SPECIFIC_CODE (__VA_ARGS__) \ -DECLARE_AVX512VBMI_SPECIFIC_CODE (__VA_ARGS__) \ -DECLARE_AVX512VBMI2_SPECIFIC_CODE (__VA_ARGS__) +DECLARE_AVX512BW_SPECIFIC_CODE(__VA_ARGS__) \ +DECLARE_AVX512VBMI_SPECIFIC_CODE(__VA_ARGS__) DECLARE_DEFAULT_CODE( constexpr auto BuildArch = TargetArch::Default; /// NOLINT @@ -296,9 +276,6 @@ DECLARE_AVX512VBMI_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX512VBMI; /// NOLINT ) // DECLARE_AVX512VBMI_SPECIFIC_CODE -DECLARE_AVX512VBMI2_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX512VBMI2; /// NOLINT -) // DECLARE_AVX512VBMI2_SPECIFIC_CODE /** Runtime Dispatch helpers for class members. * From cf40c57562778d29824bb3b9c1268a2277665f70 Mon Sep 17 00:00:00 2001 From: jianmei zhang Date: Mon, 26 Sep 2022 15:14:58 +0800 Subject: [PATCH 133/173] Disable mergetree table with lightweight delete column name --- src/Interpreters/InterpreterCreateQuery.cpp | 10 ++++++++++ src/Storages/AlterCommands.cpp | 9 +++++++++ ...ee_with_lightweight_delete_column.reference | 1 + ...ergetree_with_lightweight_delete_column.sql | 18 ++++++++++++++++++ 4 files changed, 38 insertions(+) create mode 100644 tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.reference create mode 100644 tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 622f2a71ec9..41c378babcd 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -766,6 +766,16 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat throw Exception("Column " + backQuoteIfNeed(column.name) + " already exists", 
ErrorCodes::DUPLICATE_COLUMN); } + /// Check if _row_exists for lightweight delete column in column_lists for merge tree family. + if (create.storage && create.storage->engine && endsWith(create.storage->engine->name, "MergeTree")) + { + auto search = all_columns.find(LightweightDeleteDescription::FILTER_COLUMN.name); + if (search != all_columns.end()) + throw Exception("Cannot create table with column '" + LightweightDeleteDescription::FILTER_COLUMN.name + "' " + "for *MergeTree engines because it is reserved for lightweight delete feature", + ErrorCodes::ILLEGAL_COLUMN); + } + const auto & settings = getContext()->getSettingsRef(); /// Check low cardinality types in creating table if it was not allowed in setting diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index d370a67bfcc..d68252679a7 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1056,6 +1057,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const throw Exception{"Data type have to be specified for column " + backQuote(column_name) + " to add", ErrorCodes::BAD_ARGUMENTS}; + if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) + throw Exception{"Cannot add column " + backQuote(column_name) + ": this column name is reserved for lightweight delete feature", + ErrorCodes::ILLEGAL_COLUMN}; + if (command.codec) CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs); @@ -1240,6 +1245,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const throw Exception{"Cannot rename to " + backQuote(command.rename_to) + ": column with this name already exists", ErrorCodes::DUPLICATE_COLUMN}; + if (command.rename_to == 
LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) + throw Exception{"Cannot rename to " + backQuote(command.rename_to) + ": this column name is reserved for lightweight delete feature", + ErrorCodes::ILLEGAL_COLUMN}; + if (modified_columns.contains(column_name)) throw Exception{"Cannot rename and modify the same column " + backQuote(column_name) + " in a single ALTER query", ErrorCodes::NOT_IMPLEMENTED}; diff --git a/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.reference b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.reference @@ -0,0 +1 @@ +1 1 diff --git a/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql new file mode 100644 index 00000000000..3aa56e00499 --- /dev/null +++ b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql @@ -0,0 +1,18 @@ +drop table if exists t_row_exists; + +create table t_row_exists(a int, _row_exists int) engine=MergeTree order by a; --{serverError 44} + +create table t_row_exists(a int, b int) engine=MergeTree order by a; +alter table t_row_exists add column _row_exists int; --{serverError 44} +alter table t_row_exists rename column b to _row_exists; --{serverError 44} +drop table t_row_exists; + +create table t_row_exists(a int, _row_exists int) engine=Memory; +insert into t_row_exists values(1,1); +select * from t_row_exists; +drop table t_row_exists; + +create table t_row_exists(a int, b int) engine=Memory; +alter table t_row_exists add column _row_exists int; --{serverError 48} +alter table t_row_exists rename column b to _row_exists; --{serverError 48} +drop table t_row_exists; From 
a02354458aa0f0160cf2bcca8d21ab8e3073cf01 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Sep 2022 12:27:29 +0200 Subject: [PATCH 134/173] Review fixes --- .../IO/WriteBufferFromAzureBlobStorage.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index b57e12d842d..5c4debd56b6 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -11,6 +11,8 @@ namespace DB { +static constexpr auto DEFAULT_RETRY_NUM = 3; + WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( std::shared_ptr blob_container_client_, const String & blob_path_, @@ -34,13 +36,12 @@ WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, size_t num_tries) { - auto can_retry_exception = [&](const Exception & e, size_t i) -> bool + auto handle_exception = [&](const auto & e, size_t i) { if (i == num_tries - 1) - return false; + throw; LOG_DEBUG(log, "Write at attempt {} for blob `{}` failed: {}", i + 1, blob_path, e.Message); - return true; }; for (size_t i = 0; i < num_tries; ++i) @@ -52,20 +53,18 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, } catch (const Azure::Core::Http::TransportException & e) { - if (!can_retry_exception(e, i)) - throw; + handle_exception(e, i); } catch (const Azure::Core::RequestFailedException & e) { - if (!can_retry_exception(e, i)) - throw; + handle_exception(e, i); } } } void WriteBufferFromAzureBlobStorage::finalizeImpl() { - execWithRetry([this](){ next(); }, 3); + execWithRetry([this](){ next(); }, DEFAULT_RETRY_NUM); } void WriteBufferFromAzureBlobStorage::nextImpl() @@ -87,13 +86,13 @@ void WriteBufferFromAzureBlobStorage::nextImpl() const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64)); 
Azure::Core::IO::MemoryBodyStream tmp_buffer(reinterpret_cast(buffer_begin + current_size), part_len); - execWithRetry([&block_blob_client, &block_id, &tmp_buffer](){ block_blob_client.StageBlock(block_id, tmp_buffer); }, 3); + execWithRetry([&](){ block_blob_client.StageBlock(block_id, tmp_buffer); }, DEFAULT_RETRY_NUM); current_size += part_len; LOG_TRACE(log, "Staged block (id: {}) of size {} (written {}/{}, blob path: {}).", block_id, part_len, current_size, total_size, blob_path); } - execWithRetry([&block_blob_client, &block_ids](){ block_blob_client.CommitBlockList(block_ids); }, 3); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, DEFAULT_RETRY_NUM); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); if (write_settings.remote_throttler) From aabcfea5ede60766d0f601ec9881a0fbdc36d123 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 26 Sep 2022 13:41:27 +0300 Subject: [PATCH 135/173] Update 02354_annoy.sql --- tests/queries/0_stateless/02354_annoy.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02354_annoy.sql b/tests/queries/0_stateless/02354_annoy.sql index d25b7333a89..8a8d023a104 100644 --- a/tests/queries/0_stateless/02354_annoy.sql +++ b/tests/queries/0_stateless/02354_annoy.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64 +-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-backward-compatibility-check SET allow_experimental_annoy_index = 1; From f12811012e557dc1d3e62abcd79e3114e4e98cbc Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Sep 2022 12:49:20 +0200 Subject: [PATCH 136/173] Fix integration tests --- .../static-files-disk-uploader/static-files-disk-uploader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp index cb3f611fd4e..32f87d4d64a 100644 --- 
a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp +++ b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp @@ -58,7 +58,9 @@ void processFile(const fs::path & file_path, const fs::path & dst_path, bool tes } else { - auto src_buf = createReadBufferFromFileBase(file_path, {}, fs::file_size(file_path)); + ReadSettings read_settings{}; + read_settings.local_fs_method = LocalFSReadMethod::pread; + auto src_buf = createReadBufferFromFileBase(file_path, read_settings, fs::file_size(file_path)); std::shared_ptr dst_buf; /// test mode for integration tests. From a760c71a0beea2aa8b2bffc29b9c8dabcf1638aa Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 26 Sep 2022 12:52:12 +0200 Subject: [PATCH 137/173] Fix the typo preventing building latest images --- tests/ci/docker_images_check.py | 2 +- tests/ci/pr_info.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 773f3ac1b57..fb7228628fd 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -164,7 +164,7 @@ def gen_versions( # The order is important, PR number is used as cache during the build versions = [str(pr_info.number), pr_commit_version] result_version = pr_commit_version - if pr_info.number == 0 and pr_info.base_name == "master": + if pr_info.number == 0 and pr_info.base_ref == "master": # First get the latest for cache versions.insert(0, "latest") diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 77421ddac32..dc016a7eed9 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -132,9 +132,13 @@ class PRInfo: self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.pr_html_url = f"{repo_prefix}/pull/{self.number}" + # master or backport/xx.x/xxxxx - where the PR will be merged self.base_ref = github_event["pull_request"]["base"]["ref"] + # ClickHouse/ClickHouse self.base_name = 
github_event["pull_request"]["base"]["repo"]["full_name"] + # any_branch-name - the name of working branch name self.head_ref = github_event["pull_request"]["head"]["ref"] + # UserName/ClickHouse or ClickHouse/ClickHouse self.head_name = github_event["pull_request"]["head"]["repo"]["full_name"] self.body = github_event["pull_request"]["body"] self.labels = { From 8fde8b2c56653a9d49b028965c35fd5c59978a3a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 Sep 2022 11:03:24 +0000 Subject: [PATCH 138/173] Try with multiple calls --- docker/test/stress/run.sh | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index a05321f30ec..7cb9c5ce0dc 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -256,6 +256,10 @@ start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log +for table in query_log trace_log +do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.stress.tsv.gz ||: +done # NOTE Disable thread fuzzer before server start with data after stress test. # In debug build it can take a lot of time. @@ -338,6 +342,12 @@ echo $previous_release_tag | download_release_packets && echo -e 'Download scrip || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log +for table in query_log trace_log +do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.clean.tsv.gz ||: +done + +tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: # Check if we cloned previous release repository successfully if ! 
[ "$(ls -A previous_release_repository/tests/queries)" ] @@ -398,6 +408,10 @@ else stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log + for table in query_log trace_log + do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.stress.tsv.gz ||: + done # Start new server mv package_folder/clickhouse /usr/bin/ @@ -496,6 +510,12 @@ else # Remove file bc_check_fatal_messages.txt if it's empty [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt + + tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||: + for table in query_log trace_log + do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.clean.tsv.gz ||: + done fi dmesg -T > /test_output/dmesg.log @@ -505,14 +525,8 @@ grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e && echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv -tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: mv /var/log/clickhouse-server/stderr.log /test_output/ -for table in query_log trace_log -do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: -done - # Write check result into check_status.tsv clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > 
/test_output/check_status.tsv From 6f4a636e8fafe743caeee0022ef02532c1954810 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 Sep 2022 11:21:53 +0000 Subject: [PATCH 139/173] Remove wildcard --- docker/test/stress/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index d3554923eca..e787ff2e191 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -454,7 +454,7 @@ else -e "This engine is deprecated and is not supported in transactions" \ -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ -e "The set of parts restored in place of" \ - -e "ReplicatedMergeTreeAttachThread.*Initialization failed. Error" \ + -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv From c5224cd0071dd8fb59791016edde7813c40b14b4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 11:40:09 +0000 Subject: [PATCH 140/173] Point automatic download script to to ARMv8.0 compat build - follow-up to #41610 - universal.sh downloads a the correct ClickHouse binary for the current platform - For Linux/Aarch64, point to v8.0 compat build for maximum compatibility. Also add property static_binary_name (ci_config.py) so that the binary can be placed into the right location. - Remove the unsupported combinations FreeBSD Aarch64 and PPC for which we provide no binaries. 
--- docs/_includes/install/universal.sh | 10 ++-------- tests/ci/ci_config.py | 1 + 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index c2970924fb0..e8240734c81 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -12,7 +12,7 @@ then DIR="amd64" elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] then - DIR="aarch64" + DIR="aarch64v80compat" # ARMv8.0 for maximum compatibility elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ] then DIR="powerpc64le" @@ -22,12 +22,6 @@ then if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ] then DIR="freebsd" - elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] - then - DIR="freebsd-aarch64" - elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ] - then - DIR="freebsd-powerpc64le" fi elif [ "${OS}" = "Darwin" ] then @@ -42,7 +36,7 @@ fi if [ -z "${DIR}" ] then - echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported." + echo "Operating system '${OS}' / architecture '${ARCH}' is unsupported." exit 1 fi diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 19513491b1e..5e69046915e 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -136,6 +136,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "aarch64v80compat", "libraries": "static", "tidy": "disable", "with_coverage": False, From 51c9f81dce1a43b0fea346018fca46b191e10aa2 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 26 Sep 2022 13:58:38 +0200 Subject: [PATCH 141/173] Fix tests for docker-ci --- tests/ci/docker_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 740cae5bc97..1848300e2f6 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -99,11 +99,11 @@ class TestDockerImageCheck(unittest.TestCase): def test_gen_version(self): pr_info = PRInfo(PRInfo.default_event.copy()) - pr_info.base_name = "anything-else" + pr_info.base_ref = "anything-else" versions, result_version = di.gen_versions(pr_info, None) self.assertEqual(versions, ["0", "0-HEAD"]) self.assertEqual(result_version, "0-HEAD") - pr_info.base_name = "master" + pr_info.base_ref = "master" versions, result_version = di.gen_versions(pr_info, None) self.assertEqual(versions, ["latest", "0", "0-HEAD"]) self.assertEqual(result_version, "0-HEAD") From 6acdeb84be96cba2df0ae6e8e9db28cdfadb981b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 22 Sep 2022 23:19:57 +0200 Subject: [PATCH 142/173] clickhouse-client: refactor editor execution Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 236 +++++++++++++++++++-------------- 1 file changed, 137 insertions(+), 99 deletions(-) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 75c48f690f8..ef8787bc0a3 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -15,7 +16,6 @@ #include #include - namespace { @@ -35,6 +35,132 @@ std::string getEditor() return editor; } +/// See comments in ShellCommand::executeImpl() +/// (for the vfork via dlsym()) +int executeCommand(char * const argv[]) +{ + static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); + if (!real_vfork) + throw std::runtime_error("Cannot find vfork symbol"); + + pid_t pid = reinterpret_cast(real_vfork)(); + + if (-1 == pid) + throw 
std::runtime_error(fmt::format("Cannot vfork {}: {}", argv[0], errnoToString())); + + /// Child + if (0 == pid) + { + sigset_t mask; + sigemptyset(&mask); + sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process + sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process + + execvp(argv[0], argv); + _exit(-1); + } + + int status = 0; + do + { + int exited_pid = waitpid(pid, &status, 0); + if (exited_pid != -1) + break; + + if (errno == EINTR) + continue; + + throw std::runtime_error(fmt::format("Cannot waitpid {}: {}", pid, errnoToString())); + } while (true); + + return status; +} + +void writeRetry(int fd, const std::string & data) +{ + size_t bytes_written = 0; + const char * begin = data.c_str(); + size_t offset = data.size(); + + while (bytes_written != offset) + { + ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); + if ((-1 == res || 0 == res) && errno != EINTR) + throw std::runtime_error(fmt::format("Cannot write to {}: {}", fd, errnoToString())); + bytes_written += res; + } +} +std::string readFile(const std::string & path) +{ + std::ifstream t(path); + std::string str; + t.seekg(0, std::ios::end); + str.reserve(t.tellg()); + t.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); + return str; +} + +/// Simple wrapper for temporary files. 
+class TemporaryFile +{ +private: + std::string path; + int fd = -1; + +public: + explicit TemporaryFile(const char * pattern) + : path(pattern) + { + size_t dot_pos = path.rfind('.'); + if (dot_pos != std::string::npos) + fd = ::mkstemps(path.data(), path.size() - dot_pos); + else + fd = ::mkstemp(path.data()); + + if (-1 == fd) + throw std::runtime_error(fmt::format("Cannot create temporary file {}: {}", path, errnoToString())); + } + ~TemporaryFile() + { + try + { + close(); + unlink(); + } + catch (const std::runtime_error & e) + { + fmt::print(stderr, "{}", e.what()); + } + } + + void close() + { + if (fd == -1) + return; + + if (0 != ::close(fd)) + throw std::runtime_error(fmt::format("Cannot close temporary file {}: {}", path, errnoToString())); + fd = -1; + } + + void write(const std::string & data) + { + if (fd == -1) + throw std::runtime_error(fmt::format("Cannot write to uninitialized file {}", path)); + + writeRetry(fd, data); + } + + void unlink() + { + if (0 != ::unlink(path.c_str())) + throw std::runtime_error(fmt::format("Cannot remove temporary file {}: {}", path, errnoToString())); + } + + std::string & getPath() { return path; } +}; + /// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx. 
/// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org) /// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com) @@ -293,116 +419,28 @@ void ReplxxLineReader::addToHistory(const String & line) rx.print("Unlock of history file failed: %s\n", errnoToString().c_str()); } -/// See comments in ShellCommand::executeImpl() -/// (for the vfork via dlsym()) -int ReplxxLineReader::executeEditor(const std::string & path) -{ - std::vector argv0(editor.data(), editor.data() + editor.size() + 1); - std::vector argv1(path.data(), path.data() + path.size() + 1); - char * const argv[] = {argv0.data(), argv1.data(), nullptr}; - - static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); - if (!real_vfork) - { - rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString().c_str()); - return -1; - } - - pid_t pid = reinterpret_cast(real_vfork)(); - - if (-1 == pid) - { - rx.print("Cannot vfork: %s\n", errnoToString().c_str()); - return -1; - } - - /// Child - if (0 == pid) - { - sigset_t mask; - sigemptyset(&mask); - sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process - sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process - - execvp(editor.c_str(), argv); - rx.print("Cannot execute %s: %s\n", editor.c_str(), errnoToString().c_str()); - _exit(-1); - } - - int status = 0; - do - { - int exited_pid = waitpid(pid, &status, 0); - if (exited_pid == -1) - { - if (errno == EINTR) - continue; - - rx.print("Cannot waitpid: %s\n", errnoToString().c_str()); - return -1; - } - else - break; - } while (true); - return status; -} - void ReplxxLineReader::openEditor() { - char filename[] = "clickhouse_replxx_XXXXXX.sql"; - int fd = ::mkstemps(filename, 4); - if (-1 == fd) - { - rx.print("Cannot create temporary file to edit query: %s\n", errnoToString().c_str()); - return; - } + TemporaryFile editor_file("clickhouse_client_editor_XXXXXX.sql"); + 
editor_file.write(rx.get_state().text()); + editor_file.close(); - replxx::Replxx::State state(rx.get_state()); - - size_t bytes_written = 0; - const char * begin = state.text(); - size_t offset = strlen(state.text()); - while (bytes_written != offset) + char * const argv[] = {editor.data(), editor_file.getPath().data(), nullptr}; + try { - ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); - if ((-1 == res || 0 == res) && errno != EINTR) + if (executeCommand(argv) == 0) { - rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString().c_str()); - break; + const std::string & new_query = readFile(editor_file.getPath()); + rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); } - bytes_written += res; } - - if (0 != ::close(fd)) + catch (const std::runtime_error & e) { - rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString().c_str()); - return; - } - - if (0 == executeEditor(filename)) - { - try - { - std::ifstream t(filename); - std::string str; - t.seekg(0, std::ios::end); - str.reserve(t.tellg()); - t.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); - rx.set_state(replxx::Replxx::State(str.c_str(), str.size())); - } - catch (...) 
- { - rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString().c_str()); - return; - } + rx.print(e.what()); } if (bracketed_paste_enabled) enableBracketedPaste(); - - if (0 != ::unlink(filename)) - rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString().c_str()); } void ReplxxLineReader::enableBracketedPaste() From 58b61d8207c21c15e591aa4793d0d7ba6e889c6c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Sep 2022 14:09:53 +0200 Subject: [PATCH 143/173] clickhouse-client: add interactive history search with fzf-like utility Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 44 ++++++++++++++++++++++++++++++++++ base/base/ReplxxLineReader.h | 1 + 2 files changed, 45 insertions(+) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index ef8787bc0a3..32d3d9aafe7 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -375,6 +375,14 @@ ReplxxLineReader::ReplxxLineReader( return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); + + /// interactive search in history (ctrlp/fzf/skim) + auto interactive_history_search = [this](char32_t code) + { + openInteractiveHistorySearch(); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); } ReplxxLineReader::~ReplxxLineReader() @@ -443,6 +451,42 @@ void ReplxxLineReader::openEditor() enableBracketedPaste(); } +void ReplxxLineReader::openInteractiveHistorySearch() +{ + TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); + auto hs(rx.history_scan()); + while (hs.next()) + { + history_file.write(hs.get().text()); + history_file.write(std::string(1, '\0')); + } + history_file.close(); + + TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql"); + output_file.close(); + + char sh[] = "sh"; + char sh_c[] = "-c"; + std::string fzf = 
fmt::format("fzf --read0 --height=30% < {} > {}", history_file.getPath(), output_file.getPath()); + char * const argv[] = {sh, sh_c, fzf.data(), nullptr}; + + try + { + if (executeCommand(argv) == 0) + { + const std::string & new_query = readFile(output_file.getPath()); + rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); + } + } + catch (const std::runtime_error & e) + { + rx.print(e.what()); + } + + if (bracketed_paste_enabled) + enableBracketedPaste(); +} + void ReplxxLineReader::enableBracketedPaste() { bracketed_paste_enabled = true; diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index b9ec214d02c..ba2ccf903b6 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -27,6 +27,7 @@ private: void addToHistory(const String & line) override; int executeEditor(const std::string & path); void openEditor(); + void openInteractiveHistorySearch(); replxx::Replxx rx; replxx::Replxx::highlighter_callback_t highlighter; From aaa36e2b259f43a4336d4094069afb460cd322c2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Sep 2022 14:23:14 +0200 Subject: [PATCH 144/173] clickhouse-client: add support of sk (fzf-like in rust) Signed-off-by: Azat Khuzhin Co-authored-by: Antonio Andelic --- base/base/ReplxxLineReader.cpp | 53 +++++++++++++++++++++++++++++----- base/base/ReplxxLineReader.h | 1 + 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 32d3d9aafe7..04b7ed2bca7 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -14,7 +14,10 @@ #include #include #include +#include #include +#include +#include /// is_any_of namespace { @@ -35,6 +38,30 @@ std::string getEditor() return editor; } +std::string getFuzzyFinder() +{ + const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe) + + if (!env_path || !*env_path) + return {}; + + std::vector paths; + boost::split(paths, 
env_path, boost::is_any_of(":")); + for (const auto & path_str : paths) + { + std::filesystem::path path(path_str); + std::filesystem::path sk_bin_path = path / "sk"; + if (!access(sk_bin_path.c_str(), X_OK)) + return sk_bin_path; + + std::filesystem::path fzf_bin_path = path / "fzf"; + if (!access(fzf_bin_path.c_str(), X_OK)) + return fzf_bin_path; + } + + return {}; +} + /// See comments in ShellCommand::executeImpl() /// (for the vfork via dlsym()) int executeCommand(char * const argv[]) @@ -268,6 +295,7 @@ ReplxxLineReader::ReplxxLineReader( replxx::Replxx::highlighter_callback_t highlighter_) : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_)) , editor(getEditor()) + , fuzzy_finder(getFuzzyFinder()) { using namespace std::placeholders; using Replxx = replxx::Replxx; @@ -376,13 +404,16 @@ ReplxxLineReader::ReplxxLineReader( }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); - /// interactive search in history (ctrlp/fzf/skim) - auto interactive_history_search = [this](char32_t code) + /// interactive search in history (requires fzf/sk) + if (!fuzzy_finder.empty()) { - openInteractiveHistorySearch(); - return rx.invoke(Replxx::ACTION::REPAINT, code); - }; - rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + auto interactive_history_search = [this](char32_t code) + { + openInteractiveHistorySearch(); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + } } ReplxxLineReader::~ReplxxLineReader() @@ -453,6 +484,7 @@ void ReplxxLineReader::openEditor() void ReplxxLineReader::openInteractiveHistorySearch() { + assert(!fuzzy_finder.empty()); TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); auto hs(rx.history_scan()); while (hs.next()) @@ -467,8 +499,13 @@ void ReplxxLineReader::openInteractiveHistorySearch() char sh[] = "sh"; char sh_c[] = "-c"; - std::string fzf = 
fmt::format("fzf --read0 --height=30% < {} > {}", history_file.getPath(), output_file.getPath()); - char * const argv[] = {sh, sh_c, fzf.data(), nullptr}; + /// NOTE: You can use one of the following to configure the behaviour additionally: + /// - SKIM_DEFAULT_OPTIONS + /// - FZF_DEFAULT_OPTS + std::string fuzzy_finder_command = fmt::format( + "{} --read0 --height=30% < {} > {}", + fuzzy_finder, history_file.getPath(), output_file.getPath()); + char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; try { diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index ba2ccf903b6..fea1405a208 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -37,4 +37,5 @@ private: bool bracketed_paste_enabled = false; std::string editor; + std::string fuzzy_finder; }; From d0f14e1255480dfb7f0b6f31668a1069e99bdf6c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Sep 2022 17:39:03 +0200 Subject: [PATCH 145/173] clickhouse-client: proper support of vfork() w/o dlsym() in musl Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 04b7ed2bca7..e1b97e936c2 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -66,7 +66,17 @@ std::string getFuzzyFinder() /// (for the vfork via dlsym()) int executeCommand(char * const argv[]) { +#if !defined(USE_MUSL) + /** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs, + * because of the resolving of symbols in the shared library + * http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html + * Therefore, separate the resolving of the symbol from the call. + */ static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); +#else + /// If we use Musl with static linking, there is no dlsym and no issue with vfork. 
+ static void * real_vfork = reinterpret_cast(&vfork); +#endif if (!real_vfork) throw std::runtime_error("Cannot find vfork symbol"); From 8cc53a48ae99a765085f44a75fa49314d1f1cc7d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 26 Sep 2022 13:32:53 +0200 Subject: [PATCH 146/173] clickhouse-client: tune fzf/sk options to be a real reverse search Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index e1b97e936c2..916d4f9a74d 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -513,7 +513,7 @@ void ReplxxLineReader::openInteractiveHistorySearch() /// - SKIM_DEFAULT_OPTIONS /// - FZF_DEFAULT_OPTS std::string fuzzy_finder_command = fmt::format( - "{} --read0 --height=30% < {} > {}", + "{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}", fuzzy_finder, history_file.getPath(), output_file.getPath()); char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; From 287d1e68b1f5e190629ed39db1369eea0608e46b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 Sep 2022 12:22:23 +0000 Subject: [PATCH 147/173] Fix KeeperMap drop again --- src/Storages/StorageKeeperMap.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f6b110bbad0..11b6fe1b8dc 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -456,9 +456,9 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) { - zookeeper->removeChildrenRecursive(data_path); + zookeeper->tryRemoveChildrenRecursive(data_path, true); - bool completely_removed = false; + bool drop_done = false; 
Coordination::Requests ops; ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); @@ -473,20 +473,33 @@ bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::E case ZOK: { metadata_drop_lock->setAlreadyRemoved(); - completely_removed = true; + drop_done = true; LOG_INFO(log, "Metadata ({}) and data ({}) was successfully removed from ZooKeeper", metadata_path, data_path); break; } case ZNONODE: throw Exception(ErrorCodes::LOGICAL_ERROR, "There is a race condition between creation and removal of metadata. It's a bug"); case ZNOTEMPTY: - LOG_ERROR(log, "Metadata was not completely removed from ZooKeeper"); + { + // valid case when this can happen is if a table checked "dropped" path just before it was created. + // new table will create data/metadata paths again while drop is in progress + // only bad thing that can happen is if we start inserting data into new table while + // we remove data here (some data can be lost) + LOG_WARNING(log, "Metadata was not completely removed from ZooKeeper. 
Maybe some other table is using the same path"); + + // we need to remove at least "dropped" nodes + Coordination::Requests requests; + ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); + ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); + zookeeper->multi(requests); + drop_done = true; break; + } default: zkutil::KeeperMultiException::check(code, ops, responses); break; } - return completely_removed; + return drop_done; } void StorageKeeperMap::drop() From 1e4ddf0a9c7ede9d56e7e05e05a44f5013f6c29c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 13:22:53 +0000 Subject: [PATCH 148/173] Choose fastest build for current platform --- docs/_includes/install/universal.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index e8240734c81..ff54f7cf90e 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -12,7 +12,16 @@ then DIR="amd64" elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] then - DIR="aarch64v80compat" # ARMv8.0 for maximum compatibility + # If the system is >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0 + # compat build. Unfortunately, 1. the ARM ISA level cannot be read directly, we need to guess from the "features" in /proc/cpuinfo, + # and 2. the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake). 
+ ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/') + if [ "${ARMV82}" ] + then + DIR="aarch64" + else + DIR="aarch64v80compat" + fi elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ] then DIR="powerpc64le" From 2384761063ac455bf784382d680ecd9f3abe56cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 26 Sep 2022 15:38:10 +0200 Subject: [PATCH 149/173] Fix drop of completely dropped table --- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cc0ace576ce..3aabd1a02a7 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7487,6 +7487,10 @@ void StorageReplicatedMergeTree::createTableSharedID() const id = zookeeper->get(zookeeper_table_id_path); LOG_DEBUG(log, "Shared ID on path {} concurrently created, will set ID {}", zookeeper_table_id_path, id); } + else if (code == Coordination::Error::ZNONODE) + { + LOG_WARNING(log, "Shared ID on path {} is impossible to create because table was completely dropped, parts can be dropped without checks (using id {})", zookeeper_table_id_path, id); + } else if (code != Coordination::Error::ZOK) { throw zkutil::KeeperException(code, zookeeper_table_id_path); From e20d3803c43128f11bffd7adef5d0e7118fc3a63 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 26 Sep 2022 15:40:25 +0200 Subject: [PATCH 150/173] Better fix --- src/Storages/StorageReplicatedMergeTree.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3aabd1a02a7..552035f478c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7451,8 +7451,9 @@ String StorageReplicatedMergeTree::getTableSharedID() const /// can be called only during table 
initialization std::lock_guard lock(table_shared_id_mutex); + bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper; /// Can happen if table was partially initialized before drop by DatabaseCatalog - if (table_shared_id == UUIDHelpers::Nil) + if (maybe_has_metadata_in_zookeeper && table_shared_id == UUIDHelpers::Nil) createTableSharedID(); return toString(table_shared_id); @@ -7487,10 +7488,6 @@ void StorageReplicatedMergeTree::createTableSharedID() const id = zookeeper->get(zookeeper_table_id_path); LOG_DEBUG(log, "Shared ID on path {} concurrently created, will set ID {}", zookeeper_table_id_path, id); } - else if (code == Coordination::Error::ZNONODE) - { - LOG_WARNING(log, "Shared ID on path {} is impossible to create because table was completely dropped, parts can be dropped without checks (using id {})", zookeeper_table_id_path, id); - } else if (code != Coordination::Error::ZOK) { throw zkutil::KeeperException(code, zookeeper_table_id_path); From ec35ff9cd6f1c0e9d8190c64226a1ea42782f2a1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 13:32:12 +0000 Subject: [PATCH 151/173] Log git hash during startup We currently only log a compiler-generated "build id" at startup which is different for each build. That makes it useless to determine the exact source code state in tests (e.g. BC test) and from user log files (e.g. if someone compiled an intermediate version of ClickHouse). 
Current log message: Starting ClickHouse 22.10.1.1 with revision 54467, build id: 6F35820328F89C9F36E91C447FF9E61CAF0EF019, PID 42633 New log message: Starting ClickHouse 22.10.1.1 (revision 54467, git hash: b6b1f7f763f94ffa12133679a6f80342dd1c3afe, build id: 47B12BE61151926FBBD230DE42F3B7A6652AC482), PID 981813 --- CMakeLists.txt | 39 ++++++++++++++++++++++++++++- cmake/git_status.cmake | 22 ---------------- src/Daemon/BaseDaemon.cpp | 26 +++++++++++-------- src/Daemon/BaseDaemon.h | 3 ++- src/Daemon/CMakeLists.txt | 4 +++ src/Daemon/GitHash.generated.cpp.in | 10 ++++++++ src/Storages/System/CMakeLists.txt | 36 +++----------------------- 7 files changed, 72 insertions(+), 68 deletions(-) delete mode 100644 cmake/git_status.cmake create mode 100644 src/Daemon/GitHash.generated.cpp.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 64fb870b61b..b0accceddc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,44 @@ include (cmake/target.cmake) include (cmake/tools.cmake) include (cmake/ccache.cmake) include (cmake/clang_tidy.cmake) -include (cmake/git_status.cmake) + +find_package(Git) +# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. 
for view SYSTEM.BUILD_OPTIONS +if (Git_FOUND) + # Commit hash + whether the building workspace was dirty or not + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_HASH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Branch name + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Date of the commit + SET(ENV{TZ} "UTC") + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Subject of the commit + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + message(STATUS "HEAD's commit hash ${GIT_HASH}") + + execute_process( + COMMAND ${GIT_EXECUTABLE} status + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) +else() + message(STATUS "Git could not be found.") +endif() # Ignore export() since we don't use it, # but it gets broken with a global targets via link_libraries() diff --git a/cmake/git_status.cmake b/cmake/git_status.cmake deleted file mode 100644 index c1047c0ccbf..00000000000 --- a/cmake/git_status.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Print the status of the git repository (if git is available). 
-# This is useful for troubleshooting build failure reports - -find_package(Git) - -if (Git_FOUND) - - execute_process( - COMMAND ${GIT_EXECUTABLE} rev-parse HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMIT_ID - OUTPUT_STRIP_TRAILING_WHITESPACE) - - message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}") - - execute_process( - COMMAND ${GIT_EXECUTABLE} status - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) - -else() - message(STATUS "Git could not be found.") -endif() diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index d449768935a..157255bba12 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -266,8 +266,8 @@ private: { size_t pos = message.find('\n'); - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message.substr(0, pos)); + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) {}", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, message.substr(0, pos)); /// Print trace from std::terminate exception line-by-line to make it easy for grep. 
while (pos != std::string_view::npos) @@ -315,14 +315,14 @@ private: if (query_id.empty()) { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (no query) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context } else { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, query_id, query, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context) } @@ -838,6 +838,7 @@ static void blockSignals(const std::vector & signals) throw Poco::Exception("Cannot block signal."); } +extern String getGitHash(); void BaseDaemon::initializeTerminationAndSignalProcessing() { @@ -870,13 +871,15 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() #if defined(__ELF__) && !defined(OS_FREEBSD) String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex(); if (build_id_hex.empty()) - build_id_info = "no build id"; + build_id = ""; else - build_id_info = "build id: " + build_id_hex; + build_id = build_id_hex; #else - build_id_info = "no build id"; + build_id = ""; #endif + git_hash = getGitHash(); + #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -888,8 +891,9 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() void BaseDaemon::logRevision() const { Poco::Logger::root().information("Starting " + std::string{VERSION_FULL} - + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", 
" + build_id_info + + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) + + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", build id: " + (build_id.empty() ? "" : build_id) + ")" + ", PID " + std::to_string(getpid())); } diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index 1b67ca986a8..d248ad9cec9 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -172,7 +172,8 @@ protected: DB::ConfigProcessor::LoadedConfig loaded_config; Poco::Util::AbstractConfiguration * last_configuration = nullptr; - String build_id_info; + String build_id; + String git_hash; String stored_binary_hash; std::vector handled_signals; diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 78c133d9893..7499d75d514 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,7 +1,11 @@ +set (GENERATED_GIT_HASH_CPP "${CMAKE_CURRENT_BINARY_DIR}/GitHash.generated.cpp") +configure_file(GitHash.generated.cpp.in ${GENERATED_GIT_HASH_CPP}) + add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp + ${GENERATED_GIT_HASH_CPP} ) if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/src/Daemon/GitHash.generated.cpp.in b/src/Daemon/GitHash.generated.cpp.in new file mode 100644 index 00000000000..833e9304b29 --- /dev/null +++ b/src/Daemon/GitHash.generated.cpp.in @@ -0,0 +1,10 @@ +// .cpp autogenerated by cmake + +#include + +static const String GIT_HASH = "@GIT_HASH@"; + +String getGitHash() +{ + return GIT_HASH; +} diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index efc4c0ed37b..d2f7a5426db 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -2,49 +2,18 @@ # You can also regenerate it manually this way: # execute_process(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh") -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - set (CONFIG_BUILD 
"${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp") + get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) - get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) - -find_package(Git) -if(Git_FOUND) - # The commit's git hash, and whether the building workspace was dirty or not - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Git branch name - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_BRANCH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The date of the commit - SET(ENV{TZ} "UTC") - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_DATE - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The subject of the commit - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_COMMIT_SUBJECT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) endfunction() + generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") @@ -78,6 +47,7 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) +include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) clickhouse_embed_binaries( TARGET information_schema_metadata 
RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" From 921776625e043b9d1d96c417e45470d28dc1485a Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Sep 2022 16:20:00 +0200 Subject: [PATCH 152/173] Fix integration tests --- src/Disks/IO/createReadBufferFromFileBase.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index f42194b9052..98da89f81ed 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -63,10 +63,6 @@ std::unique_ptr createReadBufferFromFileBase( } } - auto context = Context::getGlobalContextInstance(); - if (!context) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); - auto create = [&](size_t buffer_size, int actual_flags) { std::unique_ptr res; @@ -81,12 +77,20 @@ std::unique_ptr createReadBufferFromFileBase( } else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async) { + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); + auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER); res = std::make_unique( reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool) { + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); + auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER); res = std::make_unique( reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); From 5c8ce2f543dd27eb623a1009ec7d040bdd78bdb5 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 
Sep 2022 14:28:03 +0000 Subject: [PATCH 153/173] More correct --- src/Storages/StorageKeeperMap.cpp | 101 +++++++++++++++--------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 11b6fe1b8dc..bde6c4df80b 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -316,6 +316,36 @@ StorageKeeperMap::StorageKeeperMap( for (size_t i = 0; i < 1000; ++i) { + std::string stored_metadata_string; + auto exists = client->tryGet(metadata_path, stored_metadata_string); + + if (exists) + { + // this requires same name for columns + // maybe we can do a smarter comparison for columns and primary key expression + if (stored_metadata_string != metadata_string) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path {} is already used but the stored table definition doesn't match. Stored metadata: {}", + root_path, + stored_metadata_string); + + auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); + + // tables_path was removed with drop + if (code == Coordination::Error::ZNONODE) + { + LOG_INFO(log, "Metadata nodes were removed by another server, will retry"); + continue; + } + else if (code != Coordination::Error::ZOK) + { + throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", root_path); + } + + return; + } + if (client->exists(dropped_path)) { LOG_INFO(log, "Removing leftover nodes"); @@ -342,45 +372,29 @@ StorageKeeperMap::StorageKeeperMap( } } - std::string stored_metadata_string; - auto exists = client->tryGet(metadata_path, stored_metadata_string); + Coordination::Requests create_requests + { + zkutil::makeCreateRequest(metadata_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(data_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(tables_path, "", zkutil::CreateMode::Persistent), + 
zkutil::makeCreateRequest(table_path, "", zkutil::CreateMode::Persistent), + }; - if (exists) + Coordination::Responses create_responses; + auto code = client->tryMulti(create_requests, create_responses); + if (code == Coordination::Error::ZNODEEXISTS) { - // this requires same name for columns - // maybe we can do a smarter comparison for columns and primary key expression - if (stored_metadata_string != metadata_string) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Path {} is already used but the stored table definition doesn't match. Stored metadata: {}", - root_path, - stored_metadata_string); + LOG_WARNING(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); + continue; } - else + else if (code != Coordination::Error::ZOK) { - auto code = client->tryCreate(metadata_path, metadata_string, zkutil::CreateMode::Persistent); - if (code == Coordination::Error::ZNODEEXISTS) - continue; - else if (code != Coordination::Error::ZOK) - throw Coordination::Exception(code, metadata_path); + zkutil::KeeperMultiException::check(code, create_requests, create_responses); } - client->createIfNotExists(tables_path, ""); - auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); - - if (code == Coordination::Error::ZOK) - { - // metadata now should be guaranteed to exist because we added our UUID to the tables_path - client->createIfNotExists(data_path, ""); - table_is_valid = true; - return; - } - - if (code == Coordination::Error::ZNONODE) - LOG_INFO(log, "Metadata nodes were deleted in background, will retry"); - else - throw Coordination::Exception(code, table_path); + table_is_valid = true; + return; } throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot create metadata for table, because it is removed concurrently or because of wrong root_path ({})", root_path); @@ -456,9 +470,9 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont bool 
StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) { - zookeeper->tryRemoveChildrenRecursive(data_path, true); + zookeeper->removeChildrenRecursive(data_path); - bool drop_done = false; + bool completely_removed = false; Coordination::Requests ops; ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); @@ -473,33 +487,20 @@ bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::E case ZOK: { metadata_drop_lock->setAlreadyRemoved(); - drop_done = true; + completely_removed = true; LOG_INFO(log, "Metadata ({}) and data ({}) was successfully removed from ZooKeeper", metadata_path, data_path); break; } case ZNONODE: throw Exception(ErrorCodes::LOGICAL_ERROR, "There is a race condition between creation and removal of metadata. It's a bug"); case ZNOTEMPTY: - { - // valid case when this can happen is if a table checked "dropped" path just before it was created. - // new table will create data/metadata paths again while drop is in progress - // only bad thing that can happen is if we start inserting data into new table while - // we remove data here (some data can be lost) - LOG_WARNING(log, "Metadata was not completely removed from ZooKeeper. 
Maybe some other table is using the same path"); - - // we need to remove at least "dropped" nodes - Coordination::Requests requests; - ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); - ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); - zookeeper->multi(requests); - drop_done = true; + LOG_ERROR(log, "Metadata was not completely removed from ZooKeeper"); break; - } default: zkutil::KeeperMultiException::check(code, ops, responses); break; } - return drop_done; + return completely_removed; } void StorageKeeperMap::drop() From 9711950c35edfe6f5eadb9c96a08a26150d41939 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 15:04:56 +0000 Subject: [PATCH 154/173] Fix build --- programs/keeper/Keeper.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 6d487a68111..fdfe0cef2b3 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -490,8 +490,9 @@ int Keeper::main(const std::vector & /*args*/) void Keeper::logRevision() const { Poco::Logger::root().information("Starting ClickHouse Keeper " + std::string{VERSION_STRING} - + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", " + build_id_info + + "(revision : " + std::to_string(ClickHouseRevision::getVersionRevision()) + + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", build id: " + (build_id.empty() ? 
"" : build_id) + ")" + ", PID " + std::to_string(getpid())); } From eb78761a7edcda7595c90ad8145981bb345e55dd Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 Sep 2022 16:30:01 +0000 Subject: [PATCH 155/173] Collect necessary --- docker/test/stress/run.sh | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 7cb9c5ce0dc..d2b6539464c 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -255,11 +255,6 @@ start || echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv stop -mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log -for table in query_log trace_log -do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.stress.tsv.gz ||: -done # NOTE Disable thread fuzzer before server start with data after stress test. # In debug build it can take a lot of time. 
@@ -344,7 +339,7 @@ echo $previous_release_tag | download_release_packets && echo -e 'Download scrip mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log for table in query_log trace_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.clean.tsv.gz ||: + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: done tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: @@ -408,10 +403,6 @@ else stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log - for table in query_log trace_log - do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.stress.tsv.gz ||: - done # Start new server mv package_folder/clickhouse /usr/bin/ @@ -514,7 +505,7 @@ else tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||: for table in query_log trace_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.clean.tsv.gz ||: + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||: done fi From 9263f0b2ed146e71e683a27dadd7681d677fb62c Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 26 Sep 2022 19:12:52 +0200 Subject: [PATCH 156/173] Added test case for drop column _row_exists --- ...isable_mergetree_with_lightweight_delete_column.sql | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff 
--git a/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql index 3aa56e00499..ff05157c64a 100644 --- a/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql +++ b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql @@ -3,8 +3,10 @@ drop table if exists t_row_exists; create table t_row_exists(a int, _row_exists int) engine=MergeTree order by a; --{serverError 44} create table t_row_exists(a int, b int) engine=MergeTree order by a; -alter table t_row_exists add column _row_exists int; --{serverError 44} -alter table t_row_exists rename column b to _row_exists; --{serverError 44} +alter table t_row_exists add column _row_exists int; --{serverError ILLEGAL_COLUMN} +alter table t_row_exists rename column b to _row_exists; --{serverError ILLEGAL_COLUMN} +alter table t_row_exists drop column _row_exists; --{serverError NOT_FOUND_COLUMN_IN_BLOCK} +alter table t_row_exists drop column unknown_column; --{serverError NOT_FOUND_COLUMN_IN_BLOCK} drop table t_row_exists; create table t_row_exists(a int, _row_exists int) engine=Memory; @@ -13,6 +15,6 @@ select * from t_row_exists; drop table t_row_exists; create table t_row_exists(a int, b int) engine=Memory; -alter table t_row_exists add column _row_exists int; --{serverError 48} -alter table t_row_exists rename column b to _row_exists; --{serverError 48} +alter table t_row_exists add column _row_exists int; --{serverError NOT_IMPLEMENTED} +alter table t_row_exists rename column b to _row_exists; --{serverError NOT_IMPLEMENTED} drop table t_row_exists; From c2c2de8718316bcf334a9d163ceb879bbeb57efe Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 26 Sep 2022 19:21:55 +0200 Subject: [PATCH 157/173] Rename _row_exists column test case --- 
.../02454_disable_mergetree_with_lightweight_delete_column.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql index ff05157c64a..999210ef36e 100644 --- a/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql +++ b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql @@ -5,6 +5,7 @@ create table t_row_exists(a int, _row_exists int) engine=MergeTree order by a; - create table t_row_exists(a int, b int) engine=MergeTree order by a; alter table t_row_exists add column _row_exists int; --{serverError ILLEGAL_COLUMN} alter table t_row_exists rename column b to _row_exists; --{serverError ILLEGAL_COLUMN} +alter table t_row_exists rename column _row_exists to c; --{serverError NOT_FOUND_COLUMN_IN_BLOCK} alter table t_row_exists drop column _row_exists; --{serverError NOT_FOUND_COLUMN_IN_BLOCK} alter table t_row_exists drop column unknown_column; --{serverError NOT_FOUND_COLUMN_IN_BLOCK} drop table t_row_exists; From 922834ccde612f81ac5da61c72ef2f0afdadd64c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 26 Sep 2022 19:31:08 +0200 Subject: [PATCH 158/173] minor fixes --- src/Databases/DatabaseReplicatedWorker.cpp | 4 ++++ .../02423_ddl_for_opentelemetry.reference | 10 ---------- .../0_stateless/02423_ddl_for_opentelemetry.sh | 15 ++++++--------- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index a63235b3db0..8c2983e1939 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -221,6 +221,10 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr /// NOTE Possibly it would be better to execute initial query on the most up-to-date node, /// but it 
requires more complex logic around /try node. + OpenTelemetry::SpanHolder span(__FUNCTION__); + span.addAttribute("clickhouse.cluster", database->getDatabaseName()); + entry.tracing_context = OpenTelemetry::CurrentContext(); + auto zookeeper = getAndSetZooKeeper(); UInt32 our_log_ptr = getLogPointer(); UInt32 max_log_ptr = parse(zookeeper->get(database->zookeeper_path + "/max_log_ptr")); diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index 348dc062885..9c440ab4c67 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,13 +1,3 @@ -===ddl_format_version 1==== -1 -1 -1 -1 -===ddl_format_version 2==== -1 -1 -1 -1 ===ddl_format_version 3==== 1 1 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index b055a155acf..6164ff97d9f 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: distributed +# Tags: zookeeper CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -16,7 +16,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function execute_query() { # Some queries are supposed to fail, use -f to suppress error messages - echo $2 | ${CLICKHOUSE_CURL} -f \ + echo $2 | ${CLICKHOUSE_CURL_COMMAND} -q -s --max-time 180 \ -X POST \ -H "traceparent: 00-$1-5150000000000515-01" \ -H "tracestate: a\nb cd" \ @@ -75,10 +75,9 @@ DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry; cluster_name=$($CLICKHOUSE_CLIENT -q "select if(engine = 'Replicated', name, 'test_shard_localhost') from system.databases where name='$CLICKHOUSE_DATABASE'") # -# Normal cases for ALL distributed_ddl_entry_format_version. 
# Only format_version 4 enables the tracing # -for ddl_version in 1 2 3 4; do +for ddl_version in 3 4; do # Echo a separator so that the reference file is more clear for reading echo "===ddl_format_version ${ddl_version}====" @@ -87,12 +86,10 @@ for ddl_version in 1 2 3 4; do check_span 1 $trace_id "HTTPHandler" - # For Replcated database engine, it does not call 'executeDDLQueryOnCluster' method, we don't need to check it if [ $cluster_name = "test_shard_localhost" ]; then check_span 1 $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" else - # Only echo a value so that comparison of reference is correct - echo 1 + check_span 1 $trace_id "%tryEnqueueAndExecuteEntry%" "attribute['clickhouse.cluster']='${cluster_name}'" fi if [ $cluster_name = "test_shard_localhost" ]; then @@ -137,14 +134,14 @@ done echo "===exception====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); -execute_query $trace_id "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" +execute_query $trace_id "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" 2>&1| grep -Fv "UNKNOWN_TABLE" check_span 1 $trace_id "HTTPHandler" if [ $cluster_name = "test_shard_localhost" ]; then expected=1 else - # For Replicated database, executeDDLQueryOnCluster is not called + # For Replicated database it will fail on initiator before enqueueing distributed DDL expected=0 fi check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" From 0f6a44efef4ca21cc922da0b1a67bf40f108b9db Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 26 Sep 2022 20:11:30 +0200 Subject: [PATCH 159/173] fix missing metadata_version for old tables --- 
.../ReplicatedMergeTreeAttachThread.cpp | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index ba4979e57f2..90a28c373c7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int REPLICA_STATUS_CHANGED; } ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_) @@ -54,6 +55,8 @@ void ReplicatedMergeTreeAttachThread::run() { if (const auto * coordination_exception = dynamic_cast(&e)) needs_retry = Coordination::isHardwareError(coordination_exception->code); + else if (e.code() == ErrorCodes::REPLICA_STATUS_CHANGED) + needs_retry = true; if (needs_retry) { @@ -84,14 +87,14 @@ void ReplicatedMergeTreeAttachThread::run() void ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const String & replica_path) { - /// Since 20.4 and until 22.9 "/metadata" and "/metadata_version" nodes were created on replica startup. + /// Since 20.4 and until 22.9 "/metadata" node was created on replica startup and "/metadata_version" was created on ALTER. /// Since 21.12 we could use "/metadata" to check if replica is dropped (see StorageReplicatedMergeTree::dropReplica), /// but it did not work correctly, because "/metadata" node was re-created on server startup. /// Since 22.9 we do not recreate these nodes and use "/host" to check if replica is dropped. 
String replica_metadata; const bool replica_metadata_exists = zookeeper->tryGet(replica_path + "/metadata", replica_metadata); - if (!replica_metadata_exists || replica_metadata.empty() || !zookeeper->exists(replica_path + "/metadata_version")) + if (!replica_metadata_exists || replica_metadata.empty()) { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Upgrade from 20.3 and older to 22.9 and newer " "should be done through an intermediate version (failed to get metadata or metadata_version for {}," @@ -139,11 +142,36 @@ void ReplicatedMergeTreeAttachThread::runImpl() checkHasReplicaMetadataInZooKeeper(zookeeper, replica_path); + String replica_metadata_version; + const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); + if (replica_metadata_version_exists) + { + storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); + } + else + { + /// Table was created before 20.4 and was never altered, + /// let's initialize replica metadata version from global metadata version. 
+ Coordination::Stat table_metadata_version_stat; + zookeeper->get(zookeeper_path + "/metadata", &table_metadata_version_stat); + + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/metadata", table_metadata_version_stat.version)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", toString(table_metadata_version_stat.version), zkutil::CreateMode::Persistent)); + + Coordination::Responses res; + auto code = zookeeper->tryMulti(ops, res); + + if (code == Coordination::Error::ZBADVERSION) + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Failed to initialize metadata_version " + "because table was concurrently altered, will retry"); + + zkutil::KeeperMultiException::check(code, ops, res); + } + storage.checkTableStructure(replica_path, metadata_snapshot); storage.checkParts(skip_sanity_checks); - storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); - /// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart), /// don't allow to reinitialize them, delete each of them immediately. 
storage.clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}); From 99725e68d1ccf68df4b6ed05af5823cc407a40ed Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 19:28:27 +0000 Subject: [PATCH 160/173] Fix standalone keeper build --- programs/keeper/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index a5ad506abe6..ac8f3b667f6 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -92,6 +92,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp Keeper.cpp TinyContext.cpp From 540729119184db0565015f61ac298605c11b310a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Sep 2022 22:36:12 +0200 Subject: [PATCH 161/173] Revert "ColumnVector: optimize UInt8 index with AVX512VBMI (#41247)" This reverts commit 8de524cb7371ee2f0245239c798e95008f3eb0e8. 
--- src/Columns/ColumnVector.h | 136 +------------------------------------ 1 file changed, 2 insertions(+), 134 deletions(-) diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index f967b2b4039..70a8a9bce4b 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -7,15 +7,11 @@ #include #include #include -#include #include #include #include "config_core.h" -#if USE_MULTITARGET_CODE -# include -#endif namespace DB { @@ -395,124 +391,6 @@ protected: Container data; }; -DECLARE_DEFAULT_CODE( -template -inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) -{ - for (size_t i = 0; i < limit; ++i) - res_data[i] = data[indexes[i]]; -} -); - -DECLARE_AVX512VBMI_SPECIFIC_CODE( -template -inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) -{ - static constexpr UInt64 MASK64 = 0xffffffffffffffff; - const size_t limit64 = limit & ~63; - size_t pos = 0; - size_t data_size = data.size(); - - auto data_pos = reinterpret_cast(data.data()); - auto indexes_pos = reinterpret_cast(indexes.data()); - auto res_pos = reinterpret_cast(res_data.data()); - - if (data_size <= 64) - { - /// one single mask load for table size <= 64 - __mmask64 last_mask = MASK64 >> (64 - data_size); - __m512i table1 = _mm512_maskz_loadu_epi8(last_mask, data_pos); - - /// 64 bytes table lookup using one single permutexvar_epi8 - while (pos < limit64) - { - __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); - __m512i out = _mm512_permutexvar_epi8(vidx, table1); - _mm512_storeu_epi8(res_pos + pos, out); - pos += 64; - } - /// tail handling - if (limit > limit64) - { - __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); - __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); - __m512i out = _mm512_permutexvar_epi8(vidx, table1); - _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); - } - } - else if (data_size <= 128) 
- { - /// table size (64, 128] requires 2 zmm load - __mmask64 last_mask = MASK64 >> (128 - data_size); - __m512i table1 = _mm512_loadu_epi8(data_pos); - __m512i table2 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 64); - - /// 128 bytes table lookup using one single permute2xvar_epi8 - while (pos < limit64) - { - __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); - __m512i out = _mm512_permutex2var_epi8(table1, vidx, table2); - _mm512_storeu_epi8(res_pos + pos, out); - pos += 64; - } - if (limit > limit64) - { - __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); - __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); - __m512i out = _mm512_permutex2var_epi8(table1, vidx, table2); - _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); - } - } - else - { - if (data_size > 256) - { - /// byte index will not exceed 256 boundary. - data_size = 256; - } - - __m512i table1 = _mm512_loadu_epi8(data_pos); - __m512i table2 = _mm512_loadu_epi8(data_pos + 64); - __m512i table3, table4; - if (data_size <= 192) - { - /// only 3 tables need to load if size <= 192 - __mmask64 last_mask = MASK64 >> (192 - data_size); - table3 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 128); - table4 = _mm512_setzero_si512(); - } - else - { - __mmask64 last_mask = MASK64 >> (256 - data_size); - table3 = _mm512_loadu_epi8(data_pos + 128); - table4 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 192); - } - - /// 256 bytes table lookup can use: 2 permute2xvar_epi8 plus 1 blender with MSB - while (pos < limit64) - { - __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); - __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); - __m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4); - __mmask64 msb = _mm512_movepi8_mask(vidx); - __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); - _mm512_storeu_epi8(res_pos + pos, out); - pos += 64; - } - if (limit > limit64) - { - __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); - __m512i vidx = 
_mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); - __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); - __m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4); - __mmask64 msb = _mm512_movepi8_mask(vidx); - __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); - _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); - } - } -} -); - template template ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_t limit) const @@ -521,18 +399,8 @@ ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_ auto res = this->create(limit); typename Self::Container & res_data = res->getData(); -#if USE_MULTITARGET_CODE - if constexpr (sizeof(T) == 1 && sizeof(Type) == 1) - { - /// VBMI optimization only applicable for (U)Int8 types - if (isArchSupported(TargetArch::AVX512VBMI)) - { - TargetSpecific::AVX512VBMI::vectorIndexImpl(data, indexes, limit, res_data); - return res; - } - } -#endif - TargetSpecific::Default::vectorIndexImpl(data, indexes, limit, res_data); + for (size_t i = 0; i < limit; ++i) + res_data[i] = data[indexes[i]]; return res; } From 6d7de37e3d4c01d9169750f98a131cdb4238e8f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Sep 2022 23:52:14 +0200 Subject: [PATCH 162/173] Small fix in dashboard --- programs/server/dashboard.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index e63a277497a..f013e3ac064 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -820,7 +820,7 @@ async function draw(idx, chart, url_params, query) { sync.sub(plots[idx]); /// Set title - const title = queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ); + const title = queries[idx].title ? 
queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : ''; chart.querySelector('.title').firstChild.data = title; } From 9c1a107f684882fbb690356def4a6da12215b4b4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 09:00:46 +0200 Subject: [PATCH 163/173] Cosmetic changes in comment --- docs/_includes/install/universal.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index ff54f7cf90e..0fb5373a3ae 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -12,9 +12,9 @@ then DIR="amd64" elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] then - # If the system is >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0 - # compat build. Unfortunately, 1. the ARM ISA level cannot be read directly, we need to guess from the "features" in /proc/cpuinfo, - # and 2. the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake). + # If the system has >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0 + # compat build. Unfortunately, the ARM ISA level cannot be read directly, we need to guess from the "features" in /proc/cpuinfo. + # Also, the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake). 
ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/') if [ "${ARMV82}" ] then From 588a5e5a42224e2e85f7878ed6fd1b0e881c85b4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 07:29:18 +0000 Subject: [PATCH 164/173] Simplify a bit --- programs/keeper/CMakeLists.txt | 2 +- src/Daemon/CMakeLists.txt | 5 ++--- src/Daemon/GitHash.cpp.in | 8 ++++++++ src/Daemon/GitHash.generated.cpp.in | 10 ---------- src/Storages/System/CMakeLists.txt | 6 ++---- ...nerated.cpp.in => StorageSystemBuildOptions.cpp.in} | 2 +- 6 files changed, 14 insertions(+), 19 deletions(-) create mode 100644 src/Daemon/GitHash.cpp.in delete mode 100644 src/Daemon/GitHash.generated.cpp.in rename src/Storages/System/{StorageSystemBuildOptions.generated.cpp.in => StorageSystemBuildOptions.cpp.in} (98%) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index ac8f3b667f6..ce176ccade5 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -33,7 +33,7 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) - # Sraight list of all required sources + # Straight list of all required sources set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 7499d75d514..f02fd69aa79 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,11 +1,10 @@ -set (GENERATED_GIT_HASH_CPP "${CMAKE_CURRENT_BINARY_DIR}/GitHash.generated.cpp") -configure_file(GitHash.generated.cpp.in ${GENERATED_GIT_HASH_CPP}) +configure_file(GitHash.cpp.in GitHash.generated.cpp) add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp - ${GENERATED_GIT_HASH_CPP} + GitHash.generated.cpp ) if (OS_DARWIN AND 
NOT USE_STATIC_LIBRARIES) diff --git a/src/Daemon/GitHash.cpp.in b/src/Daemon/GitHash.cpp.in new file mode 100644 index 00000000000..4a2da793fc2 --- /dev/null +++ b/src/Daemon/GitHash.cpp.in @@ -0,0 +1,8 @@ +// File was generated by CMake + +#include + +String getGitHash() +{ + return "@GIT_HASH@"; +} diff --git a/src/Daemon/GitHash.generated.cpp.in b/src/Daemon/GitHash.generated.cpp.in deleted file mode 100644 index 833e9304b29..00000000000 --- a/src/Daemon/GitHash.generated.cpp.in +++ /dev/null @@ -1,10 +0,0 @@ -// .cpp autogenerated by cmake - -#include - -static const String GIT_HASH = "@GIT_HASH@"; - -String getGitHash() -{ - return GIT_HASH; -} diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index d2f7a5426db..6bc080045f8 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -2,8 +2,6 @@ # You can also regenerate it manually this way: # execute_process(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh") -set (CONFIG_BUILD "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp") - get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) @@ -11,14 +9,14 @@ function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) - configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) + configure_file(StorageSystemBuildOptions.cpp.in StorageSystemBuildOptions.generated.cpp) endfunction() generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(storages_system .) 
-list (APPEND storages_system_sources ${CONFIG_BUILD}) +list (APPEND storages_system_sources StorageSystemBuildOptions.generated.cpp) add_custom_target(generate-contributors ./StorageSystemContributors.sh diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in similarity index 98% rename from src/Storages/System/StorageSystemBuildOptions.generated.cpp.in rename to src/Storages/System/StorageSystemBuildOptions.cpp.in index dde90ce459a..117d97d2cfd 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -1,4 +1,4 @@ -// .cpp autogenerated by cmake +// File was generated by CMake const char * auto_config_build[] { From 69f3a66538661a35e9455e050f2226da01a74c3e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 27 Sep 2022 11:16:10 +0200 Subject: [PATCH 165/173] Keep the most important log in stress tests --- docker/test/stress/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 03eb0467f21..bf76fb20928 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -255,6 +255,7 @@ start || echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv stop +mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log # NOTE Disable thread fuzzer before server start with data after stress test. # In debug build it can take a lot of time. 
From 17a633c62c8d24f2d3926491c5531319956eafc4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 27 Sep 2022 11:17:51 +0200 Subject: [PATCH 166/173] Bump From 19062e9d9743f6a926d24fa26abe1f3b56cd2354 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 27 Sep 2022 14:26:45 +0300 Subject: [PATCH 167/173] Update src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp Co-authored-by: Antonio Andelic --- src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 90a28c373c7..7f91ffee1fe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -146,7 +146,7 @@ void ReplicatedMergeTreeAttachThread::runImpl() const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); if (replica_metadata_version_exists) { - storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); + storage.metadata_version = parse(replica_metadata_version); } else { From 823d8fb6cd4bf900564e68caedffdfa57b359ac6 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 11:43:31 +0000 Subject: [PATCH 168/173] Move git calls back into git.cmake + renamed the file from originally "git_status.cmake" to "git.cmake" (because we not longer run only "git status") --- CMakeLists.txt | 39 +-------------------------------------- cmake/git.cmake | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 38 deletions(-) create mode 100644 cmake/git.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index b0accceddc3..c737046a5f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,44 +18,7 @@ include (cmake/target.cmake) include (cmake/tools.cmake) include (cmake/ccache.cmake) include 
(cmake/clang_tidy.cmake) - -find_package(Git) -# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for view SYSTEM.BUILD_OPTIONS -if (Git_FOUND) - # Commit hash + whether the building workspace was dirty or not - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Branch name - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_BRANCH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Date of the commit - SET(ENV{TZ} "UTC") - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_DATE - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Subject of the commit - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMIT_SUBJECT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - - message(STATUS "HEAD's commit hash ${GIT_HASH}") - - execute_process( - COMMAND ${GIT_EXECUTABLE} status - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) -else() - message(STATUS "Git could not be found.") -endif() +include (cmake/git.cmake) # Ignore export() since we don't use it, # but it gets broken with a global targets via link_libraries() diff --git a/cmake/git.cmake b/cmake/git.cmake new file mode 100644 index 00000000000..93f38fd389c --- /dev/null +++ b/cmake/git.cmake @@ -0,0 +1,42 @@ +find_package(Git) + +# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for view SYSTEM.BUILD_OPTIONS. 
+if (Git_FOUND) + # Commit hash + whether the building workspace was dirty or not + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_HASH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Branch name + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Date of the commit + SET(ENV{TZ} "UTC") + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Subject of the commit + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + message(STATUS "HEAD's commit hash ${GIT_HASH}") + + execute_process( + COMMAND ${GIT_EXECUTABLE} status + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) +else() + message(STATUS "Git could not be found.") +endif() + From 1f3f86e5bfd5c1358e24a7b423495ec3e312bb68 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 11:46:56 +0000 Subject: [PATCH 169/173] Cosmetics --- cmake/git.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/git.cmake b/cmake/git.cmake index 93f38fd389c..397ec3cd081 100644 --- a/cmake/git.cmake +++ b/cmake/git.cmake @@ -31,7 +31,7 @@ if (Git_FOUND) OUTPUT_VARIABLE GIT_COMMIT_SUBJECT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "HEAD's commit hash ${GIT_HASH}") + message(STATUS "Git HEAD commit hash: ${GIT_HASH}") execute_process( COMMAND ${GIT_EXECUTABLE} status From 728fe5d06fcb4e0866b8db584dcd5b6475473a4b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 27 Sep 2022 16:00:44 +0200 Subject: [PATCH 170/173] Change log level --- 
src/Storages/StorageKeeperMap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index bde6c4df80b..28061aaaf48 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -384,7 +384,7 @@ StorageKeeperMap::StorageKeeperMap( auto code = client->tryMulti(create_requests, create_responses); if (code == Coordination::Error::ZNODEEXISTS) { - LOG_WARNING(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); + LOG_INFO(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); continue; } else if (code != Coordination::Error::ZOK) From 4be153cbd326d47a22b3b1d13466bd02f30a7a6f Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 27 Sep 2022 10:21:35 -0400 Subject: [PATCH 171/173] fix link from intro --- docs/en/sql-reference/statements/create/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..972acac8aaa 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # CREATE VIEW -Creates a new view. Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). +Creates a new view. Views can be [normal](#normal), [materialized](#materialized-view), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). 
## Normal View From 44d3eccf4ca99cb4210cb2e52226dfceafc377f6 Mon Sep 17 00:00:00 2001 From: mosinnik Date: Tue, 27 Sep 2022 19:13:40 +0300 Subject: [PATCH 172/173] Update external-data.md fix lost double hyphens --- .../engines/table-engines/special/external-data.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md index 95ae1aa9059..b98039f768a 100644 --- a/docs/ru/engines/table-engines/special/external-data.md +++ b/docs/ru/engines/table-engines/special/external-data.md @@ -22,17 +22,17 @@ ClickHouse позволяет отправить на сервер данные, Таких секций может быть несколько - по числу передаваемых таблиц. -**–external** - маркер начала секции. -**–file** - путь к файлу с дампом таблицы, или -, что обозначает stdin. -Из stdin может быть считана только одна таблица. +- **--external** - маркер начала секции. +- **--file** - путь к файлу с дампом таблицы, или `-`, что обозначает `stdin`. +Из `stdin` может быть считана только одна таблица. Следующие параметры не обязательные: -**–name** - имя таблицы. Если не указано - используется _data. -**–format** - формат данных в файле. Если не указано - используется TabSeparated. +- **--name** - имя таблицы. Если не указано - используется _data. +- **--format** - формат данных в файле. Если не указано - используется TabSeparated. Должен быть указан один из следующих параметров: -**–types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … -**–structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. +- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … +- **--structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. 
Файлы, указанные в file, будут разобраны форматом, указанным в format, с использованием типов данных, указанных в types или structure. Таблица будет загружена на сервер, и доступна там в качестве временной таблицы с именем name. From 4f23f6ef259d1f2b772f034670e63fab95abc376 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Tue, 27 Sep 2022 14:07:35 -0400 Subject: [PATCH 173/173] fix other links Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/statements/create/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 972acac8aaa..14c06ee0336 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # CREATE VIEW -Creates a new view. Views can be [normal](#normal), [materialized](#materialized-view), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). +Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features). ## Normal View