From 0951627b24fbd01bb1fdbbccd17157a4fd16ec54 Mon Sep 17 00:00:00 2001
From: Yong Wang
Date: Wed, 8 Jun 2022 10:14:03 -0700
Subject: [PATCH 01/84] Kusto-phase1: Add Support for Kusto Query Language

This is the initial implementation of the Kusto Query Language. In this commit, we support the following features as an MVP:
Tabular expression statements
Limit returned results
Select columns (basic project)
sort, order
Perform string equality operations
Filter using a list of elements
Filter using common string operations
Some string operators
Aggregate by columns
Base aggregate functions only support avg, count, min, max, sum
Aggregate by time intervals
---
src/Client/ClientBase.cpp | 15 +-
src/Core/Settings.h | 1 +
src/Interpreters/executeQuery.cpp | 19 +-
src/Parsers/CMakeLists.txt | 1 +
src/Parsers/Kusto/ParserKQLFilter.cpp | 39 ++++
src/Parsers/Kusto/ParserKQLFilter.h | 16 ++
src/Parsers/Kusto/ParserKQLLimit.cpp | 58 ++++++
src/Parsers/Kusto/ParserKQLLimit.h | 17 ++
src/Parsers/Kusto/ParserKQLOperators.cpp | 239 +++++++++++++++++++++++
src/Parsers/Kusto/ParserKQLOperators.h | 103 ++++++++++
src/Parsers/Kusto/ParserKQLProject.cpp | 47 +++++
src/Parsers/Kusto/ParserKQLProject.h | 22 +++
src/Parsers/Kusto/ParserKQLQuery.cpp | 123 ++++++++++++
src/Parsers/Kusto/ParserKQLQuery.h | 25 +++
src/Parsers/Kusto/ParserKQLSort.cpp | 71 +++++++
src/Parsers/Kusto/ParserKQLSort.h | 16 ++
src/Parsers/Kusto/ParserKQLStatement.cpp | 61 ++++++
src/Parsers/Kusto/ParserKQLStatement.h | 45 +++++
src/Parsers/Kusto/ParserKQLSummarize.cpp | 162 +++++++++++++++
src/Parsers/Kusto/ParserKQLSummarize.h | 19 ++
src/Parsers/Kusto/ParserKQLTable.cpp | 68 +++++++
src/Parsers/Kusto/ParserKQLTable.h | 18 ++
src/Parsers/Lexer.cpp | 2 +-
src/Parsers/Lexer.h | 1 +
src/Parsers/tests/gtest_Parser.cpp | 179 +++++++++++++++++
25 files changed, 1359 insertions(+), 8 deletions(-)
create mode 100644 src/Parsers/Kusto/ParserKQLFilter.cpp
create mode 100644 src/Parsers/Kusto/ParserKQLFilter.h
create mode 100644 src/Parsers/Kusto/ParserKQLLimit.cpp
create mode 100644 src/Parsers/Kusto/ParserKQLLimit.h
create mode 100644 src/Parsers/Kusto/ParserKQLOperators.cpp
create mode 100644 src/Parsers/Kusto/ParserKQLOperators.h
create mode 100644 src/Parsers/Kusto/ParserKQLProject.cpp
create mode 100644 src/Parsers/Kusto/ParserKQLProject.h
create mode 100644 src/Parsers/Kusto/ParserKQLQuery.cpp
create mode 100644 src/Parsers/Kusto/ParserKQLQuery.h
create mode 100644 src/Parsers/Kusto/ParserKQLSort.cpp
create mode 100644 src/Parsers/Kusto/ParserKQLSort.h
create mode 100644 src/Parsers/Kusto/ParserKQLStatement.cpp
create mode 100644 src/Parsers/Kusto/ParserKQLStatement.h
create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.cpp
create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.h
create mode 100644 src/Parsers/Kusto/ParserKQLTable.cpp
create mode 100644 src/Parsers/Kusto/ParserKQLTable.h

diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index b586979b546..0da70193fea 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -69,7 +69,7 @@
#include
#include
#include
-
+#include
namespace fs = std::filesystem;
using namespace std::literals;
@@ -299,7 +299,7 @@ void ClientBase::setupSignalHandler()
ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const
{
- ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert);
+ std::shared_ptr parser;
ASTPtr res;
const auto & settings = global_context->getSettingsRef();
@@ -308,10 +308,17 @@ ASTPtr
ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; + const String & sql_dialect = settings.sql_dialect; + + if (sql_dialect == "kusto") + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + else + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + if (is_interactive || ignore_error) { String message; - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { @@ -321,7 +328,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9e3b60a8e54..a48bfefbcf4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -38,6 +38,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ + M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 24649128cee..cd257567cd5 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -70,6 +70,7 @@ #include +#include namespace ProfileEvents { @@ -406,10 +407,22 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + const String & sql_dialect = settings.sql_dialect; + assert(sql_dialect == "clickhouse" || sql_dialect == "kusto"); - /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + if (sql_dialect == "kusto" && !internal) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else + { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } if (auto txn = context->getCurrentTransaction()) { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73f300fd5f6..73d46593e04 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) +add_headers_and_sources(clickhouse_parsers ./Kusto) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp new file mode 100644 index 00000000000..ad7ad807d03 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + Pos begin = pos; + String expr; + + KQLOperators convetor; + + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + if (expr.empty()) + expr = "(" + convetor.getExprFromToken(pos) +")"; + else + expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + } + + Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(tokenFilter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLFilter.h b/src/Parsers/Kusto/ParserKQLFilter.h new file mode 100644 index 00000000000..19bb38a7fda --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLFilter : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL where"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp new file mode 100644 index 00000000000..7811ebba9ab --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + Int64 minLimit = -1; + auto final_pos = pos; + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + auto isNumber = [&] + { + for (auto ch = pos->begin ; ch < pos->end; ++ch) + { + if (!isdigit(*ch)) + return false; + } + return true; + }; + + if (!isNumber()) + return false; + + auto limitLength = std::strtol(pos->begin,nullptr, 10); + if (-1 == minLimit) + { + minLimit = limitLength; + final_pos = pos; + } + else + { + if (minLimit > limitLength) + { + minLimit = limitLength; + final_pos = pos; + } + } + } + + if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h new file mode 100644 index 00000000000..d425659499d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLLimit : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL limit"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp new file mode 100644 index 00000000000..1db05d3c07a --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -0,0 +1,239 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +{ + String new_expr, leftWildcards= "", rightWildcards=""; + + switch (wildcardsPos) + { + case WildcardsPos::none: + break; + + case WildcardsPos::left: + leftWildcards ="%"; + break; + + case WildcardsPos::right: + rightWildcards = "%"; + break; + + case WildcardsPos::both: + leftWildcards ="%"; + rightWildcards = "%"; + break; + } + + if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) + new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + else + throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::getExprFromToken(IParser::Pos pos) +{ + String res; + std::vector tokens; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + KQLOperatorValue opValue = KQLOperatorValue::none; + + auto token = String(pos->begin,pos->end); + + String op = token; + if ( token == "!" ) + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + op ="!"+String(pos->begin,pos->end); + } + else if (token == "matches") + { + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "regex") + op +=" regex"; + else + --pos; + } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "~") + op +="~"; + else + --pos; + } + + if (KQLOperator.find(op) != KQLOperator.end()) + opValue = KQLOperator[op]; + + String new_expr; + if (opValue == KQLOperatorValue::none) + tokens.push_back(op); + else + { + switch (opValue) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + break; + + case KQLOperatorValue::not_equal: + break; + + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + + case KQLOperatorValue::has: + new_expr = 
genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + break; + + case KQLOperatorValue::has_any: + break; + + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + break; + + case KQLOperatorValue::not_hasprefix: + break; + + case KQLOperatorValue::hasprefix_cs: + break; + + case KQLOperatorValue::not_hasprefix_cs: + break; + + case KQLOperatorValue::hassuffix: + break; + + case KQLOperatorValue::not_hassuffix: + break; + + case KQLOperatorValue::hassuffix_cs: + break; + + case KQLOperatorValue::not_hassuffix_cs: + break; + + case KQLOperatorValue::in_cs: + new_expr = "in"; + break; + + case KQLOperatorValue::not_in_cs: + new_expr = "not in"; + break; + + case KQLOperatorValue::in: + break; + + case KQLOperatorValue::not_in: + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + break; + + default: + break; + } + + tokens.push_back(new_expr); + } + ++pos; + } + + for (auto it=tokens.begin(); it!=tokens.end(); ++it) + res = res + *it + " "; + + return res; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h new file mode 100644 index 00000000000..9beeeda55ef --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class KQLOperators { +public: + String getExprFromToken(IParser::Pos pos); +protected: + + enum class WildcardsPos:uint8_t + { + none, + left, + right, + both + }; + + enum class KQLOperatorValue : uint16_t + { + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal,//!~ + equal_cs, //= + not_equal_cs,//!= + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in ,//!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, + }; + + std::unordered_map KQLOperator = + { + {"contains" , KQLOperatorValue::contains}, + {"!contains" , KQLOperatorValue::not_contains}, + {"contains_cs" , KQLOperatorValue::contains_cs}, + {"!contains_cs" , KQLOperatorValue::not_contains_cs}, + {"endswith" , KQLOperatorValue::endswith}, + {"!endswith" , KQLOperatorValue::not_endswith}, + {"endswith_cs" , KQLOperatorValue::endswith_cs}, + {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, + {"=~" , 
KQLOperatorValue::equal}, + {"!~" , KQLOperatorValue::not_equal}, + {"==" , KQLOperatorValue::equal_cs}, + {"!=" , KQLOperatorValue::not_equal_cs}, + {"has" , KQLOperatorValue::has}, + {"!has" , KQLOperatorValue::not_has}, + {"has_all" , KQLOperatorValue::has_all}, + {"has_any" , KQLOperatorValue::has_any}, + {"has_cs" , KQLOperatorValue::has_cs}, + {"!has_cs" , KQLOperatorValue::not_has_cs}, + {"hasprefix" , KQLOperatorValue::hasprefix}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix" , KQLOperatorValue::hassuffix}, + {"!hassuffix" , KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, + {"in" , KQLOperatorValue::in_cs}, + {"!in" , KQLOperatorValue::not_in_cs}, + {"in~" , KQLOperatorValue::in}, + {"!in~" , KQLOperatorValue::not_in}, + {"matches regex" , KQLOperatorValue::matches_regex}, + {"startswith" , KQLOperatorValue::startswith}, + {"!startswith" , KQLOperatorValue::not_startswith}, + {"startswith_cs" , KQLOperatorValue::startswith_cs}, + {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, + }; + String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp new file mode 100644 index 00000000000..fee8cdb612b --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + String expr; + if (op_pos.empty()) + expr = "*"; + else + { + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it ; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + if (pos->type == TokenType::BareWord) + { + String tmp(pos->begin,pos->end); + + if (it != op_pos.begin() && columns.find(tmp) == columns.end()) + return false; + columns.insert(tmp); + } + ++pos; + } + } + expr = getExprFromToken(op_pos.back()); + } + + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + + + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h new file mode 100644 index 00000000000..3ab3c82f1be --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProject : public ParserKQLBase +{ +public: + void addColumn(String column) {columns.insert(column);} + +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::unordered_set columns; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp new file mode 100644 index 00000000000..0a9fa1fc4df --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLBase :: parsePrepare(Pos & pos) +{ + op_pos.push_back(pos); + return true; +} + +String ParserKQLBase :: getExprFromToken(Pos pos) +{ + String res; 
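+    /// Re-serializes the tokens of the current clause, up to the next pipe or
+    /// the end of input, into one whitespace-separated string (for example,
+    /// the KQL fragment `FirstName,LastName` becomes "FirstName , LastName "),
+    /// so the text can be re-tokenized and parsed as a ClickHouse SQL expression.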
+ while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + res = res + String(pos->begin,pos->end) +" "; + ++pos; + } + return res; +} + +bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto select_query = std::make_shared(); + node = select_query; + + ParserKQLFilter KQLfilter_p; + ParserKQLLimit KQLlimit_p; + ParserKQLProject KQLproject_p; + ParserKQLSort KQLsort_p; + ParserKQLSummarize KQLsummarize_p; + ParserKQLTable KQLtable_p; + + ASTPtr select_expression_list; + ASTPtr tables; + ASTPtr where_expression; + ASTPtr group_expression_list; + ASTPtr order_expression_list; + ASTPtr limit_length; + + std::unordered_map KQLParser = { + { "filter",&KQLfilter_p}, + { "where",&KQLfilter_p}, + { "limit",&KQLlimit_p}, + { "take",&KQLlimit_p}, + { "project",&KQLproject_p}, + { "sort",&KQLsort_p}, + { "order",&KQLsort_p}, + { "summarize",&KQLsummarize_p}, + { "table",&KQLtable_p} + }; + + std::vector> operation_pos; + + operation_pos.push_back(std::make_pair("table",pos)); + + while (!pos->isEnd()) + { + ++pos; + if (pos->type == TokenType::PipeMark) + { + ++pos; + String KQLoperator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + return false; + ++pos; + operation_pos.push_back(std::make_pair(KQLoperator,pos)); + } + } + + for (auto &op_pos : operation_pos) + { + auto KQLoperator = op_pos.first; + auto npos = op_pos.second; + if (!npos.isValid()) + return false; + + if (!KQLParser[KQLoperator]->parsePrepare(npos)) + return false; + } + + if (!KQLtable_p.parse(pos, tables, expected)) + return false; + + if (!KQLproject_p.parse(pos, select_expression_list, expected)) + return false; + + if (!KQLlimit_p.parse(pos, limit_length, expected)) + return false; + + if (!KQLfilter_p.parse(pos, where_expression, expected)) + return false; + + if (!KQLsort_p.parse(pos, order_expression_list, expected)) + return false; + + if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + return false; + else + group_expression_list = KQLsummarize_p.group_expression_list; + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h new file mode 100644 index 00000000000..25aa4e6b83c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ +class ParserKQLBase : public IParserBase +{ +public: + virtual bool parsePrepare(Pos & pos) ; + +protected: + std::vector op_pos; + std::vector expresions; + virtual String getExprFromToken(Pos pos); +}; + +class ParserKQLQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp new file mode 100644 index 00000000000..9f226c2fc82 --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + bool has_dir = false; + std::vector has_directions; + ParserOrderByExpressionList order_list; + ASTPtr order_expression_list; + + ParserKeyword by("by"); + + pos = op_pos.back(); // sort only affected by last one + + if (!by.ignore(pos, expected)) + return false; + + if (!order_list.parse(pos,order_expression_list,expected)) + return false; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + return false; + + pos = op_pos.back(); + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + String tmp(pos->begin,pos->end); + if (tmp == "desc" or tmp == "asc") + has_dir = true; + + if (pos->type == TokenType::Comma) + { + has_directions.push_back(has_dir); + has_dir = false; + } + + ++pos; + } + has_directions.push_back(has_dir); + + for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + { + if (!has_directions[i]) + { + auto order_expr = order_expression_list->children[i]->as(); + order_expr->direction = -1; // default desc + if (!order_expr->nulls_direction_was_explicitly_specified) + order_expr->nulls_direction = -1; + else + order_expr->nulls_direction = order_expr->nulls_direction == 1 ? -1 : 1; + + } + } + + node = order_expression_list; + + pos =begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.h b/src/Parsers/Kusto/ParserKQLSort.h new file mode 100644 index 00000000000..d9afefc196c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSort : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL order by"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp new file mode 100644 index 00000000000..7dea87eef25 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserSetQuery set_p; + + bool res = query_with_output_p.parse(pos, node, expected) + || set_p.parse(pos, node, expected); + + return res; +} + +bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery KQL_p; + + ASTPtr query; + bool parsed = KQL_p.parse(pos, query, expected); + + if (!parsed) + return false; + + node = std::move(query); + return true; +} + +bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + // will support union next phase + ASTPtr KQLQuery; + + if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + return false; + + if (KQLQuery->as()) + { + node = std::move(KQLQuery); + return true; + } + + auto list_node = std::make_shared(); + list_node->children.push_back(KQLQuery); + + auto select_with_union_query = std::make_shared(); + node = select_with_union_query; + select_with_union_query->list_of_selects = list_node; + select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + return true; +} 
+ +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h new file mode 100644 index 00000000000..1eed2d00845 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLStatement : public IParserBase +{ +private: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL Statement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + + +class ParserKQLWithOutput : public IParserBase +{ +protected: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL with output"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + +class ParserKQLWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp new file mode 100644 index 00000000000..f7422c02bca --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -0,0 +1,162 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace DB +{ +std::pair removeLastWord(String input) +{ + std::istringstream ss(input); + std::string token; + std::vector temp; + + while (std::getline(ss, token, ' ')) + { + temp.push_back(token); + } + + String firstPart; + for (std::size_t i = 0; i < temp.size() - 1; i++) + { + firstPart += temp[i]; + } + + return std::make_pair(firstPart, temp[temp.size() - 1]); +} + + +bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + if (op_pos.size() != 1) // now only support one summarize + return false; + + //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName + + //summarize has syntax : + + //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] + + //right now , we only support: + + //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] + //Aggregation -> the Aggregation function on column + //GroupExpression - > columns + + auto begin = pos; + + pos = op_pos.back(); + String exprAggregation; + String exprGroupby; + String exprColumns; + + bool groupby = false; + bool bin_function = false; + String bin_column; + String last_string; + String column_name; + int character_passed = 0; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + groupby = true; + else + { + if (groupby) + { + if (String(pos->begin, pos->end) == "bin") + { + exprGroupby = exprGroupby + "round" + " "; + bin_function = true; + } + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + + if (bin_function && last_string == "(") + { + bin_column = String(pos->begin, pos->end); + bin_function = false; + } + + last_string = String(pos->begin, pos->end); + } + + else + { + if (String(pos->begin, pos->end) == "=") + { + std::pair temp = removeLastWord(exprAggregation); + exprAggregation = temp.first; + column_name = temp.second; + } + else + { + if (!column_name.empty()) + { + exprAggregation = exprAggregation + String(pos->begin, pos->end); + character_passed++; + if (String(pos->begin, pos->end) == ")") // was 4 + { + exprAggregation = exprAggregation + " AS " + column_name; + column_name = ""; + } + } + else + { + exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + } + } + } + } + ++pos; + } + + if(!bin_column.empty()) + exprGroupby = exprGroupby + " AS " + bin_column; + + if (exprGroupby.empty()) + exprColumns = exprAggregation; + else + { + if (exprAggregation.empty()) + exprColumns = exprGroupby; + else + exprColumns = exprGroupby + "," + exprAggregation; + } + Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); + IParser::Pos posColumns(tokenColumns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + return false; + + if (groupby) + { + Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); + IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + return false; + } + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h new file mode 100644 index 00000000000..426ac29fe6a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSummarize : public ParserKQLBase +{ +public: + ASTPtr group_expression_list; +protected: + const char * getName() const override { return "KQL summarize"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp new file mode 100644 index 00000000000..8d450799785 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLTable :: parsePrepare(Pos & pos) +{ + if (!op_pos.empty()) + return false; + + op_pos.push_back(pos); + return true; +} + +bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::unordered_set sql_keywords + ( { + "SELECT", + "INSERT", + "CREATE", + "ALTER", + "SYSTEM", + "SHOW", + "GRANT", + "REVOKE", + "ATTACH", + "CHECK", + 
"DESCRIBE", + "DESC", + "DETACH", + "DROP", + "EXISTS", + "KILL", + "OPTIMIZE", + "RENAME", + "SET", + "TRUNCATE", + "USE", + "EXPLAIN" + } ); + + if (op_pos.empty()) + return false; + + auto begin = pos; + pos = op_pos.back(); + + String table_name(pos->begin,pos->end); + String table_name_upcase(table_name); + + std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); + + if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) + return false; + + if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + return false; + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h new file mode 100644 index 00000000000..1266b6e732d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTable : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL Table"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parsePrepare(Pos &pos) override; + +}; + +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 747a13d46f7..892c0ad4718 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl() ++pos; if (pos < end && *pos == '|') return Token(TokenType::Concatenation, token_begin, ++pos); - return Token(TokenType::ErrorSinglePipeMark, token_begin, pos); + return Token(TokenType::PipeMark, token_begin, pos); } case '@': { diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index ec472fb1a36..0c439ca0677 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ M(At) /** @. Used for specifying user names and also for MySQL-style variables. 
*/ \ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5b6d49e2741..8ffc5f77f90 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,181 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, "^$" } }))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", + "throws Syntax error" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC" + }, + { + "Customers | sort by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | order by LastName", + "SELECT *\nFROM Customers\nORDER BY LastName DESC" + }, + { + "Customers | order by Age desc , FirstName asc ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY Age DESC" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + "SELECT *\nFROM Customers\nWHERE 
startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "throws Syntax error" + }, + { + "Customers |summarize count() by Occupation", + "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE hasToken(Education, 'School')" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers|summarize count() by bin(Age, 10) ", + "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" + } +}))); From cb4c45340238a148b4a942f145f66c82a9c1e7b9 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 02/84] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 104 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f7422c02bca..24473118dc0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,7 +1,9 @@ #include #include -#include +//#include #include +#include +#include #include #include #include @@ -19,16 +21,21 @@ #include #include #include + namespace DB { -std::pair removeLastWord(String input) +std::pair ParserKQLSummarize::removeLastWord(String input) { - std::istringstream ss(input); - std::string token; + ReadBufferFromString in(input); + String token; std::vector temp; - while (std::getline(ss, token, ' ')) + while (!in.eof()) { + readStringUntilWhitespace(token, in); + if (in.eof()) + break; + skipWhitespaceIfAny(in); temp.push_back(token); } @@ -37,10 +44,65 @@ std::pair 
removeLastWord(String input) { firstPart += temp[i]; } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair("", ""); } +String ParserKQLSummarize::getBinGroupbyString(String exprBin) +{ + String column_name; + bool bracket_start = false; + bool comma_start = false; + String bin_duration; + + for (std::size_t i = 0; i < exprBin.size(); i++) + { + if (comma_start && exprBin[i] != ')') + bin_duration += exprBin[i]; + if (exprBin[i] == ',') + { + comma_start = true; + bracket_start = false; + } + if (bracket_start == true) + column_name += exprBin[i]; + if (exprBin[i] == '(') + bracket_start = true; + } + + + std::size_t len = bin_duration.size(); + char bin_type = bin_duration[len - 1]; // y, d, h, m, s + if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) + { + return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; + } + bin_duration = bin_duration.substr(0, len - 1); + + switch (bin_type) + { + case 'y': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" + + bin_duration + " * (12*30*86400))) AS bin_year"; + case 'd': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration + + " * 86400)) AS bin_day"; + case 'h': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration + + " * 3600)) AS bin_hour"; + case 'm': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration + + " * 60)) AS bin_minute"; + case 's': + return "toDateTime(" + column_name + ") AS bin_sec"; + default: + return ""; + } +} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -67,7 +129,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String exprAggregation; String exprGroupby; String exprColumns; - + String exprBin; bool groupby = false; bool bin_function = false; String bin_column; @@ -83,21 +145,20 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin") + if (String(pos->begin, pos->end) == "bin" || bin_function == true) { - exprGroupby = exprGroupby + "round" + " "; bin_function = true; - } - else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; - - if (bin_function && last_string == "(") - { - bin_column = String(pos->begin, pos->end); - bin_function = false; + exprBin += String(pos->begin, pos->end); + if (String(pos->begin, pos->end) == ")") + { + exprBin = getBinGroupbyString(exprBin); + exprGroupby += exprBin; + bin_function = false; + } } - last_string = String(pos->begin, pos->end); + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; } else @@ -114,13 +175,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { exprAggregation = exprAggregation + String(pos->begin, pos->end); character_passed++; - if (String(pos->begin, pos->end) == ")") // was 4 + if (String(pos->begin, pos->end) == ")") { exprAggregation = exprAggregation + " AS " + column_name; column_name = ""; } } - else + else if (!bin_function) { exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; } @@ -130,9 +191,6 @@ bool 
ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if(!bin_column.empty()) - exprGroupby = exprGroupby + " AS " + bin_column; - if (exprGroupby.empty()) exprColumns = exprAggregation; else diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 426ac29fe6a..1420d5ce519 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,15 +5,16 @@ namespace DB { - class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; + protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - + std::pair removeLastWord(String input); + String getBinGroupbyString(String exprBin); }; } From 766b1193d44ef0c1310fd606a1fba52661735154 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:18:49 -0700 Subject: [PATCH 03/84] added single unit test case for summarize bin() --- src/Parsers/tests/gtest_Parser.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 8ffc5f77f90..6d33ed20f33 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -430,6 +430,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers |summarize max(Age) by Occupation", "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, + { + "Customers |summarize count() by bin(Age, 10)", + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" + } { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" From 96bea2245b659b06c6c6a1f3ec9ddbc940d72969 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:29:51 -0700 Subject: [PATCH 04/84] removed unwanted comments --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 24473118dc0..0260902c937 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,6 +1,5 @@ #include #include -//#include #include #include #include @@ -111,17 +110,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName - - //summarize has syntax : - - //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] - - //right now , we only support: - - //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]]
- //Aggregation -> the Aggregation function on column
- //GroupExpression - > columns
auto begin = pos;

From 61543683ecc09878e8855aabb3f36299637c0df7 Mon Sep 17 00:00:00 2001
From: root
Date: Thu, 9 Jun 2022 12:06:15 -0700
Subject: [PATCH 05/84] corrected unit test

---
src/Parsers/tests/gtest_Parser.cpp | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp
index 6d33ed20f33..1ce82cab3bd 100644
--- a/src/Parsers/tests/gtest_Parser.cpp
+++ b/src/Parsers/tests/gtest_Parser.cpp
@@ -432,8 +432,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest,
},
{
"Customers |summarize count() by bin(Age, 10)",
- "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int"
+ "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int"
},
{
"Customers | where FirstName contains 'pet'",
"SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'"
@@ -469,9 +469,5 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest,
{
"Customers | where FirstName matches regex 'P.*r'",
"SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')"
- },
- {
- "Customers|summarize count() by bin(Age, 10) ",
- "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age"
}
})));

From 7163b4359e506abaf0da50c1b26688b5aba2f275 Mon Sep 17 00:00:00 2001
From: Yong Wang
Date: Thu, 9 Jun 2022 18:49:22 -0700
Subject: [PATCH 06/84] Kusto-phase1: Add new test cases

---
src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++
1 file changed, 8 insertions(+)

diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp
index 1ce82cab3bd..ee1e5fa6d8c 100644
--- a/src/Parsers/tests/gtest_Parser.cpp
+++ b/src/Parsers/tests/gtest_Parser.cpp
@@ -469,5 +469,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest,
{
"Customers | where FirstName matches regex 'P.*r'",
"SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')"
+ },
+ {
+ "Customers | where FirstName startswith 'pet'",
+ "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'"
+ },
+ {
+ "Customers | where FirstName !startswith 'pet'",
+ "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')"
}
})));

From 44bbbd8b9f64901b828eac074f0047f5d565b0c8 Mon Sep 17 00:00:00 2001
From: Yong Wang
Date: Thu, 9 Jun 2022 22:17:58 -0700
Subject: [PATCH 07/84] Kusto-phase1: Fixed the bug for KQL filter with multiple operations

---
src/Parsers/Kusto/ParserKQLOperators.cpp | 2 ++
src/Parsers/tests/gtest_Parser.cpp | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp
index 1db05d3c07a..726f28308ee 100644
--- a/src/Parsers/Kusto/ParserKQLOperators.cpp
+++ b/src/Parsers/Kusto/ParserKQLOperators.cpp
@@ -84,6 +84,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos)
else
--pos;
}
+ else
+ --pos;
if (KQLOperator.find(op) != KQLOperator.end())
opValue = KQLOperator[op];
diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp
index ee1e5fa6d8c..cb0b49aecbb 100644
--- a/src/Parsers/tests/gtest_Parser.cpp
+++ b/src/Parsers/tests/gtest_Parser.cpp
@@ -408,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest,
},
{
"Customers | where Age > 30 | where Education == 'Bachelors'",
- "throws Syntax error"
+ "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')"
},
{
"Customers |summarize count() by Occupation",
From
35207909e946de2fa30ab643dcadebb5286f10c2 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 08/84] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLFilter.cpp | 11 ++-- src/Parsers/Kusto/ParserKQLLimit.cpp | 25 ++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 34 +++++------ src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 56 ++++++++--------- src/Parsers/Kusto/ParserKQLSort.cpp | 4 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 14 ++--- src/Parsers/Kusto/ParserKQLSummarize.cpp | 76 ++++++++++++------------ src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- 9 files changed, 111 insertions(+), 115 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index ad7ad807d03..466370f5d80 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -17,17 +17,16 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLOperators convetor; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po : op_pos) { - pos = *it; if (expr.empty()) - expr = "(" + convetor.getExprFromToken(pos) +")"; + expr = "(" + convetor.getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; } - Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(tokenFilter, pos.max_depth); + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 7811ebba9ab..4f7eddd9662 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,14 +13,13 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; auto begin = pos; - Int64 minLimit = -1; + Int64 min_limit = -1; auto final_pos = pos; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po: op_pos) { - pos = *it; - auto isNumber = [&] + auto is_number = [&] { - for (auto ch = pos->begin ; ch < pos->end; ++ch) + for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) { if (!isdigit(*ch)) return false; @@ -28,21 +27,21 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!isNumber()) + if (!is_number()) return false; - auto limitLength = std::strtol(pos->begin,nullptr, 10); - if (-1 == minLimit) + auto limit_length = std::strtol(op_po->begin,nullptr, 10); + if (-1 == min_limit) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } else { - if (minLimit > limitLength) + if (min_limit > limit_length) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 726f28308ee..90b37ba8aea 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -10,33 +10,33 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +String KQLOperators::genHaystackOpExpr(std::vector 
&tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) { - String new_expr, leftWildcards= "", rightWildcards=""; + String new_expr, left_wildcards, right_wildcards; - switch (wildcardsPos) + switch (wildcards_pos) { case WildcardsPos::none: break; case WildcardsPos::left: - leftWildcards ="%"; + left_wildcards ="%"; break; case WildcardsPos::right: - rightWildcards = "%"; + right_wildcards = "%"; break; case WildcardsPos::both: - leftWildcards ="%"; - rightWildcards = "%"; + left_wildcards ="%"; + right_wildcards = "%"; break; } - if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) - new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else - throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } @@ -48,7 +48,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue opValue = KQLOperatorValue::none; + KQLOperatorValue op_value = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); @@ -88,14 +88,14 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) --pos; if (KQLOperator.find(op) != KQLOperator.end()) - opValue = KQLOperator[op]; + op_value = KQLOperator[op]; String new_expr; - if (opValue == KQLOperatorValue::none) + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { - switch (opValue) + switch (op_value) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -192,7 +192,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; @@ -232,8 +232,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) ++pos; } - for (auto it=tokens.begin(); it!=tokens.end(); ++it) - res = res + *it + " "; + for (auto & token : tokens) + res = res + token + " "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9beeeda55ef..4a9a13cf14f 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -97,7 +97,7 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0a9fa1fc4df..55aade6b2b9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -35,12 +35,12 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto select_query = std::make_shared(); node = select_query; - ParserKQLFilter KQLfilter_p; - 
ParserKQLLimit KQLlimit_p; - ParserKQLProject KQLproject_p; - ParserKQLSort KQLsort_p; - ParserKQLSummarize KQLsummarize_p; - ParserKQLTable KQLtable_p; + ParserKQLFilter kql_filter_p; + ParserKQLLimit kql_limit_p; + ParserKQLProject kql_project_p; + ParserKQLSort kql_sort_p; + ParserKQLSummarize kql_summarize_p; + ParserKQLTable kql_table_p; ASTPtr select_expression_list; ASTPtr tables; @@ -49,16 +49,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr order_expression_list; ASTPtr limit_length; - std::unordered_map KQLParser = { - { "filter",&KQLfilter_p}, - { "where",&KQLfilter_p}, - { "limit",&KQLlimit_p}, - { "take",&KQLlimit_p}, - { "project",&KQLproject_p}, - { "sort",&KQLsort_p}, - { "order",&KQLsort_p}, - { "summarize",&KQLsummarize_p}, - { "table",&KQLtable_p} + std::unordered_map kql_parser = { + { "filter",&kql_filter_p}, + { "where",&kql_filter_p}, + { "limit",&kql_limit_p}, + { "take",&kql_limit_p}, + { "project",&kql_project_p}, + { "sort",&kql_sort_p}, + { "order",&kql_sort_p}, + { "summarize",&kql_summarize_p}, + { "table",&kql_table_p} }; std::vector> operation_pos; @@ -71,44 +71,44 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark) { ++pos; - String KQLoperator(pos->begin,pos->end); - if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + String kql_operator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(KQLoperator,pos)); + operation_pos.push_back(std::make_pair(kql_operator,pos)); } } for (auto &op_pos : operation_pos) { - auto KQLoperator = op_pos.first; + auto kql_operator = op_pos.first; auto npos = op_pos.second; if (!npos.isValid()) return false; - if (!KQLParser[KQLoperator]->parsePrepare(npos)) + if (!kql_parser[kql_operator]->parsePrepare(npos)) return false; } - if (!KQLtable_p.parse(pos, tables, expected)) + if (!kql_table_p.parse(pos, tables, expected)) return false; - if (!KQLproject_p.parse(pos, select_expression_list, expected)) + if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; - if (!KQLlimit_p.parse(pos, limit_length, expected)) + if (!kql_limit_p.parse(pos, limit_length, expected)) return false; - if (!KQLfilter_p.parse(pos, where_expression, expected)) + if (!kql_filter_p.parse(pos, where_expression, expected)) return false; - if (!KQLsort_p.parse(pos, order_expression_list, expected)) + if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; - if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else - group_expression_list = KQLsummarize_p.group_expression_list; + group_expression_list = kql_summarize_p.group_expression_list; select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 9f226c2fc82..70e3283ee3e 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -48,11 +48,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } has_directions.push_back(has_dir); - for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + for (uint64_t i = 
0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto order_expr = order_expression_list->children[i]->as(); + auto *order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 7dea87eef25..2afbad22131 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -21,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery KQL_p; + ParserKQLWithUnionQuery kql_p; ASTPtr query; - bool parsed = KQL_p.parse(pos, query, expected); + bool parsed = kql_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +36,19 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // will support union next phase - ASTPtr KQLQuery; + ASTPtr kql_query; - if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + if (!ParserKQLQuery().parse(pos, kql_query, expected)) return false; - if (KQLQuery->as()) + if (kql_query->as()) { - node = std::move(KQLQuery); + node = std::move(kql_query); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(KQLQuery); + list_node->children.push_back(kql_query); auto select_with_union_query = std::make_shared(); node = select_with_union_query; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 0260902c937..48544a31104 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -38,42 +38,41 @@ std::pair ParserKQLSummarize::removeLastWord(String input) temp.push_back(token); } - String firstPart; + String first_part; for (std::size_t i = 0; i < temp.size() - 1; i++) { - firstPart += temp[i]; + first_part += temp[i]; } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); } -String ParserKQLSummarize::getBinGroupbyString(String exprBin) +String ParserKQLSummarize::getBinGroupbyString(String expr_bin) { String column_name; bool bracket_start = false; bool comma_start = false; String bin_duration; - for (std::size_t i = 0; i < exprBin.size(); i++) + for (char ch : expr_bin) { - if (comma_start && exprBin[i] != ')') - bin_duration += exprBin[i]; - if (exprBin[i] == ',') + if (comma_start && ch != ')') + bin_duration += ch; + if (ch == ',') { comma_start = true; bracket_start = false; } - if (bracket_start == true) - column_name += exprBin[i]; - if (exprBin[i] == '(') + if (bracket_start) + column_name += ch; + if (ch == '(') bracket_start = true; } - std::size_t len = bin_duration.size(); char bin_type = bin_duration[len - 1]; // y, d, h, m, s if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) @@ -110,14 +109,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - auto begin = pos; pos = op_pos.back(); - String exprAggregation; - String exprGroupby; - String exprColumns; - String exprBin; 
+ String expr_aggregation; + String expr_groupby; + String expr_columns; + String expr_bin; bool groupby = false; bool bin_function = false; String bin_column; @@ -133,45 +131,45 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin" || bin_function == true) + if (String(pos->begin, pos->end) == "bin" || bin_function) { bin_function = true; - exprBin += String(pos->begin, pos->end); + expr_bin += String(pos->begin, pos->end); if (String(pos->begin, pos->end) == ")") { - exprBin = getBinGroupbyString(exprBin); - exprGroupby += exprBin; + expr_bin = getBinGroupbyString(expr_bin); + expr_groupby += expr_bin; bin_function = false; } } else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; } else { if (String(pos->begin, pos->end) == "=") { - std::pair temp = removeLastWord(exprAggregation); - exprAggregation = temp.first; + std::pair temp = removeLastWord(expr_aggregation); + expr_aggregation = temp.first; column_name = temp.second; } else { if (!column_name.empty()) { - exprAggregation = exprAggregation + String(pos->begin, pos->end); + expr_aggregation = expr_aggregation + String(pos->begin, pos->end); character_passed++; if (String(pos->begin, pos->end) == ")") { - exprAggregation = exprAggregation + " AS " + column_name; + expr_aggregation = expr_aggregation + " AS " + column_name; column_name = ""; } } else if (!bin_function) { - exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } } } @@ -179,25 +177,25 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if (exprGroupby.empty()) - exprColumns = exprAggregation; + if (expr_groupby.empty()) + expr_columns = expr_aggregation; else { - if (exprAggregation.empty()) - exprColumns = exprGroupby; + if (expr_aggregation.empty()) + expr_columns = expr_groupby; else - exprColumns = exprGroupby + "," + exprAggregation; + expr_columns = expr_groupby + "," + expr_aggregation; } - Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); - IParser::Pos posColumns(tokenColumns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) return false; if (groupby) { - Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); - IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1420d5ce519..b243f74d08f 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -13,8 +13,8 @@ public: protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - 
std::pair removeLastWord(String input); - String getBinGroupbyString(String exprBin); + static std::pair removeLastWord(String input); + static String getBinGroupbyString(String expr_bin); }; } From 31781601cb459589cb21fbf60d1139d7a3fc1652 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 12 Jun 2022 20:05:51 -0700 Subject: [PATCH 09/84] Kusto-phase1: Fixed more style issues. --- src/Parsers/Kusto/ParserKQLOperators.cpp | 8 ++++---- src/Parsers/Kusto/ParserKQLOperators.h | 3 ++- src/Parsers/Kusto/ParserKQLProject.cpp | 2 -- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLQuery.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 10 +++++----- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 90b37ba8aea..260c9070d51 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!" ) + if ( token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -134,7 +134,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -142,7 +142,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 4a9a13cf14f..a780e18d333 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -6,7 +6,8 @@ namespace DB { -class KQLOperators { +class KQLOperators +{ public: String getExprFromToken(IParser::Pos pos); protected: diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fee8cdb612b..0e25c9c4a6c 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -42,6 +42,4 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected return true; } - - } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 55aade6b2b9..1a850e77f48 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include namespace DB @@ -15,8 +14,8 @@ namespace DB bool ParserKQLBase :: parsePrepare(Pos & pos) { - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } String ParserKQLBase :: getExprFromToken(Pos pos)
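(Illustrative aside, not part of the patch series: genHaystackOpExpr, which these style fixes touch, builds a ClickHouse function call over the column (the "haystack") and the string literal (the "needle"), padding the needle with '%' wildcards as the KQL operator requires. A rough sketch under a simplified, hypothetical signature; the real method consumes the token vector and parser position:

#include <cstdint>
#include <string>

enum class WildcardsPos : std::uint8_t { none, left, right, both };

/// Sketch only: "contains" maps to ilike with wildcards on both sides,
/// "startswith" to ilike with a right wildcard, "endswith_cs" to endsWith
/// with no wildcards, and so on.
std::string buildHaystackExpr(const std::string & haystack, const std::string & needle, const std::string & ch_op, WildcardsPos wildcards_pos)
{
    const std::string left = (wildcards_pos == WildcardsPos::left || wildcards_pos == WildcardsPos::both) ? "%" : "";
    const std::string right = (wildcards_pos == WildcardsPos::right || wildcards_pos == WildcardsPos::both) ? "%" : "";
    return ch_op + "(" + haystack + ", '" + left + needle + right + "')";
}

So a KQL predicate such as FirstName contains 'pet' becomes ilike(FirstName, '%pet%'), while startswith pads only on the right.)
diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h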
index 25aa4e6b83c..0545cd00cd9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -11,7 +11,7 @@ public: protected: std::vector op_pos; - std::vector expresions; + std::vector expressions; virtual String getExprFromToken(Pos pos); }; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 8d450799785..a7ae7fef579 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -9,17 +9,17 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { - if (!op_pos.empty()) + if (!op_pos.empty()) return false; - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ( { + ({ "SELECT", "INSERT", "CREATE", @@ -42,7 +42,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - } ); + }); if (op_pos.empty()) return false; From c2b3aff3d7f54731dbbe93e89ec043d7699c9523 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 13 Jun 2022 06:26:02 -0700 Subject: [PATCH 10/84] Kusto-phase1: Fixed misleading indentation --- src/Parsers/Kusto/ParserKQLOperators.cpp | 4 ++-- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 260c9070d51..60fa022f9bb 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!") + if (token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index a7ae7fef579..f1fc13d2c48 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { if (!op_pos.empty()) - return false; + return false; op_pos.push_back(pos); return true; From 87182ccd51c1249f5b7d36f14ad8912aa3c43b30 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 11/84] Kusto-phase1: Add Support to Kusto Query Language This is the initial implement of Kusto Query Language. 
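For example (as exercised by the parser unit tests added below), the KQL query Customers | where FirstName contains 'pet' is translated to the ClickHouse SQL SELECT * FROM Customers WHERE FirstName ILIKE '%pet%'.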
in this commit, we support the following features as MVP : Tabular expression statements Limit returned results Select Column (basic project) sort, order Perform string equality operations Filter using a list of elements Filter using common string operations Some string operators Aggregate by columns Base aggregate functions only support avg, count ,min, max, sum Aggregate by time intervals --- src/Client/ClientBase.cpp | 15 +- src/Core/Settings.h | 1 + src/Interpreters/executeQuery.cpp | 19 +- src/Parsers/CMakeLists.txt | 1 + src/Parsers/Kusto/ParserKQLFilter.cpp | 39 ++++ src/Parsers/Kusto/ParserKQLFilter.h | 16 ++ src/Parsers/Kusto/ParserKQLLimit.cpp | 58 ++++++ src/Parsers/Kusto/ParserKQLLimit.h | 17 ++ src/Parsers/Kusto/ParserKQLOperators.cpp | 239 +++++++++++++++++++++++ src/Parsers/Kusto/ParserKQLOperators.h | 103 ++++++++++ src/Parsers/Kusto/ParserKQLProject.cpp | 47 +++++ src/Parsers/Kusto/ParserKQLProject.h | 22 +++ src/Parsers/Kusto/ParserKQLQuery.cpp | 123 ++++++++++++ src/Parsers/Kusto/ParserKQLQuery.h | 25 +++ src/Parsers/Kusto/ParserKQLSort.cpp | 71 +++++++ src/Parsers/Kusto/ParserKQLSort.h | 16 ++ src/Parsers/Kusto/ParserKQLStatement.cpp | 61 ++++++ src/Parsers/Kusto/ParserKQLStatement.h | 45 +++++ src/Parsers/Kusto/ParserKQLSummarize.cpp | 162 +++++++++++++++ src/Parsers/Kusto/ParserKQLSummarize.h | 19 ++ src/Parsers/Kusto/ParserKQLTable.cpp | 68 +++++++ src/Parsers/Kusto/ParserKQLTable.h | 18 ++ src/Parsers/Lexer.cpp | 2 +- src/Parsers/Lexer.h | 1 + src/Parsers/tests/gtest_Parser.cpp | 179 +++++++++++++++++ 25 files changed, 1359 insertions(+), 8 deletions(-) create mode 100644 src/Parsers/Kusto/ParserKQLFilter.cpp create mode 100644 src/Parsers/Kusto/ParserKQLFilter.h create mode 100644 src/Parsers/Kusto/ParserKQLLimit.cpp create mode 100644 src/Parsers/Kusto/ParserKQLLimit.h create mode 100644 src/Parsers/Kusto/ParserKQLOperators.cpp create mode 100644 src/Parsers/Kusto/ParserKQLOperators.h create mode 100644 src/Parsers/Kusto/ParserKQLProject.cpp create mode 100644 src/Parsers/Kusto/ParserKQLProject.h create mode 100644 src/Parsers/Kusto/ParserKQLQuery.cpp create mode 100644 src/Parsers/Kusto/ParserKQLQuery.h create mode 100644 src/Parsers/Kusto/ParserKQLSort.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSort.h create mode 100644 src/Parsers/Kusto/ParserKQLStatement.cpp create mode 100644 src/Parsers/Kusto/ParserKQLStatement.h create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.h create mode 100644 src/Parsers/Kusto/ParserKQLTable.cpp create mode 100644 src/Parsers/Kusto/ParserKQLTable.h diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index c399f01c565..1407395bf89 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -70,7 +70,7 @@ #include #include #include - +#include namespace fs = std::filesystem; using namespace std::literals; @@ -299,7 +299,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); + std::shared_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -308,10 +308,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; + const String & sql_dialect = settings.sql_dialect; + + if (sql_dialect == "kusto") + parser = std::make_shared(end, 
global_context->getSettings().allow_settings_after_format_in_insert); + else + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + if (is_interactive || ignore_error) { String message; - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { @@ -321,7 +328,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 08a3df0a3e3..9d5535aa923 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -42,6 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ + M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index cdddd28adeb..20f4fa559f9 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -72,6 +72,7 @@ #include #include +#include namespace ProfileEvents { @@ -396,10 +397,22 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + const String & sql_dialect = settings.sql_dialect; + assert(sql_dialect == "clickhouse" || sql_dialect == "kusto"); - /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + if (sql_dialect == "kusto" && !internal) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else + { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } if (auto txn = context->getCurrentTransaction()) { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73f300fd5f6..73d46593e04 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) +add_headers_and_sources(clickhouse_parsers ./Kusto) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp new file mode 100644 index 00000000000..ad7ad807d03 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + Pos begin = pos; + String expr; + + KQLOperators convetor; + + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + if (expr.empty()) + expr = "(" + convetor.getExprFromToken(pos) +")"; + else + expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + } + + Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(tokenFilter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLFilter.h b/src/Parsers/Kusto/ParserKQLFilter.h new file mode 100644 index 00000000000..19bb38a7fda --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLFilter : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL where"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp new file mode 100644 index 00000000000..7811ebba9ab --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + Int64 minLimit = -1; + auto final_pos = pos; + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + auto isNumber = [&] + { + for (auto ch = pos->begin ; ch < pos->end; ++ch) + { + if (!isdigit(*ch)) + return false; + } + return true; + }; + + if (!isNumber()) + return false; + + auto limitLength = std::strtol(pos->begin,nullptr, 10); + if (-1 == minLimit) + { + minLimit = limitLength; + final_pos = pos; + } + else + { + if (minLimit > limitLength) + { + minLimit = limitLength; + final_pos = pos; + } + } + } + + if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h new file mode 100644 index 00000000000..d425659499d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLLimit : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL limit"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp new file mode 100644 index 00000000000..1db05d3c07a --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -0,0 +1,239 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +{ + String new_expr, leftWildcards= "", rightWildcards=""; + + switch (wildcardsPos) + { + case WildcardsPos::none: + break; + + case WildcardsPos::left: + leftWildcards ="%"; + break; + + case WildcardsPos::right: + rightWildcards = "%"; + break; + + case WildcardsPos::both: + leftWildcards ="%"; + rightWildcards = "%"; + break; + } + + if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) + new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + else + throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::getExprFromToken(IParser::Pos pos) +{ + String res; + std::vector tokens; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + KQLOperatorValue opValue = KQLOperatorValue::none; + + auto token = String(pos->begin,pos->end); + + String op = token; + if ( token == "!" ) + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + op ="!"+String(pos->begin,pos->end); + } + else if (token == "matches") + { + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "regex") + op +=" regex"; + else + --pos; + } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "~") + op +="~"; + else + --pos; + } + + if (KQLOperator.find(op) != KQLOperator.end()) + opValue = KQLOperator[op]; + + String new_expr; + if (opValue == KQLOperatorValue::none) + tokens.push_back(op); + else + { + switch (opValue) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + break; + + case KQLOperatorValue::not_equal: + break; + + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + + case KQLOperatorValue::has: + new_expr = 
genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + break; + + case KQLOperatorValue::has_any: + break; + + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + break; + + case KQLOperatorValue::not_hasprefix: + break; + + case KQLOperatorValue::hasprefix_cs: + break; + + case KQLOperatorValue::not_hasprefix_cs: + break; + + case KQLOperatorValue::hassuffix: + break; + + case KQLOperatorValue::not_hassuffix: + break; + + case KQLOperatorValue::hassuffix_cs: + break; + + case KQLOperatorValue::not_hassuffix_cs: + break; + + case KQLOperatorValue::in_cs: + new_expr = "in"; + break; + + case KQLOperatorValue::not_in_cs: + new_expr = "not in"; + break; + + case KQLOperatorValue::in: + break; + + case KQLOperatorValue::not_in: + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + break; + + default: + break; + } + + tokens.push_back(new_expr); + } + ++pos; + } + + for (auto it=tokens.begin(); it!=tokens.end(); ++it) + res = res + *it + " "; + + return res; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h new file mode 100644 index 00000000000..9beeeda55ef --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class KQLOperators { +public: + String getExprFromToken(IParser::Pos pos); +protected: + + enum class WildcardsPos:uint8_t + { + none, + left, + right, + both + }; + + enum class KQLOperatorValue : uint16_t + { + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal,//!~ + equal_cs, //= + not_equal_cs,//!= + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in ,//!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, + }; + + std::unordered_map KQLOperator = + { + {"contains" , KQLOperatorValue::contains}, + {"!contains" , KQLOperatorValue::not_contains}, + {"contains_cs" , KQLOperatorValue::contains_cs}, + {"!contains_cs" , KQLOperatorValue::not_contains_cs}, + {"endswith" , KQLOperatorValue::endswith}, + {"!endswith" , KQLOperatorValue::not_endswith}, + {"endswith_cs" , KQLOperatorValue::endswith_cs}, + {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, + {"=~" , 
KQLOperatorValue::equal}, + {"!~" , KQLOperatorValue::not_equal}, + {"==" , KQLOperatorValue::equal_cs}, + {"!=" , KQLOperatorValue::not_equal_cs}, + {"has" , KQLOperatorValue::has}, + {"!has" , KQLOperatorValue::not_has}, + {"has_all" , KQLOperatorValue::has_all}, + {"has_any" , KQLOperatorValue::has_any}, + {"has_cs" , KQLOperatorValue::has_cs}, + {"!has_cs" , KQLOperatorValue::not_has_cs}, + {"hasprefix" , KQLOperatorValue::hasprefix}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix" , KQLOperatorValue::hassuffix}, + {"!hassuffix" , KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, + {"in" , KQLOperatorValue::in_cs}, + {"!in" , KQLOperatorValue::not_in_cs}, + {"in~" , KQLOperatorValue::in}, + {"!in~" , KQLOperatorValue::not_in}, + {"matches regex" , KQLOperatorValue::matches_regex}, + {"startswith" , KQLOperatorValue::startswith}, + {"!startswith" , KQLOperatorValue::not_startswith}, + {"startswith_cs" , KQLOperatorValue::startswith_cs}, + {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, + }; + String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp new file mode 100644 index 00000000000..fee8cdb612b --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + String expr; + if (op_pos.empty()) + expr = "*"; + else + { + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it ; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + if (pos->type == TokenType::BareWord) + { + String tmp(pos->begin,pos->end); + + if (it != op_pos.begin() && columns.find(tmp) == columns.end()) + return false; + columns.insert(tmp); + } + ++pos; + } + } + expr = getExprFromToken(op_pos.back()); + } + + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + + + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h new file mode 100644 index 00000000000..3ab3c82f1be --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProject : public ParserKQLBase +{ +public: + void addColumn(String column) {columns.insert(column);} + +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::unordered_set columns; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp new file mode 100644 index 00000000000..0a9fa1fc4df --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLBase :: parsePrepare(Pos & pos) +{ + op_pos.push_back(pos); + return true; +} + +String ParserKQLBase :: getExprFromToken(Pos pos) +{ + String res; 
+ while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + res = res + String(pos->begin,pos->end) +" "; + ++pos; + } + return res; +} + +bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto select_query = std::make_shared(); + node = select_query; + + ParserKQLFilter KQLfilter_p; + ParserKQLLimit KQLlimit_p; + ParserKQLProject KQLproject_p; + ParserKQLSort KQLsort_p; + ParserKQLSummarize KQLsummarize_p; + ParserKQLTable KQLtable_p; + + ASTPtr select_expression_list; + ASTPtr tables; + ASTPtr where_expression; + ASTPtr group_expression_list; + ASTPtr order_expression_list; + ASTPtr limit_length; + + std::unordered_map KQLParser = { + { "filter",&KQLfilter_p}, + { "where",&KQLfilter_p}, + { "limit",&KQLlimit_p}, + { "take",&KQLlimit_p}, + { "project",&KQLproject_p}, + { "sort",&KQLsort_p}, + { "order",&KQLsort_p}, + { "summarize",&KQLsummarize_p}, + { "table",&KQLtable_p} + }; + + std::vector> operation_pos; + + operation_pos.push_back(std::make_pair("table",pos)); + + while (!pos->isEnd()) + { + ++pos; + if (pos->type == TokenType::PipeMark) + { + ++pos; + String KQLoperator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + return false; + ++pos; + operation_pos.push_back(std::make_pair(KQLoperator,pos)); + } + } + + for (auto &op_pos : operation_pos) + { + auto KQLoperator = op_pos.first; + auto npos = op_pos.second; + if (!npos.isValid()) + return false; + + if (!KQLParser[KQLoperator]->parsePrepare(npos)) + return false; + } + + if (!KQLtable_p.parse(pos, tables, expected)) + return false; + + if (!KQLproject_p.parse(pos, select_expression_list, expected)) + return false; + + if (!KQLlimit_p.parse(pos, limit_length, expected)) + return false; + + if (!KQLfilter_p.parse(pos, where_expression, expected)) + return false; + + if (!KQLsort_p.parse(pos, order_expression_list, expected)) + return false; + + if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + return false; + else + group_expression_list = KQLsummarize_p.group_expression_list; + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h new file mode 100644 index 00000000000..25aa4e6b83c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ +class ParserKQLBase : public IParserBase +{ +public: + virtual bool parsePrepare(Pos & pos) ; + +protected: + std::vector op_pos; + std::vector expresions; + virtual String getExprFromToken(Pos pos); +}; + +class ParserKQLQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp new file mode 100644 index 00000000000..9f226c2fc82 --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + bool has_dir = false; + std::vector has_directions; + ParserOrderByExpressionList order_list; + ASTPtr order_expression_list; + + ParserKeyword by("by"); + + pos = op_pos.back(); // sort only affected by last one + + if (!by.ignore(pos, expected)) + return false; + + if (!order_list.parse(pos,order_expression_list,expected)) + return false; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + return false; + + pos = op_pos.back(); + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + String tmp(pos->begin,pos->end); + if (tmp == "desc" or tmp == "asc") + has_dir = true; + + if (pos->type == TokenType::Comma) + { + has_directions.push_back(has_dir); + has_dir = false; + } + + ++pos; + } + has_directions.push_back(has_dir); + + for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + { + if (!has_directions[i]) + { + auto order_expr = order_expression_list->children[i]->as(); + order_expr->direction = -1; // default desc + if (!order_expr->nulls_direction_was_explicitly_specified) + order_expr->nulls_direction = -1; + else + order_expr->nulls_direction = order_expr->nulls_direction == 1 ? -1 : 1; + + } + } + + node = order_expression_list; + + pos =begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.h b/src/Parsers/Kusto/ParserKQLSort.h new file mode 100644 index 00000000000..d9afefc196c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSort : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL order by"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp new file mode 100644 index 00000000000..7dea87eef25 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserSetQuery set_p; + + bool res = query_with_output_p.parse(pos, node, expected) + || set_p.parse(pos, node, expected); + + return res; +} + +bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery KQL_p; + + ASTPtr query; + bool parsed = KQL_p.parse(pos, query, expected); + + if (!parsed) + return false; + + node = std::move(query); + return true; +} + +bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + // will support union next phase + ASTPtr KQLQuery; + + if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + return false; + + if (KQLQuery->as()) + { + node = std::move(KQLQuery); + return true; + } + + auto list_node = std::make_shared(); + list_node->children.push_back(KQLQuery); + + auto select_with_union_query = std::make_shared(); + node = select_with_union_query; + select_with_union_query->list_of_selects = list_node; + select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + return true; +} 
+ +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h new file mode 100644 index 00000000000..1eed2d00845 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLStatement : public IParserBase +{ +private: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL Statement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + + +class ParserKQLWithOutput : public IParserBase +{ +protected: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL with output"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + +class ParserKQLWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp new file mode 100644 index 00000000000..f7422c02bca --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -0,0 +1,162 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace DB +{ +std::pair removeLastWord(String input) +{ + std::istringstream ss(input); + std::string token; + std::vector temp; + + while (std::getline(ss, token, ' ')) + { + temp.push_back(token); + } + + String firstPart; + for (std::size_t i = 0; i < temp.size() - 1; i++) + { + firstPart += temp[i]; + } + + return std::make_pair(firstPart, temp[temp.size() - 1]); +} + + +bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + if (op_pos.size() != 1) // now only support one summarize + return false; + + //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName + + //summarize has syntax : + + //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] + + //right now , we only support: + + //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] + //Aggregation -> the Aggregation function on column + //GroupExpression - > columns + + auto begin = pos; + + pos = op_pos.back(); + String exprAggregation; + String exprGroupby; + String exprColumns; + + bool groupby = false; + bool bin_function = false; + String bin_column; + String last_string; + String column_name; + int character_passed = 0; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + groupby = true; + else + { + if (groupby) + { + if (String(pos->begin, pos->end) == "bin") + { + exprGroupby = exprGroupby + "round" + " "; + bin_function = true; + } + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + + if (bin_function && last_string == "(") + { + bin_column = String(pos->begin, pos->end); + bin_function = false; + } + + last_string = String(pos->begin, pos->end); + } + + else + { + if (String(pos->begin, pos->end) == "=") + { + std::pair temp = removeLastWord(exprAggregation); + exprAggregation = temp.first; + column_name = temp.second; + } + else + { + if (!column_name.empty()) + { + exprAggregation = exprAggregation + String(pos->begin, pos->end); + character_passed++; + if (String(pos->begin, pos->end) == ")") // was 4 + { + exprAggregation = exprAggregation + " AS " + column_name; + column_name = ""; + } + } + else + { + exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + } + } + } + } + ++pos; + } + + if(!bin_column.empty()) + exprGroupby = exprGroupby + " AS " + bin_column; + + if (exprGroupby.empty()) + exprColumns = exprAggregation; + else + { + if (exprAggregation.empty()) + exprColumns = exprGroupby; + else + exprColumns = exprGroupby + "," + exprAggregation; + } + Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); + IParser::Pos posColumns(tokenColumns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + return false; + + if (groupby) + { + Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); + IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + return false; + } + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h new file mode 100644 index 00000000000..426ac29fe6a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSummarize : public ParserKQLBase +{ +public: + ASTPtr group_expression_list; +protected: + const char * getName() const override { return "KQL summarize"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp new file mode 100644 index 00000000000..8d450799785 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLTable :: parsePrepare(Pos & pos) +{ + if (!op_pos.empty()) + return false; + + op_pos.push_back(pos); + return true; +} + +bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::unordered_set sql_keywords + ( { + "SELECT", + "INSERT", + "CREATE", + "ALTER", + "SYSTEM", + "SHOW", + "GRANT", + "REVOKE", + "ATTACH", + "CHECK", + 
"DESCRIBE", + "DESC", + "DETACH", + "DROP", + "EXISTS", + "KILL", + "OPTIMIZE", + "RENAME", + "SET", + "TRUNCATE", + "USE", + "EXPLAIN" + } ); + + if (op_pos.empty()) + return false; + + auto begin = pos; + pos = op_pos.back(); + + String table_name(pos->begin,pos->end); + String table_name_upcase(table_name); + + std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); + + if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) + return false; + + if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + return false; + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h new file mode 100644 index 00000000000..1266b6e732d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTable : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL Table"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parsePrepare(Pos &pos) override; + +}; + +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 747a13d46f7..892c0ad4718 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl() ++pos; if (pos < end && *pos == '|') return Token(TokenType::Concatenation, token_begin, ++pos); - return Token(TokenType::ErrorSinglePipeMark, token_begin, pos); + return Token(TokenType::PipeMark, token_begin, pos); } case '@': { diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index ec472fb1a36..0c439ca0677 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ M(At) /** @. Used for specifying user names and also for MySQL-style variables. 
*/ \ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5b6d49e2741..8ffc5f77f90 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,181 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, "^$" } }))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", + "throws Syntax error" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC" + }, + { + "Customers | sort by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | order by LastName", + "SELECT *\nFROM Customers\nORDER BY LastName DESC" + }, + { + "Customers | order by Age desc , FirstName asc ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY Age DESC" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + "SELECT *\nFROM Customers\nWHERE 
startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "throws Syntax error" + }, + { + "Customers |summarize count() by Occupation", + "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE hasToken(Education, 'School')" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers|summarize count() by bin(Age, 10) ", + "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" + } +}))); From 10f87612ebf599016e3b1ea47083f67363132ef8 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 12/84] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 104 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f7422c02bca..24473118dc0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,7 +1,9 @@ #include #include -#include +//#include #include +#include +#include #include #include #include @@ -19,16 +21,21 @@ #include #include #include + namespace DB { -std::pair removeLastWord(String input) +std::pair ParserKQLSummarize::removeLastWord(String input) { - std::istringstream ss(input); - std::string token; + ReadBufferFromString in(input); + String token; std::vector temp; - while (std::getline(ss, token, ' ')) + while (!in.eof()) { + readStringUntilWhitespace(token, in); + if (in.eof()) + break; + skipWhitespaceIfAny(in); temp.push_back(token); } @@ -37,10 +44,65 @@ std::pair 
removeLastWord(String input) { firstPart += temp[i]; } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair("", ""); } +String ParserKQLSummarize::getBinGroupbyString(String exprBin) +{ + String column_name; + bool bracket_start = false; + bool comma_start = false; + String bin_duration; + + for (std::size_t i = 0; i < exprBin.size(); i++) + { + if (comma_start && exprBin[i] != ')') + bin_duration += exprBin[i]; + if (exprBin[i] == ',') + { + comma_start = true; + bracket_start = false; + } + if (bracket_start == true) + column_name += exprBin[i]; + if (exprBin[i] == '(') + bracket_start = true; + } + + + std::size_t len = bin_duration.size(); + char bin_type = bin_duration[len - 1]; // y, d, h, m, s + if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) + { + return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; + } + bin_duration = bin_duration.substr(0, len - 1); + + switch (bin_type) + { + case 'y': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" + + bin_duration + " * (12*30*86400))) AS bin_year"; + case 'd': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration + + " * 86400)) AS bin_day"; + case 'h': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration + + " * 3600)) AS bin_hour"; + case 'm': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration + + " * 60)) AS bin_minute"; + case 's': + return "toDateTime(" + column_name + ") AS bin_sec"; + default: + return ""; + } +} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -67,7 +129,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String exprAggregation; String exprGroupby; String exprColumns; - + String exprBin; bool groupby = false; bool bin_function = false; String bin_column; @@ -83,21 +145,20 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin") + if (String(pos->begin, pos->end) == "bin" || bin_function == true) { - exprGroupby = exprGroupby + "round" + " "; bin_function = true; - } - else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; - - if (bin_function && last_string == "(") - { - bin_column = String(pos->begin, pos->end); - bin_function = false; + exprBin += String(pos->begin, pos->end); + if (String(pos->begin, pos->end) == ")") + { + exprBin = getBinGroupbyString(exprBin); + exprGroupby += exprBin; + bin_function = false; + } } - last_string = String(pos->begin, pos->end); + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; } else @@ -114,13 +175,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { exprAggregation = exprAggregation + String(pos->begin, pos->end); character_passed++; - if (String(pos->begin, pos->end) == ")") // was 4 + if (String(pos->begin, pos->end) == ")") { exprAggregation = exprAggregation + " AS " + column_name; column_name = ""; } } - else + else if (!bin_function) { exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; } @@ -130,9 +191,6 @@ bool 
ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if(!bin_column.empty()) - exprGroupby = exprGroupby + " AS " + bin_column; - if (exprGroupby.empty()) exprColumns = exprAggregation; else diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 426ac29fe6a..1420d5ce519 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,15 +5,16 @@ namespace DB { - class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; + protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - + std::pair removeLastWord(String input); + String getBinGroupbyString(String exprBin); }; } From 45e8d29542f3a373d0b436f82b40a0cd2d608403 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:18:49 -0700 Subject: [PATCH 13/84] added single unit test case for summarize bin() --- src/Parsers/tests/gtest_Parser.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 8ffc5f77f90..6d33ed20f33 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -430,6 +430,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers |summarize max(Age) by Occupation", "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, + { + "Customers |summarize count() by bin(Age, 10)", + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" + } { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" From e20b2ed6eb19c3f471e94a6d7cbdaecd4eeb7a66 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:29:51 -0700 Subject: [PATCH 14/84] removed unwanted comments --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 24473118dc0..0260902c937 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,6 +1,5 @@ #include #include -//#include #include #include #include @@ -111,17 +110,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName - - //summarize has syntax : - - //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] - - //right now , we only support: - - //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] - //Aggregation -> the Aggregation function on column - //GroupExpression - > columns auto begin = pos; From 844bd7c3d7975a571c6c28a6de77390aef16eb69 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 12:06:15 -0700 Subject: [PATCH 15/84] corrected unit test --- src/Parsers/tests/gtest_Parser.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 6d33ed20f33..1ce82cab3bd 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -432,8 +432,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" - } + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + }, { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" @@ -469,9 +469,5 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" - }, - { - "Customers|summarize count() by bin(Age, 10) ", - "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From fdaffac96b20c49c6ebed4c3babac2aa64e9fd9c Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 18:49:22 -0700 Subject: [PATCH 16/84] Kusto-phase1 : Add new test cases --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1ce82cab3bd..ee1e5fa6d8c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -469,5 +469,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); From 20758da3947550dc41445dea09eb6c9d91ddd1a3 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 22:17:58 -0700 Subject: [PATCH 17/84] Kusto-phase1: Fixed the bug for KQL filer with multiple operations --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 ++ src/Parsers/tests/gtest_Parser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1db05d3c07a..726f28308ee 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -84,6 +84,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; } + else + --pos; if (KQLOperator.find(op) != KQLOperator.end()) opValue = KQLOperator[op]; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index ee1e5fa6d8c..cb0b49aecbb 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -408,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "throws Syntax error" + "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" }, { "Customers |summarize count() by Occupation", From 
08022a818925c708807341c5631c6482bd17ef6e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 18/84] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLFilter.cpp | 11 ++-- src/Parsers/Kusto/ParserKQLLimit.cpp | 25 ++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 34 +++++------ src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 56 ++++++++--------- src/Parsers/Kusto/ParserKQLSort.cpp | 4 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 14 ++--- src/Parsers/Kusto/ParserKQLSummarize.cpp | 76 ++++++++++++------------ src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- 9 files changed, 111 insertions(+), 115 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index ad7ad807d03..466370f5d80 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -17,17 +17,16 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLOperators convetor; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po : op_pos) { - pos = *it; if (expr.empty()) - expr = "(" + convetor.getExprFromToken(pos) +")"; + expr = "(" + convetor.getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; } - Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(tokenFilter, pos.max_depth); + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 7811ebba9ab..4f7eddd9662 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,14 +13,13 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; auto begin = pos; - Int64 minLimit = -1; + Int64 min_limit = -1; auto final_pos = pos; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po: op_pos) { - pos = *it; - auto isNumber = [&] + auto is_number = [&] { - for (auto ch = pos->begin ; ch < pos->end; ++ch) + for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) { if (!isdigit(*ch)) return false; @@ -28,21 +27,21 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!isNumber()) + if (!is_number()) return false; - auto limitLength = std::strtol(pos->begin,nullptr, 10); - if (-1 == minLimit) + auto limit_length = std::strtol(op_po->begin,nullptr, 10); + if (-1 == min_limit) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } else { - if (minLimit > limitLength) + if (min_limit > limit_length) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 726f28308ee..90b37ba8aea 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -10,33 +10,33 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +String KQLOperators::genHaystackOpExpr(std::vector 
&tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) { - String new_expr, leftWildcards= "", rightWildcards=""; + String new_expr, left_wildcards, right_wildcards; - switch (wildcardsPos) + switch (wildcards_pos) { case WildcardsPos::none: break; case WildcardsPos::left: - leftWildcards ="%"; + left_wildcards ="%"; break; case WildcardsPos::right: - rightWildcards = "%"; + right_wildcards = "%"; break; case WildcardsPos::both: - leftWildcards ="%"; - rightWildcards = "%"; + left_wildcards ="%"; + right_wildcards = "%"; break; } - if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) - new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else - throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } @@ -48,7 +48,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue opValue = KQLOperatorValue::none; + KQLOperatorValue op_value = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); @@ -88,14 +88,14 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) --pos; if (KQLOperator.find(op) != KQLOperator.end()) - opValue = KQLOperator[op]; + op_value = KQLOperator[op]; String new_expr; - if (opValue == KQLOperatorValue::none) + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { - switch (opValue) + switch (op_value) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -192,7 +192,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; @@ -232,8 +232,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) ++pos; } - for (auto it=tokens.begin(); it!=tokens.end(); ++it) - res = res + *it + " "; + for (auto & token : tokens) + res = res + token + " "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9beeeda55ef..4a9a13cf14f 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -97,7 +97,7 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0a9fa1fc4df..55aade6b2b9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -35,12 +35,12 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto select_query = std::make_shared(); node = select_query; - ParserKQLFilter KQLfilter_p; - 
ParserKQLLimit KQLlimit_p; - ParserKQLProject KQLproject_p; - ParserKQLSort KQLsort_p; - ParserKQLSummarize KQLsummarize_p; - ParserKQLTable KQLtable_p; + ParserKQLFilter kql_filter_p; + ParserKQLLimit kql_limit_p; + ParserKQLProject kql_project_p; + ParserKQLSort kql_sort_p; + ParserKQLSummarize kql_summarize_p; + ParserKQLTable kql_table_p; ASTPtr select_expression_list; ASTPtr tables; @@ -49,16 +49,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr order_expression_list; ASTPtr limit_length; - std::unordered_map KQLParser = { - { "filter",&KQLfilter_p}, - { "where",&KQLfilter_p}, - { "limit",&KQLlimit_p}, - { "take",&KQLlimit_p}, - { "project",&KQLproject_p}, - { "sort",&KQLsort_p}, - { "order",&KQLsort_p}, - { "summarize",&KQLsummarize_p}, - { "table",&KQLtable_p} + std::unordered_map kql_parser = { + { "filter",&kql_filter_p}, + { "where",&kql_filter_p}, + { "limit",&kql_limit_p}, + { "take",&kql_limit_p}, + { "project",&kql_project_p}, + { "sort",&kql_sort_p}, + { "order",&kql_sort_p}, + { "summarize",&kql_summarize_p}, + { "table",&kql_table_p} }; std::vector> operation_pos; @@ -71,44 +71,44 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark) { ++pos; - String KQLoperator(pos->begin,pos->end); - if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + String kql_operator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(KQLoperator,pos)); + operation_pos.push_back(std::make_pair(kql_operator,pos)); } } for (auto &op_pos : operation_pos) { - auto KQLoperator = op_pos.first; + auto kql_operator = op_pos.first; auto npos = op_pos.second; if (!npos.isValid()) return false; - if (!KQLParser[KQLoperator]->parsePrepare(npos)) + if (!kql_parser[kql_operator]->parsePrepare(npos)) return false; } - if (!KQLtable_p.parse(pos, tables, expected)) + if (!kql_table_p.parse(pos, tables, expected)) return false; - if (!KQLproject_p.parse(pos, select_expression_list, expected)) + if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; - if (!KQLlimit_p.parse(pos, limit_length, expected)) + if (!kql_limit_p.parse(pos, limit_length, expected)) return false; - if (!KQLfilter_p.parse(pos, where_expression, expected)) + if (!kql_filter_p.parse(pos, where_expression, expected)) return false; - if (!KQLsort_p.parse(pos, order_expression_list, expected)) + if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; - if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else - group_expression_list = KQLsummarize_p.group_expression_list; + group_expression_list = kql_summarize_p.group_expression_list; select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 9f226c2fc82..70e3283ee3e 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -48,11 +48,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } has_directions.push_back(has_dir); - for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + for (uint64_t i = 
0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto order_expr = order_expression_list->children[i]->as(); + auto *order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 7dea87eef25..2afbad22131 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -21,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery KQL_p; + ParserKQLWithUnionQuery kql_p; ASTPtr query; - bool parsed = KQL_p.parse(pos, query, expected); + bool parsed = kql_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +36,19 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // will support union next phase - ASTPtr KQLQuery; + ASTPtr kql_query; - if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + if (!ParserKQLQuery().parse(pos, kql_query, expected)) return false; - if (KQLQuery->as()) + if (kql_query->as()) { - node = std::move(KQLQuery); + node = std::move(kql_query); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(KQLQuery); + list_node->children.push_back(kql_query); auto select_with_union_query = std::make_shared(); node = select_with_union_query; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 0260902c937..48544a31104 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -38,42 +38,41 @@ std::pair ParserKQLSummarize::removeLastWord(String input) temp.push_back(token); } - String firstPart; + String first_part; for (std::size_t i = 0; i < temp.size() - 1; i++) { - firstPart += temp[i]; + first_part += temp[i]; } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); } -String ParserKQLSummarize::getBinGroupbyString(String exprBin) +String ParserKQLSummarize::getBinGroupbyString(String expr_bin) { String column_name; bool bracket_start = false; bool comma_start = false; String bin_duration; - for (std::size_t i = 0; i < exprBin.size(); i++) + for (char ch : expr_bin) { - if (comma_start && exprBin[i] != ')') - bin_duration += exprBin[i]; - if (exprBin[i] == ',') + if (comma_start && ch != ')') + bin_duration += ch; + if (ch == ',') { comma_start = true; bracket_start = false; } - if (bracket_start == true) - column_name += exprBin[i]; - if (exprBin[i] == '(') + if (bracket_start) + column_name += ch; + if (ch == '(') bracket_start = true; } - std::size_t len = bin_duration.size(); char bin_type = bin_duration[len - 1]; // y, d, h, m, s if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) @@ -110,14 +109,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - auto begin = pos; pos = op_pos.back(); - String exprAggregation; - String exprGroupby; - String exprColumns; - String exprBin; 
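// Illustrative sketch, not part of this patch: for the query
// "Customers | summarize count() by Occupation", the strings renamed in this
// hunk end up holding, roughly,
//
//     expr_aggregation == "count ( ) "
//     expr_groupby     == "Occupation "
//     expr_columns     == expr_groupby + "," + expr_aggregation
//
// expr_columns is then re-tokenized and parsed with ParserNotEmptyExpressionList,
// which is why the unit tests earlier in this series expect
// "SELECT Occupation, count() FROM Customers GROUP BY Occupation".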
+ String expr_aggregation; + String expr_groupby; + String expr_columns; + String expr_bin; bool groupby = false; bool bin_function = false; String bin_column; @@ -133,45 +131,45 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin" || bin_function == true) + if (String(pos->begin, pos->end) == "bin" || bin_function) { bin_function = true; - exprBin += String(pos->begin, pos->end); + expr_bin += String(pos->begin, pos->end); if (String(pos->begin, pos->end) == ")") { - exprBin = getBinGroupbyString(exprBin); - exprGroupby += exprBin; + expr_bin = getBinGroupbyString(expr_bin); + expr_groupby += expr_bin; bin_function = false; } } else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; } else { if (String(pos->begin, pos->end) == "=") { - std::pair temp = removeLastWord(exprAggregation); - exprAggregation = temp.first; + std::pair temp = removeLastWord(expr_aggregation); + expr_aggregation = temp.first; column_name = temp.second; } else { if (!column_name.empty()) { - exprAggregation = exprAggregation + String(pos->begin, pos->end); + expr_aggregation = expr_aggregation + String(pos->begin, pos->end); character_passed++; if (String(pos->begin, pos->end) == ")") { - exprAggregation = exprAggregation + " AS " + column_name; + expr_aggregation = expr_aggregation + " AS " + column_name; column_name = ""; } } else if (!bin_function) { - exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } } } @@ -179,25 +177,25 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if (exprGroupby.empty()) - exprColumns = exprAggregation; + if (expr_groupby.empty()) + expr_columns = expr_aggregation; else { - if (exprAggregation.empty()) - exprColumns = exprGroupby; + if (expr_aggregation.empty()) + expr_columns = expr_groupby; else - exprColumns = exprGroupby + "," + exprAggregation; + expr_columns = expr_groupby + "," + expr_aggregation; } - Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); - IParser::Pos posColumns(tokenColumns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) return false; if (groupby) { - Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); - IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1420d5ce519..b243f74d08f 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -13,8 +13,8 @@ public: protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - 
std::pair removeLastWord(String input); - String getBinGroupbyString(String exprBin); + static std::pair removeLastWord(String input); + static String getBinGroupbyString(String expr_bin); }; } From 516a6c0844543d44d34feca5314b74000dff4f87 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 12 Jun 2022 20:05:51 -0700 Subject: [PATCH 19/84] Kusto-pahse1: Fixed moy style issues. --- src/Parsers/Kusto/ParserKQLOperators.cpp | 8 ++++---- src/Parsers/Kusto/ParserKQLOperators.h | 3 ++- src/Parsers/Kusto/ParserKQLProject.cpp | 2 -- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLQuery.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 10 +++++----- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 90b37ba8aea..260c9070d51 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!" ) + if ( token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -134,7 +134,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -142,7 +142,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 4a9a13cf14f..a780e18d333 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -6,7 +6,8 @@ namespace DB { -class KQLOperators { +class KQLOperators +{ public: String getExprFromToken(IParser::Pos pos); protected: diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fee8cdb612b..0e25c9c4a6c 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -42,6 +42,4 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected return true; } - - } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 55aade6b2b9..1a850e77f48 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include namespace DB @@ -15,8 +14,8 @@ namespace DB bool ParserKQLBase :: parsePrepare(Pos & pos) { - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } String ParserKQLBase :: getExprFromToken(Pos pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h 
index 25aa4e6b83c..0545cd00cd9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -11,7 +11,7 @@ public: protected: std::vector op_pos; - std::vector expresions; + std::vector expressions; virtual String getExprFromToken(Pos pos); }; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 8d450799785..a7ae7fef579 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -9,17 +9,17 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { - if (!op_pos.empty()) + if (!op_pos.empty()) return false; - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ( { + ({ "SELECT", "INSERT", "CREATE", @@ -42,7 +42,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - } ); + }); if (op_pos.empty()) return false; From 30ce50faff20570d379861286b85f46bc866070e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 13 Jun 2022 06:26:02 -0700 Subject: [PATCH 20/84] Kusto-phase1: Fixed misleading indentation --- src/Parsers/Kusto/ParserKQLOperators.cpp | 4 ++-- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 260c9070d51..60fa022f9bb 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!") + if (token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index a7ae7fef579..f1fc13d2c48 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { if (!op_pos.empty()) - return false; + return false; op_pos.push_back(pos); return true; From 8ee2a40a4c49c10c76005e535ca295da5ee8e696 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Tue, 16 Aug 2022 20:10:44 -0400 Subject: [PATCH 21/84] adding missing headers --- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index a780e18d333..64af156f505 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,7 +2,7 @@ #include #include - +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 
f1fc13d2c48..fadf5305e89 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,7 +3,7 @@ #include #include #include - +#include namespace DB { From c2c457ea0e44a2453474153a78a3a133772ae7f0 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 17 Aug 2022 06:03:41 -0700 Subject: [PATCH 22/84] Kusto-phase1: Change the dialect to Enum, rename sql_dialect to dialect, set limit to subquery --- src/Client/ClientBase.cpp | 4 ++-- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 4 +++- src/Core/SettingsEnums.h | 8 ++++++++ src/Interpreters/executeQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLLimit.cpp | 9 ++++++++- src/Parsers/Kusto/ParserKQLLimit.h | 5 +++++ src/Parsers/Kusto/ParserKQLOperators.h | 1 + src/Parsers/Kusto/ParserKQLQuery.cpp | 8 +++++++- src/Parsers/Kusto/ParserKQLTable.cpp | 1 + src/Parsers/tests/gtest_Parser.cpp | 12 ++++++------ 11 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 1407395bf89..871a7849d5b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -308,9 +308,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; - const String & sql_dialect = settings.sql_dialect; + const Dialect & dialect = settings.dialect; - if (sql_dialect == "kusto") + if (dialect == Dialect::kusto) parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); else parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9d5535aa923..24f6d610a81 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -42,7 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ - M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ + M(Dialect, dialect, Dialect::clickhouse, "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 616026520db..54e1f882d58 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -158,5 +158,7 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) - +IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, + {{"clickhouse", Dialect::clickhouse}, + {"kusto", Dialect::kusto}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 308d53ff690..3f52fa44237 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -183,4 +183,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) +enum class Dialect +{ + clickhouse, + kusto, + kusto_auto, +}; + +DECLARE_SETTING_ENUM(Dialect) } diff 
--git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 20f4fa559f9..8bd629f1adc 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -397,10 +397,9 @@ static std::tuple executeQueryImpl( String query_table; try { - const String & sql_dialect = settings.sql_dialect; - assert(sql_dialect == "clickhouse" || sql_dialect == "kusto"); + const Dialect & dialect = settings.dialect; - if (sql_dialect == "kusto" && !internal) + if (dialect == Dialect::kusto && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 4f7eddd9662..ece04f644cc 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include namespace DB { @@ -46,7 +48,12 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + String sub_query = std::format("( SELECT * FROM {} LIMIT {} )", table_name, String(final_pos->begin, final_pos->end)); + + Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserTablesInSelectQuery().parse(pos_subquery, node, expected)) return false; pos = begin; diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h index d425659499d..c234985b0a6 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.h +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -8,10 +8,15 @@ namespace DB class ParserKQLLimit : public ParserKQLBase { +public: + void setTableName(String table_name_) {table_name = table_name_;} protected: const char * getName() const override { return "KQL limit"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + String table_name; }; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index ed6ebba2441..64af156f505 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 1a850e77f48..7f6fcbcdb70 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -63,6 +63,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) std::vector> operation_pos; operation_pos.push_back(std::make_pair("table",pos)); + String table_name(pos->begin,pos->end); while (!pos->isEnd()) { @@ -95,8 +96,14 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; + kql_limit_p.setTableName(table_name); if (!kql_limit_p.parse(pos, limit_length, expected)) return false; + else + { + if (limit_length) + tables = std::move(limit_length); + } if (!kql_filter_p.parse(pos, where_expression, expected)) return false; @@ -114,7 +121,6 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, 
std::move(order_expression_list)); - select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 29fabd5056c..fadf5305e89 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index cb0b49aecbb..3575e8ba175 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -308,23 +308,23 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | project FirstName,LastName,Occupation | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | limit 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | take 1 | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | take 1", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", - "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", @@ -336,7 +336,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | take 3 | order by FirstName desc", - "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC" }, { "Customers | sort by FirstName asc", From 6b57b219a4997eef0275c3b4e5bcfb2c0968c81f Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 20 Aug 2022 20:01:27 -0700 Subject: [PATCH 23/84] Kusto-phase1: remove unused variable --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 48544a31104..f3c402a80be 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -121,7 +121,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String bin_column; String last_string; String column_name; - int character_passed = 0; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { @@ -160,7 +159,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (!column_name.empty()) { expr_aggregation = expr_aggregation + String(pos->begin, pos->end); - character_passed++; + if (String(pos->begin, pos->end) == ")") { expr_aggregation = expr_aggregation + " AS " + column_name; From eab8b7b42d72ee01aabe057290453ed8f21c2e5e Mon Sep 17 00:00:00 2001 From: 
Antonio Andelic Date: Mon, 5 Sep 2022 08:25:08 +0000 Subject: [PATCH 24/84] Always start embedded Keeper in async mode --- programs/server/Server.cpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d788270ecf9..5e5a1be0b8f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1279,18 +1279,8 @@ int Server::main(const std::vector & /*args*/) if (config().has("keeper_server")) { #if USE_NURAFT - //// If we don't have configured connection probably someone trying to use clickhouse-server instead - //// of clickhouse-keeper, so start synchronously. - bool can_initialize_keeper_async = false; - - if (has_zookeeper) /// We have configured connection to some zookeeper cluster - { - /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start - /// synchronously. - can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); - } /// Initialize keeper RAFT. - global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + global_context->initializeKeeperDispatcher(/* start_async */ true); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & From 74c958931b4f24df1705fda9b03ad8f9e0b344ed Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 5 Sep 2022 21:16:04 -0700 Subject: [PATCH 25/84] Kusto-phase1 : Updated kql pipe flow with some optimizations --- src/Parsers/Kusto/ParserKQLFilter.cpp | 20 +- src/Parsers/Kusto/ParserKQLLimit.cpp | 47 +-- src/Parsers/Kusto/ParserKQLLimit.h | 6 - src/Parsers/Kusto/ParserKQLOperators.cpp | 156 ++++++++-- src/Parsers/Kusto/ParserKQLOperators.h | 8 +- src/Parsers/Kusto/ParserKQLProject.cpp | 28 +- src/Parsers/Kusto/ParserKQLProject.h | 6 - src/Parsers/Kusto/ParserKQLQuery.cpp | 353 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLQuery.h | 19 +- src/Parsers/Kusto/ParserKQLSort.cpp | 31 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 43 ++- src/Parsers/Kusto/ParserKQLStatement.h | 7 + src/Parsers/Kusto/ParserKQLSummarize.cpp | 192 +++--------- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- src/Parsers/Kusto/ParserKQLTable.cpp | 21 +- src/Parsers/Kusto/ParserKQLTable.h | 3 +- src/Parsers/tests/gtest_Parser.cpp | 30 +- 17 files changed, 567 insertions(+), 408 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 466370f5d80..3a399bdccdb 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -10,27 +10,15 @@ namespace DB bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; - Pos begin = pos; - String expr; - - KQLOperators convetor; - - for (auto op_po : op_pos) - { - if (expr.empty()) - expr = "(" + convetor.getExprFromToken(op_po) +")"; - else - expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; - } + String expr = getExprFromToken(pos); + ASTPtr where_expression; Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos pos_filter(token_filter, pos.max_depth); - if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) return false; - pos = begin; + node->as()->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); return true; } diff --git 
a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index ece04f644cc..bb8e08fd378 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -11,52 +11,17 @@ namespace DB bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; + ASTPtr limit_length; - auto begin = pos; - Int64 min_limit = -1; - auto final_pos = pos; - for (auto op_po: op_pos) - { - auto is_number = [&] - { - for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) - { - if (!isdigit(*ch)) - return false; - } - return true; - }; + auto expr = getExprFromToken(pos); - if (!is_number()) - return false; + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); - auto limit_length = std::strtol(op_po->begin,nullptr, 10); - if (-1 == min_limit) - { - min_limit = limit_length; - final_pos = op_po; - } - else - { - if (min_limit > limit_length) - { - min_limit = limit_length; - final_pos = op_po; - } - } - } - - String sub_query = std::format("( SELECT * FROM {} LIMIT {} )", table_name, String(final_pos->begin, final_pos->end)); - - Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); - IParser::Pos pos_subquery(token_subquery, pos.max_depth); - - if (!ParserTablesInSelectQuery().parse(pos_subquery, node, expected)) + if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) return false; - pos = begin; + node->as()->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h index c234985b0a6..1585805f0fc 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.h +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -8,15 +8,9 @@ namespace DB class ParserKQLLimit : public ParserKQLBase { -public: - void setTableName(String table_name_) {table_name = table_name_;} - protected: const char * getName() const override { return "KQL limit"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - String table_name; }; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 60fa022f9bb..b250f5def60 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include namespace DB { @@ -10,9 +12,60 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) +String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Pos &token_pos,String kql_op, String ch_op) { - String new_expr, left_wildcards, right_wildcards; + String new_expr; + Expected expected; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + auto haystack = tokens.back(); + + String logic_op = (kql_op == "has_all") ? 
" and " : " or "; + + while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + { + auto tmp_arg = String(token_pos->begin, token_pos->end); + if (token_pos->type == TokenType::Comma ) + new_expr = new_expr + logic_op; + else + new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; + + ++token_pos; + if (token_pos->type == TokenType::ClosingRoundBracket) + break; + + } + + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ch_op) +{ + String new_expr; + + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ASTPtr select; + Expected expected; + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + --token_pos; + --token_pos; + return ch_op; + +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) +{ + String new_expr, left_wildcards, right_wildcards, left_space, right_space; switch (wildcards_pos) { @@ -33,20 +86,45 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; } - if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; + switch (space_pos) + { + case WildcardsPos::none: + break; + + case WildcardsPos::left: + left_space =" "; + break; + + case WildcardsPos::right: + right_space = " "; + break; + + case WildcardsPos::both: + left_space =" "; + right_space = " "; + break; + } + + ++token_pos; + + if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; + else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + { + auto tmp_arg = String(token_pos->begin, token_pos->end); + new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; + } else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } -String KQLOperators::getExprFromToken(IParser::Pos pos) +bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) { - String res; - std::vector tokens; + auto begin = pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -87,14 +165,23 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; - if (KQLOperator.find(op) != KQLOperator.end()) - op_value = KQLOperator[op]; + if (KQLOperator.find(op) == KQLOperator.end()) + { + pos = begin; + return false; + } + + op_value = KQLOperator[op]; String new_expr; + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { + auto last_op = tokens.back(); + auto last_pos = pos; + switch (op_value) { case KQLOperatorValue::contains: @@ -142,7 +229,6 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - 
case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; @@ -152,9 +238,11 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) break; case KQLOperatorValue::has_all: + new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_any: + new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_cs: @@ -166,35 +254,67 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) break; case KQLOperatorValue::hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::in_cs: - new_expr = "in"; + new_expr = genInOpExpr(pos,op,"in"); break; case KQLOperatorValue::not_in_cs: - new_expr = "not in"; + new_expr = genInOpExpr(pos,op,"not in"); break; case KQLOperatorValue::in: @@ -229,13 +349,11 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) tokens.push_back(new_expr); } - ++pos; + return true; } - - for (auto & token : tokens) - res = res + token + " "; - - return res; + pos = begin; + return false; } } + diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 64af156f505..9796ae10c07 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -9,7 
+9,7 @@ namespace DB class KQLOperators { public: - String getExprFromToken(IParser::Pos pos); + bool convert(std::vector &tokens,IParser::Pos &pos); protected: enum class WildcardsPos:uint8_t @@ -83,7 +83,7 @@ protected: {"hasprefix" , KQLOperatorValue::hasprefix}, {"!hasprefix" , KQLOperatorValue::not_hasprefix}, {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, {"hassuffix" , KQLOperatorValue::hassuffix}, {"!hassuffix" , KQLOperatorValue::not_hassuffix}, {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, @@ -98,7 +98,9 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); + static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); + static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); }; } diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index 0e25c9c4a6c..e978323d821 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -6,38 +6,18 @@ namespace DB bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - auto begin = pos; + ASTPtr select_expression_list; String expr; - if (op_pos.empty()) - expr = "*"; - else - { - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) - { - pos = *it ; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) - { - if (pos->type == TokenType::BareWord) - { - String tmp(pos->begin,pos->end); - if (it != op_pos.begin() && columns.find(tmp) == columns.end()) - return false; - columns.insert(tmp); - } - ++pos; - } - } - expr = getExprFromToken(op_pos.back()); - } + expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) return false; - pos = begin; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h index 3ab3c82f1be..b64675beed0 100644 --- a/src/Parsers/Kusto/ParserKQLProject.h +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -8,15 +8,9 @@ namespace DB class ParserKQLProject : public ParserKQLBase { -public: - void addColumn(String column) {columns.insert(column);} - protected: const char * getName() const override { return "KQL project"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - std::unordered_set columns; }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 7f6fcbcdb70..9fc32da7790 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -8,120 +8,339 @@ #include #include #include +#include +#include +#include +#include +#include +#include namespace DB { -bool ParserKQLBase :: parsePrepare(Pos & pos) +namespace ErrorCodes { - op_pos.push_back(pos); - 
return true; + extern const int UNKNOWN_FUNCTION; } -String ParserKQLBase :: getExprFromToken(Pos pos) +String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) +{ + Tokens tokens(text.c_str(), text.c_str() + text.size()); + IParser::Pos pos(tokens, max_depth); + + return getExprFromToken(pos); +} + +String ParserKQLBase :: getExprFromPipe(Pos & pos) +{ + uint16_t bracket_count = 0; + auto begin = pos; + auto end = pos; + while (!end->isEnd() && end->type != TokenType::Semicolon) + { + if (end->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (end->type == TokenType::ClosingRoundBracket) + --bracket_count; + + if (end->type == TokenType::PipeMark && bracket_count == 0) + break; + + ++end; + } + --end; + return String(begin->begin, end->end); +} + +String ParserKQLBase :: getExprFromToken(Pos & pos) { String res; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + std::vector tokens; + String alias; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - res = res + String(pos->begin,pos->end) +" "; + String token = String(pos->begin,pos->end); + + if (token == "=") + { + ++pos; + if (String(pos->begin,pos->end) != "~" ) + { + alias = tokens.back(); + tokens.pop_back(); + } + --pos; + } + else if (!KQLOperators().convert(tokens,pos)) + { + tokens.push_back(token); + } + + if (pos->type == TokenType::Comma && !alias.empty()) + { + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); + } ++pos; } + + if (!alias.empty()) + { + tokens.push_back("AS"); + tokens.push_back(alias); + } + + for (auto token:tokens) + res = res.empty()? token : res +" " + token; return res; } +std::unique_ptr ParserKQLQuery::getOperator(String & op_name) +{ + if (op_name == "filter" || op_name == "where") + return std::make_unique(); + else if (op_name == "limit" || op_name == "take") + return std::make_unique(); + else if (op_name == "project") + return std::make_unique(); + else if (op_name == "sort by" || op_name == "order by") + return std::make_unique(); + else if (op_name == "summarize") + return std::make_unique(); + else if (op_name == "table") + return std::make_unique(); + else + return nullptr; +} + bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + struct KQLOperatorDataFlowState + { + String operator_name; + bool need_input; + bool gen_output; + int8_t backspace_steps; // how many steps to last token of previous pipe + }; + auto select_query = std::make_shared(); node = select_query; - - ParserKQLFilter kql_filter_p; - ParserKQLLimit kql_limit_p; - ParserKQLProject kql_project_p; - ParserKQLSort kql_sort_p; - ParserKQLSummarize kql_summarize_p; - ParserKQLTable kql_table_p; - - ASTPtr select_expression_list; ASTPtr tables; - ASTPtr where_expression; - ASTPtr group_expression_list; - ASTPtr order_expression_list; - ASTPtr limit_length; - std::unordered_map kql_parser = { - { "filter",&kql_filter_p}, - { "where",&kql_filter_p}, - { "limit",&kql_limit_p}, - { "take",&kql_limit_p}, - { "project",&kql_project_p}, - { "sort",&kql_sort_p}, - { "order",&kql_sort_p}, - { "summarize",&kql_summarize_p}, - { "table",&kql_table_p} + std::unordered_map kql_parser = + { + { "filter", {"filter", false, false, 3}}, + { "where", {"filter", false, false, 3}}, + { "limit", {"limit", false, true, 3}}, + { "take", {"limit", false, true, 3}}, + { "project", {"project", false, false, 3}}, + { "sort by", {"order by", false, false,
4}}, + { "order by", {"order by", false, false, 4}}, + { "table", {"table", false, false, 3}}, + { "summarize", {"summarize", true, true, 3}} }; std::vector> operation_pos; - operation_pos.push_back(std::make_pair("table",pos)); - String table_name(pos->begin,pos->end); + String table_name(pos->begin, pos->end); - while (!pos->isEnd()) + operation_pos.push_back(std::make_pair("table", pos)); + ++pos; + uint16_t bracket_count = 0; + + while (!pos->isEnd() && pos->type != TokenType::Semicolon) { - ++pos; - if (pos->type == TokenType::PipeMark) + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + if (pos->type == TokenType::ClosingRoundBracket) + --bracket_count; + + if (pos->type == TokenType::PipeMark && bracket_count == 0) { ++pos; - String kql_operator(pos->begin,pos->end); + String kql_operator(pos->begin, pos->end); + if (kql_operator == "order" || kql_operator == "sort") + { + ++pos; + ParserKeyword s_by("by"); + if (s_by.ignore(pos,expected)) + { + kql_operator = "order by"; + --pos; + } + } if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(kql_operator,pos)); + operation_pos.push_back(std::make_pair(kql_operator, pos)); } + else + ++pos; } - for (auto &op_pos : operation_pos) - { - auto kql_operator = op_pos.first; - auto npos = op_pos.second; - if (!npos.isValid()) - return false; + auto kql_operator_str = operation_pos.back().first; + auto npos = operation_pos.back().second; + if (!npos.isValid()) + return false; - if (!kql_parser[kql_operator]->parsePrepare(npos)) + auto kql_operator_p = getOperator(kql_operator_str); + + if (!kql_operator_p) + return false; + + if (operation_pos.size() == 1) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else if (operation_pos.size() == 2 && operation_pos.front().first == "table") + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + npos = operation_pos.front().second; + if (!ParserKQLTable().parse(npos, node, expected)) + return false; + } + else + { + String project_clause, order_clause, where_clause, limit_clause; + auto last_pos = operation_pos.back().second; + auto last_op = operation_pos.back().first; + + auto set_main_query_clause =[&](String & op, Pos & op_pos) + { + auto op_str = ParserKQLBase::getExprFromPipe(op_pos); + if (op == "project") + project_clause = op_str; + else if (op == "where" || op == "filter") + where_clause = where_clause.empty() ? std::format("({})", op_str) : where_clause + std::format("AND ({})", op_str); + else if (op == "limit" || op == "take") + limit_clause = op_str; + else if (op == "order by" || op == "sort by") + order_clause = order_clause.empty() ?
op_str : order_clause + "," + op_str; + }; + + set_main_query_clause(last_op, last_pos); + + operation_pos.pop_back(); + + if (kql_parser[last_op].need_input) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else + { + while (operation_pos.size() > 0) + { + auto prev_op = operation_pos.back().first; + auto prev_pos = operation_pos.back().second; + + if (kql_parser[prev_op].gen_output) + break; + if (!project_clause.empty() && prev_op == "project") + break; + set_main_query_clause(prev_op, prev_pos); + operation_pos.pop_back(); + last_op = prev_op; + last_pos = prev_pos; + } + } + + if (operation_pos.size() > 0) + { + for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) + --last_pos; + + String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); + Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) + return false; + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + } + else + { + if (!ParserKQLTable().parse(last_pos, node, expected)) + return false; + } + + auto set_query_clasue =[&](String op_str, String op_calsue) + { + auto oprator = getOperator(op_str); + if (oprator) + { + Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); + IParser::Pos pos_clause(token_clause, pos.max_depth); + if (!oprator->parse(pos_clause, node, expected)) + return false; + } + return true; + }; + + if (!select_query->select()) + { + if (project_clause.empty()) + project_clause = "*"; + if (!set_query_clasue("project", project_clause)) + return false; + } + + if (!order_clause.empty()) + if (!set_query_clasue("order by", order_clause)) + return false; + + if (!where_clause.empty()) + if (!set_query_clasue("where", where_clause)) + return false; + + if (!limit_clause.empty()) + if (!set_query_clasue("limit", limit_clause)) + return false; + return true; + } + + if (!select_query->select()) + { + auto expr = String("*"); + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + if (!std::make_unique()->parse(new_pos, node, expected)) return false; } - if (!kql_table_p.parse(pos, tables, expected)) + return true; +} + +bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr select_node; + + if (!ParserKQLTaleFunction().parse(pos, select_node, expected)) return false; - if (!kql_project_p.parse(pos, select_expression_list, expected)) - return false; + ASTPtr node_subquery = std::make_shared(); + node_subquery->children.push_back(select_node); - kql_limit_p.setTableName(table_name); - if (!kql_limit_p.parse(pos, limit_length, expected)) - return false; - else - { - if (limit_length) - tables = std::move(limit_length); - } + ASTPtr node_table_expr = std::make_shared(); + node_table_expr->as()->subquery = node_subquery; - if (!kql_filter_p.parse(pos, where_expression, expected)) - return false; + node_table_expr->children.emplace_back(node_subquery); - if (!kql_sort_p.parse(pos, order_expression_list, expected)) - return false; + ASTPtr node_table_in_select_query_emlement = std::make_shared(); + node_table_in_select_query_emlement->as()->table_expression = node_table_expr; - if (!kql_summarize_p.parse(pos, select_expression_list, expected)) - return false; - else - group_expression_list = kql_summarize_p.group_expression_list; + ASTPtr res = 
std::make_shared(); - select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); - select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); - select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); - select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); - select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + res->children.emplace_back(node_table_in_select_query_emlement); + node = res; return true; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 0545cd00cd9..42f5f84f031 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -1,25 +1,32 @@ #pragma once #include +#include namespace DB { class ParserKQLBase : public IParserBase { public: - virtual bool parsePrepare(Pos & pos) ; - -protected: - std::vector op_pos; - std::vector expressions; - virtual String getExprFromToken(Pos pos); + static String getExprFromToken(Pos & pos); + static String getExprFromPipe(Pos & pos); + static String getExprFromToken(const String & text, const uint32_t & max_depth); }; class ParserKQLQuery : public IParserBase { + protected: + static std::unique_ptr getOperator(String &op_name); const char * getName() const override { return "KQL query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserKQLSubquery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL subquery"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 70e3283ee3e..f7540d729fd 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -10,41 +10,32 @@ namespace DB bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; - - auto begin = pos; bool has_dir = false; std::vector has_directions; ParserOrderByExpressionList order_list; ASTPtr order_expression_list; - ParserKeyword by("by"); + auto expr = getExprFromToken(pos); - pos = op_pos.back(); // sort only affected by last one + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); - if (!by.ignore(pos, expected)) + auto pos_backup = new_pos; + if (!order_list.parse(pos_backup, order_expression_list, expected)) return false; - if (!order_list.parse(pos,order_expression_list,expected)) - return false; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - return false; - - pos = op_pos.back(); - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) { - String tmp(pos->begin,pos->end); + String tmp(new_pos->begin, new_pos->end); if (tmp == "desc" or tmp == "asc") has_dir = true; - if (pos->type == TokenType::Comma) + if (new_pos->type == TokenType::Comma) { has_directions.push_back(has_dir); has_dir = false; } - - ++pos; + ++new_pos; } has_directions.push_back(has_dir); @@ -58,13 +49,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) order_expr->nulls_direction = -1; else order_expr->nulls_direction = order_expr->nulls_direction == 1 ? 
-1 : 1; - } } - node = order_expression_list; + node->as()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); - pos =begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 2afbad22131..573c953c313 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -35,7 +36,6 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - // will support union next phase ASTPtr kql_query; if (!ParserKQLQuery().parse(pos, kql_query, expected)) @@ -58,4 +58,45 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery kql_p; + ASTPtr select; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + auto begin = pos; + auto paren_count = 0 ; + String kql_statement; + + if (s_lparen.ignore(pos, expected)) + { + ++paren_count; + while (!pos->isEnd()) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + + kql_statement = kql_statement + " " + String(pos->begin,pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth); + + if (kql_p.parse(pos_kql, select, expected)) + { + node = select; + ++pos; + return true; + } + } + pos = begin; + return false; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index 1eed2d00845..ef44b2d6c8a 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -41,5 +41,12 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserKQLTaleFunction : public IParserBase +{ +protected: + const char * getName() const override { return "KQL() function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f3c402a80be..4d8d7753178 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,8 +1,3 @@ -#include -#include -#include -#include -#include #include #include #include @@ -15,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -23,182 +17,64 @@ namespace DB { -std::pair ParserKQLSummarize::removeLastWord(String input) -{ - ReadBufferFromString in(input); - String token; - std::vector temp; - - while (!in.eof()) - { - readStringUntilWhitespace(token, in); - if (in.eof()) - break; - skipWhitespaceIfAny(in); - temp.push_back(token); - } - - String first_part; - for (std::size_t i = 0; i < temp.size() - 1; i++) - { - first_part += temp[i]; - } - if (!temp.empty()) - { - return std::make_pair(first_part, temp[temp.size() - 1]); - } - - return std::make_pair("", ""); -} - -String ParserKQLSummarize::getBinGroupbyString(String expr_bin) -{ - String column_name; - bool bracket_start = false; - bool comma_start = false; - String bin_duration; - - for (char ch : expr_bin) - { - if (comma_start && ch != ')') - bin_duration += ch; - if (ch == ',') - { - 
comma_start = true; - bracket_start = false; - } - if (bracket_start) - column_name += ch; - if (ch == '(') - bracket_start = true; - } - - std::size_t len = bin_duration.size(); - char bin_type = bin_duration[len - 1]; // y, d, h, m, s - if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) - { - return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; - } - bin_duration = bin_duration.substr(0, len - 1); - - switch (bin_type) - { - case 'y': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" - + bin_duration + " * (12*30*86400))) AS bin_year"; - case 'd': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration - + " * 86400)) AS bin_day"; - case 'h': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration - + " * 3600)) AS bin_hour"; - case 'm': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration - + " * 60)) AS bin_minute"; - case 's': - return "toDateTime(" + column_name + ") AS bin_sec"; - default: - return ""; - } -} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; - if (op_pos.size() != 1) // now only support one summarize - return false; + ASTPtr select_expression_list; + ASTPtr group_expression_list; - auto begin = pos; - - pos = op_pos.back(); String expr_aggregation; String expr_groupby; String expr_columns; - String expr_bin; bool groupby = false; - bool bin_function = false; - String bin_column; - String last_string; - String column_name; + + auto begin = pos; + auto pos_groupby = pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (String(pos->begin, pos->end) == "by") - groupby = true; - else { - if (groupby) - { - if (String(pos->begin, pos->end) == "bin" || bin_function) - { - bin_function = true; - expr_bin += String(pos->begin, pos->end); - if (String(pos->begin, pos->end) == ")") - { - expr_bin = getBinGroupbyString(expr_bin); - expr_groupby += expr_bin; - bin_function = false; - } - } - - else - expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; - } - - else - { - if (String(pos->begin, pos->end) == "=") - { - std::pair temp = removeLastWord(expr_aggregation); - expr_aggregation = temp.first; - column_name = temp.second; - } - else - { - if (!column_name.empty()) - { - expr_aggregation = expr_aggregation + String(pos->begin, pos->end); - - if (String(pos->begin, pos->end) == ")") - { - expr_aggregation = expr_aggregation + " AS " + column_name; - column_name = ""; - } - } - else if (!bin_function) - { - expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; - } - } - } + groupby = true; + auto end = pos; + --end; + expr_aggregation = begin <= end ? 
String(begin->begin, end->end) : ""; + pos_groupby = pos; + ++pos_groupby; } ++pos; } - - if (expr_groupby.empty()) - expr_columns = expr_aggregation; + --pos; + if (groupby) + expr_groupby = String(pos_groupby->begin, pos->end); else - { - if (expr_aggregation.empty()) - expr_columns = expr_groupby; - else - expr_columns = expr_groupby + "," + expr_aggregation; - } - Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); - IParser::Pos pos_columns(token_columns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) + expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; + + auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; + expr_columns = groupby ? expr_aggregation_str + expr_groupby : expr_aggregation_str; + + String converted_columns = getExprFromToken(expr_columns, pos.max_depth); + + Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) return false; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + if (groupby) { - Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); - IParser::Pos postoken_groupby(token_groupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) + String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); + + Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) return false; + node->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); } - pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index b243f74d08f..1aad02705df 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,16 +5,13 @@ namespace DB { + class ParserKQLSummarize : public ParserKQLBase { -public: - ASTPtr group_expression_list; protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - static std::pair removeLastWord(String input); - static String getBinGroupbyString(String expr_bin); }; } diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index fadf5305e89..6356ad688b6 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -7,15 +7,6 @@ namespace DB { -bool ParserKQLTable :: parsePrepare(Pos & pos) -{ - if (!op_pos.empty()) - return false; - - op_pos.push_back(pos); - return true; -} - bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords @@ -44,12 +35,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "EXPLAIN" }); - if (op_pos.empty()) - return false; - - auto begin = pos; - pos = op_pos.back(); - + ASTPtr tables; String table_name(pos->begin,pos->end); String table_name_upcase(table_name); @@ -58,9 +44,10 
@@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) return false; - if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + if (!ParserTablesInSelectQuery().parse(pos, tables, expected)) return false; - pos = begin; + + node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index 1266b6e732d..c67dcb15156 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -8,11 +8,10 @@ namespace DB class ParserKQLTable : public ParserKQLBase { + protected: const char * getName() const override { return "KQL Table"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - bool parsePrepare(Pos &pos) override; - }; } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 3575e8ba175..b452bd27642 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -308,27 +308,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | project FirstName,LastName,Occupation | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | limit 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | take 1 | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | take 3 | take 1", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", - "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "throws Syntax error" + "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" }, { "Customers | sort by FirstName desc", @@ -360,7 +360,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | sort by FirstName | order by Age ", - "SELECT *\nFROM Customers\nORDER BY Age DESC" + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC" }, { "Customers | sort by FirstName nulls first", @@ -408,31 +408,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" + "SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)" }, { "Customers |summarize count() by Occupation", - "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + 
"SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize sum(Age) by Occupation", - "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize avg(Age) by Occupation", - "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize min(Age) by Occupation", - "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers |summarize max(Age) by Occupation", - "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" - }, - { - "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers | where FirstName contains 'pet'", From 3f65e6b2b1cae1b0bc0e19df43d9d7da79ee5bc4 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 5 Sep 2022 22:27:23 -0700 Subject: [PATCH 26/84] Kusto-phase1 : fixed style, removed trailing whitespaces --- src/Parsers/Kusto/ParserKQLQuery.cpp | 15 +++++---------- src/Parsers/Kusto/ParserKQLStatement.cpp | 2 +- src/Parsers/Kusto/ParserKQLSummarize.cpp | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 9fc32da7790..03cb5a8ad43 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -18,11 +18,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_FUNCTION; -} - String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) { Tokens tokens(text.c_str(), text.c_str() + text.size()); @@ -95,7 +90,7 @@ String ParserKQLBase :: getExprFromToken(Pos & pos) tokens.push_back(alias); } - for (auto token:tokens) + for (auto token:tokens) res = res.empty()? 
token : res +" " + token; return res; } @@ -120,7 +115,7 @@ std::unique_ptr ParserKQLQuery::getOperator(String & op_name) bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - struct KQLOperatorDataFlowState + struct KQLOperatorDataFlowState { String operator_name; bool need_input; @@ -206,7 +201,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ParserKQLTable().parse(npos, node, expected)) return false; } - else + else { String project_clause, order_clause, where_clause, limit_clause; auto last_pos = operation_pos.back().second; @@ -252,7 +247,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - if (operation_pos.size() > 0) + if (operation_pos.size() > 0) { for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) --last_pos; @@ -274,7 +269,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto set_query_clasue =[&](String op_str, String op_calsue) { auto oprator = getOperator(op_str); - if (oprator) + if (oprator) { Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); IParser::Pos pos_clause(token_clause, pos.max_depth); diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 573c953c313..21e480234d3 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -69,7 +69,7 @@ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expec String kql_statement; if (s_lparen.ignore(pos, expected)) - { + { ++paren_count; while (!pos->isEnd()) { diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 4d8d7753178..75eacb1adbd 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -49,7 +49,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte expr_groupby = String(pos_groupby->begin, pos->end); else expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; - + auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; expr_columns = groupby ? 
expr_aggregation_str + expr_groupby : expr_aggregation_str; From 4a68bfef393354468cb9b64b43dd9dddcd0d51eb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 6 Sep 2022 10:58:14 +0000 Subject: [PATCH 27/84] Fix tests with async Keeper start --- tests/integration/helpers/keeper_utils.py | 41 ++++++++ .../test_keeper_and_access_storage/test.py | 3 + tests/integration/test_keeper_auth/test.py | 3 + .../test_keeper_back_to_back/test.py | 2 + .../configs/enable_keeper.xml | 22 ----- .../configs/keeper_conf.xml | 8 -- .../test_keeper_force_recovery/test.py | 62 ++++-------- .../test.py | 54 +++-------- .../test_keeper_four_word_command/test.py | 96 ++++++------------- .../test_keeper_incorrect_config/test.py | 4 +- .../test_keeper_internal_secure/test.py | 3 + .../test_keeper_mntr_pressure/test.py | 41 +++----- .../test.py | 29 +----- .../test_keeper_multinode_simple/test.py | 32 +------ .../integration/test_keeper_nodes_add/test.py | 5 + .../test_keeper_nodes_move/test.py | 4 + .../test_keeper_nodes_remove/test.py | 14 ++- .../test_keeper_persistent_log/test.py | 17 +++- .../test.py | 8 ++ .../configs/enable_keeper1.xml | 34 ------- .../configs/enable_keeper2.xml | 34 ------- .../configs/enable_keeper3.xml | 34 ------- .../configs/enable_keeper_two_nodes_1.xml | 28 ------ .../configs/enable_keeper_two_nodes_2.xml | 28 ------ .../configs/enable_keeper_two_nodes_3.xml | 28 ------ .../test_keeper_restore_from_snapshot/test.py | 3 + .../test_keeper_secure_client/test.py | 3 +- tests/integration/test_keeper_session/test.py | 20 +--- .../test_keeper_snapshot_on_exit/test.py | 2 + .../test.py | 4 +- .../integration/test_keeper_snapshots/test.py | 11 ++- .../test_keeper_snapshots_multinode/test.py | 8 ++ .../configs/enable_keeper1.xml | 34 ------- .../configs/enable_keeper2.xml | 34 ------- .../configs/enable_keeper3.xml | 34 ------- .../test_keeper_three_nodes_start/test.py | 2 + .../test_keeper_three_nodes_two_alive/test.py | 12 ++- .../test_keeper_two_nodes_cluster/test.py | 29 +----- .../test_keeper_znode_time/test.py | 25 +---- .../test_keeper_zookeeper_converter/test.py | 3 + 40 files changed, 218 insertions(+), 640 deletions(-) create mode 100644 tests/integration/helpers/keeper_utils.py delete mode 100644 tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml delete mode 100644 tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml delete mode 100644 tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml delete mode 100644 tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml delete mode 100644 tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py new file mode 100644 index 00000000000..681407e5e8c --- /dev/null +++ b/tests/integration/helpers/keeper_utils.py @@ -0,0 +1,41 @@ +import socket 
+import time + + +def get_keeper_socket(cluster, node, port=9181): + hosts = cluster.get_instance_ip(node.name) + client = socket.socket() + client.settimeout(10) + client.connect((hosts, port)) + return client + + +def send_4lw_cmd(cluster, node, cmd="ruok", port=9181): + client = None + try: + client = get_keeper_socket(cluster, node, port) + client.send(cmd.encode()) + data = client.recv(100_000) + data = data.decode() + return data + finally: + if client is not None: + client.close() + + +NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" + + +def wait_until_connected(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) == NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_until_quorum_lost(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) != NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_nodes(cluster, nodes): + for node in nodes: + wait_until_connected(cluster, node) diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index ae6b0085094..72e3582979b 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -3,6 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) @@ -15,6 +16,8 @@ node1 = cluster.add_instance( def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node1) + yield cluster finally: cluster.shutdown() diff --git a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py index 364d93dfc53..e1331c35eeb 100644 --- a/tests/integration/test_keeper_auth/test.py +++ b/tests/integration/test_keeper_auth/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -25,6 +26,7 @@ SUPERAUTH = "super:admin" def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) yield cluster @@ -455,6 +457,7 @@ def test_auth_snapshot(started_cluster): ) node.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node) connection = get_fake_zk() diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 73fface02b4..5ae71841004 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -61,6 +62,7 @@ def stop_zk(zk): def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) yield cluster diff --git a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml b/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml deleted file mode 100644 index c1d38a1de52..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - - diff --git 
a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml b/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml deleted file mode 100644 index ebb0d98ddf4..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - node1 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery/test.py b/tests/integration/test_keeper_force_recovery/test.py index f3bb0ca56e3..f7c3787b4d8 100644 --- a/tests/integration/test_keeper_force_recovery/test.py +++ b/tests/integration/test_keeper_force_recovery/test.py @@ -2,6 +2,7 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time @@ -62,37 +63,6 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): - while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): while zk.retry(zk.exists, path) is None: time.sleep(0.1) @@ -104,9 +74,6 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: @@ -114,7 +81,7 @@ def test_cluster_recovery(started_cluster): for node in nodes[CLUSTER_SIZE:]: node.stop_clickhouse() - wait_nodes(nodes[:CLUSTER_SIZE]) + keeper_utils.wait_nodes(cluster, nodes[:CLUSTER_SIZE]) node_zks = [get_fake_zk(node.name) for node in nodes[:CLUSTER_SIZE]] @@ -152,7 +119,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -167,8 +134,7 @@ def test_cluster_recovery(started_cluster): node.stop_clickhouse() # wait for node1 to lose quorum - while send_4lw_cmd(nodes[0].name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, nodes[0]) nodes[0].copy_file_to_container( os.path.join(CONFIG_DIR, "recovered_keeper1.xml"), @@ -177,9 +143,15 @@ def test_cluster_recovery(started_cluster): nodes[0].query("SYSTEM RELOAD CONFIG") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG - send_4lw_cmd(nodes[0].name, "rcvr") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) + keeper_utils.send_4lw_cmd(cluster, nodes[0], "rcvr") + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) # add one node to restore the quorum nodes[CLUSTER_SIZE].copy_file_to_container( @@ -191,10 +163,10 @@ def 
test_cluster_recovery(started_cluster): ) nodes[CLUSTER_SIZE].start_clickhouse() - wait_until_connected(nodes[CLUSTER_SIZE].name) + keeper_utils.wait_until_connected(cluster, nodes[CLUSTER_SIZE]) # node1 should have quorum now and accept requests - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks.append(get_fake_zk(nodes[CLUSTER_SIZE].name)) @@ -206,7 +178,7 @@ def test_cluster_recovery(started_cluster): f"/etc/clickhouse-server/config.d/enable_keeper{i+1}.xml", ) node.start_clickhouse() - wait_until_connected(node.name) + keeper_utils.wait_until_connected(cluster, node) node_zks.append(get_fake_zk(node.name)) # refresh old zk sessions @@ -223,7 +195,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zks[-1], "/test_force_recovery_last", "somedatalast") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) for zk in node_zks[:nodes_left]: assert_all_data(zk) diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py index 0a554e33119..1c0d5e9a306 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/test.py +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -2,10 +2,11 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time -from kazoo.client import KazooClient +from kazoo.client import KazooClient, KazooRetry CLUSTER_SIZE = 3 @@ -45,47 +46,19 @@ def started_cluster(): def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( - hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + hosts=cluster.get_instance_ip(nodename) + ":9181", + timeout=timeout, + connection_retry=KazooRetry(max_tries=10), + command_retry=KazooRetry(max_tries=10), ) _fake_zk_instance.start() return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): - while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): - while zk.exists(path) is None: + while zk.retry(zk.exists, path) is None: time.sleep(0.1) - assert zk.get(path)[0] == data.encode() + assert zk.retry(zk.get, path)[0] == data.encode() def close_zk(zk): @@ -93,20 +66,17 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: - wait_nodes(nodes) + keeper_utils.wait_nodes(cluster, nodes) node_zks = [get_fake_zk(node.name) for node in nodes] data_in_cluster = [] def add_data(zk, path, data): - zk.create(path, data.encode()) + zk.retry(zk.create, path, data.encode()) data_in_cluster.append((path, data)) def assert_all_data(zk): @@ -137,7 +107,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, 
"/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -156,7 +126,7 @@ def test_cluster_recovery(started_cluster): ) nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) assert_all_data(get_fake_zk(nodes[0].name)) finally: diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index e8136d322d3..30abc7422c4 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -1,6 +1,7 @@ import socket import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -25,6 +26,10 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -56,28 +61,6 @@ def clear_znodes(): destroy_zk_client(zk) -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - # zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for n in [node1, node2, node3]: - wait_node(n) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -86,23 +69,15 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def reset_node_stats(node_name=node1.name): +def reset_node_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"srst") client.recv(10) finally: @@ -110,23 +85,10 @@ def reset_node_stats(node_name=node1.name): client.close() -def send_4lw_cmd(node_name=node1.name, cmd="ruok"): +def reset_conn_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def reset_conn_stats(node_name=node1.name): - client = None - try: - client = get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"crst") client.recv(10_000) finally: @@ -138,7 +100,7 @@ def test_cmd_ruok(started_cluster): client = None try: wait_nodes() - data = send_4lw_cmd(cmd="ruok") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="ruok") assert data == "imok" finally: close_keeper_socket(client) @@ -187,7 +149,7 @@ def test_cmd_mntr(started_cluster): clear_znodes() # reset stat first - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action( @@ -200,7 +162,7 
@@ def test_cmd_mntr(started_cluster): delete_cnt=2, ) - data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") # print(data.decode()) reader = csv.reader(data.split("\n"), delimiter="\t") @@ -252,10 +214,10 @@ def test_cmd_srst(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="srst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srst") assert data.strip() == "Server stats reset." - data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") assert len(data) != 0 # print(data) @@ -279,7 +241,7 @@ def test_cmd_conf(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="conf") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="conf") reader = csv.reader(data.split("\n"), delimiter="=") result = {} @@ -335,8 +297,8 @@ def test_cmd_conf(started_cluster): def test_cmd_isro(started_cluster): wait_nodes() - assert send_4lw_cmd(node1.name, "isro") == "rw" - assert send_4lw_cmd(node2.name, "isro") == "ro" + assert keeper_utils.send_4lw_cmd(cluster, node1, "isro") == "rw" + assert keeper_utils.send_4lw_cmd(cluster, node2, "isro") == "ro" def test_cmd_srvr(started_cluster): @@ -345,12 +307,12 @@ def test_cmd_srvr(started_cluster): wait_nodes() clear_znodes() - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="srvr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srvr") print("srvr output -------------------------------------") print(data) @@ -380,13 +342,13 @@ def test_cmd_stat(started_cluster): try: wait_nodes() clear_znodes() - reset_node_stats(node1.name) - reset_conn_stats(node1.name) + reset_node_stats(node1) + reset_conn_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="stat") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="stat") print("stat output -------------------------------------") print(data) @@ -440,7 +402,7 @@ def test_cmd_cons(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output -------------------------------------") print(data) @@ -485,12 +447,12 @@ def test_cmd_crst(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="crst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="crst") print("crst output -------------------------------------") print(data) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output(after crst) -------------------------------------") print(data) @@ -537,7 +499,7 @@ def test_cmd_dump(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, ephemeral_cnt=2) - data = send_4lw_cmd(cmd="dump") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="dump") print("dump output -------------------------------------") print(data) @@ -563,7 +525,7 @@ def test_cmd_wchs(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchs") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchs") print("wchs output -------------------------------------") print(data) @@ -598,7 +560,7 @@ def test_cmd_wchc(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) 
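     # Create a couple of znodes and register watches on them so that the
     # wchc output below has watch sessions to report.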
do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchc") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchc") print("wchc output -------------------------------------") print(data) @@ -622,7 +584,7 @@ def test_cmd_wchp(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchp") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchp") print("wchp output -------------------------------------") print(data) diff --git a/tests/integration/test_keeper_incorrect_config/test.py b/tests/integration/test_keeper_incorrect_config/test.py index e0a28b00b4f..9912959611a 100644 --- a/tests/integration/test_keeper_incorrect_config/test.py +++ b/tests/integration/test_keeper_incorrect_config/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( @@ -173,7 +174,7 @@ NORMAL_CONFIG = """ """ -def test_duplicate_endpoint(started_cluster): +def test_invalid_configs(started_cluster): node1.stop_clickhouse() def assert_config_fails(config): @@ -192,5 +193,6 @@ def test_duplicate_endpoint(started_cluster): "/etc/clickhouse-server/config.d/enable_keeper1.xml", NORMAL_CONFIG ) node1.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node1) assert node1.query("SELECT 1") == "1\n" diff --git a/tests/integration/test_keeper_internal_secure/test.py b/tests/integration/test_keeper_internal_secure/test.py index 2d45e95e4ff..2448a426fe2 100644 --- a/tests/integration/test_keeper_internal_secure/test.py +++ b/tests/integration/test_keeper_internal_secure/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -47,6 +48,8 @@ def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + yield cluster finally: diff --git a/tests/integration/test_keeper_mntr_pressure/test.py b/tests/integration/test_keeper_mntr_pressure/test.py index 471767210d6..1468aa01896 100644 --- a/tests/integration/test_keeper_mntr_pressure/test.py +++ b/tests/integration/test_keeper_mntr_pressure/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import pytest import random import string @@ -30,6 +31,7 @@ NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving request def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -37,40 +39,22 @@ def started_cluster(): cluster.shutdown() -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - def test_aggressive_mntr(started_cluster): - def go_mntr(node_name): - for _ in range(100000): - print(node_name, send_4lw_cmd(node_name, "mntr")) + def go_mntr(node): + for _ in range(10000): + try: + print(node.name, keeper_utils.send_4lw_cmd(cluster, node, "mntr")) + except 
ConnectionRefusedError: + pass - node1_thread = threading.Thread(target=lambda: go_mntr(node1.name)) - node2_thread = threading.Thread(target=lambda: go_mntr(node2.name)) - node3_thread = threading.Thread(target=lambda: go_mntr(node3.name)) + node1_thread = threading.Thread(target=lambda: go_mntr(node1)) + node2_thread = threading.Thread(target=lambda: go_mntr(node2)) + node3_thread = threading.Thread(target=lambda: go_mntr(node3)) node1_thread.start() node2_thread.start() node3_thread.start() @@ -78,8 +62,7 @@ def test_aggressive_mntr(started_cluster): node2.stop_clickhouse() node3.stop_clickhouse() - while send_4lw_cmd(node1.name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, node1) node1.stop_clickhouse() starters = [] diff --git a/tests/integration/test_keeper_multinode_blocade_leader/test.py b/tests/integration/test_keeper_multinode_blocade_leader/test.py index d6d01a5d0a6..06a5cd8dc5a 100644 --- a/tests/integration/test_keeper_multinode_blocade_leader/test.py +++ b/tests/integration/test_keeper_multinode_blocade_leader/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -44,6 +45,7 @@ TODO remove this when jepsen tests will be written. def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -55,31 +57,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -92,7 +69,6 @@ def get_fake_zk(nodename, timeout=30.0): @pytest.mark.timeout(600) def test_blocade_leader(started_cluster): for i in range(100): - wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( @@ -296,7 +272,6 @@ def restart_replica_for_sure(node, table_name, zk_replica_path): @pytest.mark.timeout(600) def test_blocade_leader_twice(started_cluster): for i in range(100): - wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index 694600acc67..b8bdb098c0d 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -32,6 +33,7 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -43,31 +45,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM 
system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -78,7 +55,6 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_multinode(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -120,7 +96,6 @@ def test_read_write_multinode(started_cluster): def test_watch_on_follower(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -177,7 +152,6 @@ def test_watch_on_follower(started_cluster): def test_session_expiration(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3", timeout=3.0) @@ -219,7 +193,6 @@ def test_session_expiration(started_cluster): def test_follower_restart(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node1_zk.create("/test_restart_node", b"hello") @@ -244,7 +217,6 @@ def test_follower_restart(started_cluster): def test_simple_replicated_table(started_cluster): - wait_nodes() for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format( diff --git a/tests/integration/test_keeper_nodes_add/test.py b/tests/integration/test_keeper_nodes_add/test.py index c3449534e87..aad674332ac 100644 --- a/tests/integration/test_keeper_nodes_add/test.py +++ b/tests/integration/test_keeper_nodes_add/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -41,9 +42,11 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def test_nodes_add(started_cluster): + keeper_utils.wait_until_connected(cluster, node1) zk_conn = get_fake_zk(node1) for i in range(100): @@ -62,6 +65,7 @@ def test_nodes_add(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node2) zk_conn2 = get_fake_zk(node2) @@ -93,6 +97,7 @@ def test_nodes_add(started_cluster): node2.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node3) zk_conn3 = get_fake_zk(node3) for i in range(100): diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 31082846fb8..c816d69e2d1 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -11,6 +11,7 @@ import os import time from multiprocessing.dummy import Pool from helpers.test_tools import assert_eq_with_retry +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState cluster = ClickHouseCluster(__file__) @@ -33,6 +34,8 @@ def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + yield 
cluster finally: @@ -41,6 +44,7 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def get_fake_zk(node, timeout=30.0): diff --git a/tests/integration/test_keeper_nodes_remove/test.py b/tests/integration/test_keeper_nodes_remove/test.py index 13303d320eb..03536f07064 100644 --- a/tests/integration/test_keeper_nodes_remove/test.py +++ b/tests/integration/test_keeper_nodes_remove/test.py @@ -2,6 +2,8 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils +import time import os from kazoo.client import KazooClient, KazooState @@ -23,6 +25,7 @@ node3 = cluster.add_instance( def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -79,9 +82,12 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn3 = get_fake_zk(node3) zk_conn3.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node3.stop_clickhouse() @@ -91,6 +97,7 @@ def test_nodes_remove(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") + zk_conn = get_fake_zk(node1) zk_conn.sync("/test_two_0") @@ -98,8 +105,11 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn2 = get_fake_zk(node2) zk_conn2.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node2.stop_clickhouse() diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index 377fa436a87..d7cc79836a7 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -32,6 +33,8 @@ def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) + yield cluster finally: @@ -46,6 +49,11 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -62,7 +70,7 @@ def test_state_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -111,7 +119,7 @@ def test_state_duplicate_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_duplicated_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -119,7 +127,7 @@ def test_state_duplicate_restart(started_cluster): node_zk2.create("/test_state_duplicated_restart/just_test2") node_zk2.create("/test_state_duplicated_restart/just_test3") - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk3 = get_connection_zk("node") @@ -159,6 +167,7 @@ def test_state_duplicate_restart(started_cluster): # http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html def test_ephemeral_after_restart(started_cluster): + try: node_zk = 
None node_zk2 = None @@ -176,7 +185,7 @@ def test_ephemeral_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_ephemeral_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_persistent_log_multinode/test.py b/tests/integration/test_keeper_persistent_log_multinode/test.py index f15e772fd5f..1552abd32e9 100644 --- a/tests/integration/test_keeper_persistent_log_multinode/test.py +++ b/tests/integration/test_keeper_persistent_log_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -26,10 +27,15 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + wait_nodes() yield cluster @@ -100,6 +106,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml deleted file mode 100644 index 43800bd2dfb..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml deleted file mode 100644 index d51e420f733..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git 
a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml deleted file mode 100644 index 3f1ee1e01a8..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml deleted file mode 100644 index a99bd5d5296..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index 7270c84bdda..7f2c2e89703 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -24,6 +25,7 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -84,6 +86,7 @@ def test_recover_from_snapshot(started_cluster): # stale node should recover from leader's snapshot # with some sanitizers can start longer than 5 seconds node3.start_clickhouse(20) + keeper_utils.wait_until_connected(cluster, node3) print("Restarted") try: diff --git a/tests/integration/test_keeper_secure_client/test.py b/tests/integration/test_keeper_secure_client/test.py index 55e00880da0..81584129052 100644 --- a/tests/integration/test_keeper_secure_client/test.py +++ b/tests/integration/test_keeper_secure_client/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import string import os import time @@ -40,4 +41,4 @@ def started_cluster(): def test_connection(started_cluster): # just nothrow - node2.query("SELECT * FROM system.zookeeper WHERE path = '/'") + node2.query_with_retry("SELECT * FROM system.zookeeper WHERE path = '/'") diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py index 30db4d9548c..645045e7865 100644 --- a/tests/integration/test_keeper_session/test.py +++ b/tests/integration/test_keeper_session/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time import socket import struct @@ -44,25 +45,8 @@ def destroy_zk_client(zk): pass -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - 
def wait_nodes(): - for n in [node1]: - wait_node(n) + keeper_utils.wait_nodes(cluster, [node1]) def get_fake_zk(nodename, timeout=30.0): diff --git a/tests/integration/test_keeper_snapshot_on_exit/test.py b/tests/integration/test_keeper_snapshot_on_exit/test.py index 1ca5888ab4d..933e83414a4 100644 --- a/tests/integration/test_keeper_snapshot_on_exit/test.py +++ b/tests/integration/test_keeper_snapshot_on_exit/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import os from kazoo.client import KazooClient @@ -27,6 +28,7 @@ def get_fake_zk(node, timeout=30.0): def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster diff --git a/tests/integration/test_keeper_snapshot_small_distance/test.py b/tests/integration/test_keeper_snapshot_small_distance/test.py index 4351c5ac96f..6a64cf0ac92 100644 --- a/tests/integration/test_keeper_snapshot_small_distance/test.py +++ b/tests/integration/test_keeper_snapshot_small_distance/test.py @@ -2,6 +2,7 @@ ##!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from multiprocessing.dummy import Pool from kazoo.client import KazooClient, KazooState import random @@ -22,7 +23,7 @@ node3 = cluster.add_instance( def start_zookeeper(node): - node1.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"]) + node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"]) def stop_zookeeper(node): @@ -66,6 +67,7 @@ def stop_clickhouse(node): def start_clickhouse(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots, node): diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index 08f60e538a4..a27ca6f92a5 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -3,6 +3,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -35,6 +36,7 @@ def create_random_path(prefix="", depth=1): def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) yield cluster @@ -50,6 +52,11 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -69,7 +76,7 @@ def test_state_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -123,7 +130,7 @@ def test_ephemeral_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_snapshots_multinode/test.py b/tests/integration/test_keeper_snapshots_multinode/test.py index 1461f35e6a4..52d4ae71e33 100644 --- a/tests/integration/test_keeper_snapshots_multinode/test.py +++ b/tests/integration/test_keeper_snapshots_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import 
string import os @@ -20,10 +21,15 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + wait_nodes() yield cluster @@ -94,6 +100,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml deleted file mode 100644 index 43800bd2dfb..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py index e451f969b37..c8476568786 100644 --- a/tests/integration/test_keeper_three_nodes_start/test.py +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -3,6 +3,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -31,6 +32,7 @@ def get_fake_zk(nodename, timeout=30.0): def test_smoke(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2]) node1_zk = get_fake_zk("node1") node1_zk.create("/test_alive", b"aaaa") diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index f1de469c5a1..591dde6a70a 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -39,6 +40,7 @@ def get_fake_zk(nodename, timeout=30.0): def 
started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -48,6 +50,7 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def delete_with_retry(node_name, path): @@ -74,10 +77,10 @@ def test_start_offline(started_cluster): p.map(start, [node2, node3]) assert node2.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) assert node3.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") @@ -110,10 +113,10 @@ def test_start_non_existing(started_cluster): p.map(start, [node2, node1]) assert node1.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) assert node2.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") @@ -138,6 +141,7 @@ def test_restart_third_node(started_cluster): node1_zk.create("/test_restart", b"aaaa") node3.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node3) assert node3.contains_in_log( "Connected to ZooKeeper (or Keeper) before internal Keeper start" diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index 8c0276f7d77..b87dcf6e758 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -29,6 +30,7 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster @@ -40,31 +42,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -75,7 +52,6 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_two_nodes(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") @@ -107,7 +83,6 @@ def test_read_write_two_nodes(started_cluster): def test_read_write_two_nodes_with_blocade(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1", timeout=5.0) node2_zk = get_fake_zk("node2", timeout=5.0) diff --git a/tests/integration/test_keeper_znode_time/test.py b/tests/integration/test_keeper_znode_time/test.py index bff3d52014e..f2076acc4d2 100644 --- 
a/tests/integration/test_keeper_znode_time/test.py +++ b/tests/integration/test_keeper_znode_time/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -42,29 +43,8 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) def get_fake_zk(nodename, timeout=30.0): @@ -129,6 +109,7 @@ def test_server_restart(started_cluster): node1_zk.set("/test_server_restart/" + str(child_node), b"somevalue") node3.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node3) node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 50a9ee6a4a7..e459078f8ef 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -11,6 +12,7 @@ from kazoo.exceptions import ( ) import os import time +import socket cluster = ClickHouseCluster(__file__) @@ -60,6 +62,7 @@ def stop_clickhouse(): def start_clickhouse(): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots): From 7f4935b782b4519a6d1fd79fab2ae2aa6f6173ea Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 6 Sep 2022 05:52:31 -0700 Subject: [PATCH 28/84] Kusto-phase1: removed extra spaces --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index b250f5def60..f8e4f9eaab0 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -29,7 +29,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Po while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) { auto tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type == TokenType::Comma ) + if (token_pos->type == TokenType::Comma) new_expr = new_expr + logic_op; else new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 03cb5a8ad43..5e07e3c4d9a 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -61,7 +61,7 @@ String ParserKQLBase :: getExprFromToken(Pos & pos) if (token == "=") { ++pos; - if (String(pos->begin,pos->end) != "~" ) + 
if (String(pos->begin,pos->end) != "~") { alias = tokens.back(); tokens.pop_back(); From 896174e0ba5a18b79daf59c39b85493a1e905bff Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 6 Sep 2022 12:45:22 -0700 Subject: [PATCH 29/84] Kusto-phase1: fixed small build issue --- src/Parsers/Kusto/ParserKQLQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 5e07e3c4d9a..8591b0f04df 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -90,7 +90,7 @@ String ParserKQLBase :: getExprFromToken(Pos & pos) tokens.push_back(alias); } - for (auto token:tokens) + for (auto const &token : tokens) res = res.empty()? token : res +" " + token; return res; } @@ -231,7 +231,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else { - while (operation_pos.size() > 0) + while (!operation_pos.empty()) { auto prev_op = operation_pos.back().first; auto prev_pos = operation_pos.back().second; From a0735a5816a751a0cc71886d65e37ff069250df3 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 6 Sep 2022 22:28:25 -0700 Subject: [PATCH 30/84] Kusto-phase1: use empty to check vector instead of size --- src/Parsers/Kusto/ParserKQLQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 8591b0f04df..04ee36705a9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -247,7 +247,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - if (operation_pos.size() > 0) + if (!operation_pos.empty()) { for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) --last_pos; From 77d0971efa8e09c08675457abefc4166cd826cb3 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 15 Sep 2022 14:46:55 +0800 Subject: [PATCH 31/84] Serialize tracing context --- src/Common/OpenTelemetryTraceContext.cpp | 31 +++++++++++++++++++++++- src/Common/OpenTelemetryTraceContext.h | 16 ++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index 7a1f94926d5..af443861bea 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { @@ -227,6 +227,35 @@ String TracingContext::composeTraceparentHeader() const static_cast(trace_flags)); } +void TracingContext::deserialize(ReadBuffer & buf) +{ + if (!buf.eof() && *buf.position() == 't') + { + buf >> "tracing: " + >> this->trace_id + >> " " + >> this->span_id + >> " " + >> this->tracestate + >> " " + >> this->trace_flags + >> "\n"; + } +} + +void TracingContext::serialize(WriteBuffer & buf) const +{ + buf << "tracing: " + << this->trace_id + << " " + << this->span_id + << " " + << this->tracestate + << " " + << this->trace_flags + << "\n"; +} + const TracingContextOnThread & CurrentContext() { return current_thread_trace_context; diff --git a/src/Common/OpenTelemetryTraceContext.h b/src/Common/OpenTelemetryTraceContext.h index 63136f8731d..20090960814 100644 --- a/src/Common/OpenTelemetryTraceContext.h +++ b/src/Common/OpenTelemetryTraceContext.h @@ -7,6 +7,8 @@ namespace DB struct Settings; class OpenTelemetrySpanLog; +class WriteBuffer; +class ReadBuffer; namespace OpenTelemetry { @@ -63,6 +65,9 @@ struct TracingContext { return trace_id != 
UUID(); } + + void deserialize(ReadBuffer & buf); + void serialize(WriteBuffer & buf) const; }; /// Tracing context kept on each thread @@ -155,7 +160,18 @@ struct SpanHolder : public Span void finish() noexcept; }; +} // End of namespace OpenTelemetry + +inline WriteBuffer & operator<<(WriteBuffer & buf, const OpenTelemetry::TracingContext & context) +{ + context.serialize(buf); + return buf; } +inline ReadBuffer & operator>> (ReadBuffer & buf, OpenTelemetry::TracingContext & context) +{ + context.deserialize(buf); + return buf; } +} // End of namespace DB From 52224875e2e78d3ddfb15ce93545cc09afa97ffc Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 15 Sep 2022 14:47:43 +0800 Subject: [PATCH 32/84] Serialize tracing context to DDL log entry --- src/Interpreters/DDLTask.cpp | 4 ++++ src/Interpreters/DDLTask.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 50876ed29af..459cfc3be6f 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -94,6 +94,8 @@ String DDLLogEntry::toString() const wb << "settings: " << serializeAST(ast) << "\n"; } + wb << this->tracing_context; + return wb.str(); } @@ -132,6 +134,8 @@ void DDLLogEntry::parse(const String & data) ASTPtr settings_ast = parseQuery(parser, settings_str, max_size, max_depth); settings.emplace(std::move(settings_ast->as()->changes)); } + + rb >> this->tracing_context; } assertEOF(rb); diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index d5990edd43f..fc85188a865 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -75,6 +76,7 @@ struct DDLLogEntry std::vector hosts; String initiator; // optional std::optional settings; + OpenTelemetry::TracingContext tracing_context; void setSettingsIfRequired(ContextPtr context); String toString() const; From 490089cc4f8baacff0113bd30d4977243b80f0f8 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 15 Sep 2022 14:48:24 +0800 Subject: [PATCH 33/84] Copy tracing context from current thread to DDLLogEntry as parent context --- src/Interpreters/executeDDLQueryOnCluster.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 7cc4efcb64d..06a6512e21b 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -164,6 +164,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, entry.query = queryToString(query_ptr); entry.initiator = ddl_worker.getCommonHostID(); entry.setSettingsIfRequired(context); + entry.tracing_context = OpenTelemetry::CurrentContext(); String node_path = ddl_worker.enqueueQuery(entry); return getDistributedDDLStatus(node_path, entry, context); From a4ef0c0281b13eac3ffee4523a08ea5f3a171da7 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 15 Sep 2022 14:57:00 +0800 Subject: [PATCH 34/84] Set up tracing context for DDLWorker --- src/Interpreters/DDLWorker.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 6ec20ab5f5f..408fa2a28d3 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -515,6 +516,11 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) LOG_DEBUG(log, "Processing task {} ({})", 
task.entry_name, task.entry.query);
     chassert(!task.completely_processed);
 
+    /// Set up tracing context on the current thread for the current DDL
+    OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__,
+                                                           task.entry.tracing_context,
+                                                           this->context->getOpenTelemetrySpanLog());
+
     String active_node_path = task.getActiveNodePath();
     String finished_node_path = task.getFinishedNodePath();

From ac848727e5fc8aec4962d5fad25ad68c8501b1b6 Mon Sep 17 00:00:00 2001
From: Frank Chen
Date: Thu, 15 Sep 2022 18:00:08 +0800
Subject: [PATCH 35/84] Read tracing context at the right position

---
 src/Interpreters/DDLTask.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 459cfc3be6f..b867b52ac20 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -134,10 +134,10 @@ void DDLLogEntry::parse(const String & data)
             ASTPtr settings_ast = parseQuery(parser, settings_str, max_size, max_depth);
             settings.emplace(std::move(settings_ast->as<ASTSetQuery>()->changes));
         }
-
-        rb >> this->tracing_context;
     }
 
+    rb >> this->tracing_context;
+
     assertEOF(rb);

From d62ba01e93661f454cac40cccc14a0f3dc135267 Mon Sep 17 00:00:00 2001
From: HarryLeeIBM
Date: Thu, 15 Sep 2022 06:25:23 -0700
Subject: [PATCH 36/84] Fix SipHash endianness issue for s390x

---
 src/Common/SipHash.h | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h
index 6162de48143..6e1138b6510 100644
--- a/src/Common/SipHash.h
+++ b/src/Common/SipHash.h
@@ -32,6 +32,11 @@
     v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \
   } while(0)
 
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define CURRENT_BYTES_IDX(i) (7-i)
+#else
+#define CURRENT_BYTES_IDX(i) (i)
+#endif
 
 class SipHash
 {
@@ -55,7 +60,7 @@ private:
     ALWAYS_INLINE void finalize()
     {
         /// In the last free byte, we write the remainder of the division by 256.
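        /// On big-endian machines the bytes of current_word are laid out in
        /// reverse, so a raw index of 7 would no longer address the count
        /// byte; CURRENT_BYTES_IDX, defined above, remaps the index so the
        /// same logical byte is written regardless of endianness.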
-        current_bytes[7] = static_cast<UInt8>(cnt);
+        current_bytes[CURRENT_BYTES_IDX(7)] = static_cast<UInt8>(cnt);
 
         v3 ^= current_word;
         SIPROUND;
@@ -92,7 +97,7 @@ public:
     {
         while (cnt & 7 && data < end)
         {
-            current_bytes[cnt & 7] = *data;
+            current_bytes[CURRENT_BYTES_IDX(cnt & 7)] = *data;
             ++data;
             ++cnt;
         }
@@ -125,13 +130,13 @@ public:
         current_word = 0;
         switch (end - data)
         {
-            case 7: current_bytes[6] = data[6]; [[fallthrough]];
-            case 6: current_bytes[5] = data[5]; [[fallthrough]];
-            case 5: current_bytes[4] = data[4]; [[fallthrough]];
-            case 4: current_bytes[3] = data[3]; [[fallthrough]];
-            case 3: current_bytes[2] = data[2]; [[fallthrough]];
-            case 2: current_bytes[1] = data[1]; [[fallthrough]];
-            case 1: current_bytes[0] = data[0]; [[fallthrough]];
+            case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]];
+            case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]];
+            case 5: current_bytes[CURRENT_BYTES_IDX(4)] = data[4]; [[fallthrough]];
+            case 4: current_bytes[CURRENT_BYTES_IDX(3)] = data[3]; [[fallthrough]];
+            case 3: current_bytes[CURRENT_BYTES_IDX(2)] = data[2]; [[fallthrough]];
+            case 2: current_bytes[CURRENT_BYTES_IDX(1)] = data[1]; [[fallthrough]];
+            case 1: current_bytes[CURRENT_BYTES_IDX(0)] = data[0]; [[fallthrough]];
             case 0: break;
         }
     }
@@ -157,8 +162,8 @@ public:
     void get128(char * out)
     {
         finalize();
-        unalignedStoreLE<UInt64>(out, v0 ^ v1);
-        unalignedStoreLE<UInt64>(out + 8, v2 ^ v3);
+        unalignedStore<UInt64>(out, v0 ^ v1);
+        unalignedStore<UInt64>(out + 8, v2 ^ v3);
     }
 
     template <typename T>

From f3e8738145b6505a8cc2f48f01bb5767a6d9ea9c Mon Sep 17 00:00:00 2001
From: HarryLeeIBM
Date: Sat, 17 Sep 2022 19:48:08 -0700
Subject: [PATCH 37/84] Fixed issues found in code review

---
 src/Common/SipHash.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h
index 6e1138b6510..281a65ca36a 100644
--- a/src/Common/SipHash.h
+++ b/src/Common/SipHash.h
@@ -32,8 +32,10 @@
     v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \
   } while(0)
 
+/// Define the macro CURRENT_BYTES_IDX for building the index used in the current_bytes array
+/// to ensure the correct byte order on machines of either endianness
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-#define CURRENT_BYTES_IDX(i) (7-i)
+#define CURRENT_BYTES_IDX(i) (7 - i)
 #else
 #define CURRENT_BYTES_IDX(i) (i)
 #endif
@@ -230,3 +232,5 @@ inline UInt64 sipHash64(const std::string & s)
 {
     return sipHash64(s.data(), s.size());
 }
+
+#undef CURRENT_BYTES_IDX

From 2ae43bb4e8a8890629f36a0bbc3d5a0229f463ab Mon Sep 17 00:00:00 2001
From: Frank Chen
Date: Mon, 19 Sep 2022 11:11:27 +0800
Subject: [PATCH 38/84] Add test case

Signed-off-by: Frank Chen

---
 src/Common/OpenTelemetryTraceContext.cpp      | 12 +--
 .../02423_ddl_for_opentelemetry.reference     |  8 ++
 .../02423_ddl_for_opentelemetry.sh            | 92 +++++++++++++++++++
 3 files changed, 106 insertions(+), 6 deletions(-)
 create mode 100644 tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference
 create mode 100755 tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh

diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp
index 314118201bf..0a64900db9b 100644
--- a/src/Common/OpenTelemetryTraceContext.cpp
+++ b/src/Common/OpenTelemetryTraceContext.cpp
@@ -232,11 +232,11 @@ void TracingContext::deserialize(ReadBuffer & buf)
     {
         buf >> "tracing: "
             >> this->trace_id
-            >> " "
+            >> "\n"
             >> this->span_id
-            >> " "
+            >> "\n"
             >> this->tracestate
-            >> " "
+            >> "\n"
             >> this->trace_flags
             >> "\n";
     }
@@ -246,11 +246,11 @@ void TracingContext::serialize(WriteBuffer & buf) const
 {
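     /// The fields are written in exactly the order and with exactly the
     /// separators that deserialize() above expects, so an entry emitted by
     /// one server can be read back field-for-field by another.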
buf << "tracing: " << this->trace_id - << " " + << "\n" << this->span_id - << " " + << "\n" << this->tracestate - << " " + << "\n" << this->trace_flags << "\n"; } diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference new file mode 100644 index 00000000000..19b2fe09a20 --- /dev/null +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -0,0 +1,8 @@ +1 +1 +2 +===case 2==== +1 +1 +exception_code=60 +exception_code=60 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh new file mode 100755 index 00000000000..272eaf4e345 --- /dev/null +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# Tags: distributed + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# This function takes following arguments: +# $1 - OpenTelemetry Trace Id +# $2 - Query +# $3 - Query Settings +# $4 - Output device, default is stdout +function execute_query() +{ + if [ -n "${4}" ]; then + output=$4 + else + output="/dev/stdout" + fi + + echo $2 | ${CLICKHOUSE_CURL} \ + -X POST \ + -H "traceparent: 00-$1-5150000000000515-01" \ + -H "tracestate: a\nb cd" \ + "${CLICKHOUSE_URL}?${3}" \ + --data @- \ + > $output +} + +# This function takes 3 argument: +# $1 - OpenTelemetry Trace Id +# $2 - Fields +# $3 - operation_name pattern +function check_span() +{ +${CLICKHOUSE_CLIENT} -nq " + SYSTEM FLUSH LOGS; + + SELECT ${2} + FROM system.opentelemetry_span_log + WHERE finish_date >= yesterday() + AND lower(hex(trace_id)) = '${1}' + AND operation_name like '${3}' + ;" +} + +# +# Set up +# +${CLICKHOUSE_CLIENT} -q " +DROP TABLE IF EXISTS ddl_test_for_opentelemetry; +" + +# +# Case 1, a normal case +# +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); +execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none" + +check_span $trace_id "count()" "HTTPHandler" +check_span $trace_id "count()" "%DDLWorker::processTask%" + +# There should be two 'query' spans, +# one is for the HTTPHandler, the other is for the DDL executing in DDLWorker +check_span $trace_id "count()" "query" + +# Echo a separator so that the reference file is more clear for reading +echo "===case 2====" + +# +# Case 2, an exceptional case, DROP a non-exist table +# +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); + +# Since this query is supposed to fail, we redirect the error message to /dev/null to discard the error message so that it won't pollute the reference file. +# The exception will be checked in the span log +execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER test_shard_localhost" "distributed_ddl_output_mode=none" "/dev/null" + +check_span $trace_id "count()" "HTTPHandler" +check_span $trace_id "count()" "%DDLWorker::processTask%" + +# There should be two 'query' spans, +# one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. 
+# Both of these two spans contain the exception
+check_span $trace_id "concat('exception_code=', attribute['clickhouse.exception_code'])" "query"
+
+#
+# Tear down
+#
+${CLICKHOUSE_CLIENT} -q "
+DROP TABLE IF EXISTS ddl_test_for_opentelemetry;
+"
\ No newline at end of file

From 06ae2fb2b581d6a5eb14c639c8cfbb1fe73353be Mon Sep 17 00:00:00 2001
From: Frank Chen
Date: Mon, 19 Sep 2022 11:20:58 +0800
Subject: [PATCH 39/84] Remove assertEOF to improve compatibility

Signed-off-by: Frank Chen

---
 src/Interpreters/DDLTask.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index b867b52ac20..e33617d59f5 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -138,8 +138,6 @@ void DDLLogEntry::parse(const String & data)
 
     rb >> this->tracing_context;
 
-    assertEOF(rb);
-
     if (!host_id_strings.empty())
     {
         hosts.resize(host_id_strings.size());

From 37ae7a8cca56ebbbda0802b2c411ac0fb571687b Mon Sep 17 00:00:00 2001
From: Yong Wang
Date: Sun, 18 Sep 2022 20:25:27 -0700
Subject: [PATCH 40/84] Kusto-phase1: apply parser comments to Kusto, remove
 unused variable

---
 src/Interpreters/executeQuery.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index e61494792b0..1a7c5032b02 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -389,12 +389,11 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
     String query_table;
     try
     {
-        const Dialect & dialect = settings.dialect;
-
-        if (dialect == Dialect::kusto && !internal)
+        if (settings.dialect == Dialect::kusto && !internal)
        {
            ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert);

+            /// TODO: parser should fail early when max_query_size limit is reached.
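+            /// Both branches construct a different parser but invoke the same
+            /// parseQuery() helper, so the max_query_size and max_parser_depth
+            /// limits apply to KQL queries just as to the native dialect.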
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } else From ec852b3faa418765dc3201b893e3ae265663d144 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 18 Sep 2022 20:38:07 -0700 Subject: [PATCH 41/84] Kusto-phase1 : change the parser in ClientBase from shared_ptr to unique_ptr --- src/Client/ClientBase.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f87487dff7c..f407fab68f1 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -292,7 +292,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - std::shared_ptr parser; + std::unique_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -304,9 +304,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const Dialect & dialect = settings.dialect; if (dialect == Dialect::kusto) - parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); else - parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); if (is_interactive || ignore_error) { From e478079f076f9e17240a906e1a3a8c156ac0afbb Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 19 Sep 2022 11:46:09 +0800 Subject: [PATCH 42/84] Add test cases for different distributed_ddl_entry_format_version Signed-off-by: Frank Chen --- .../02423_ddl_for_opentelemetry.reference | 9 ++++ .../02423_ddl_for_opentelemetry.sh | 41 ++++++++++++------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index 19b2fe09a20..68152d602cf 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,8 +1,17 @@ +===case 1==== 1 1 2 ===case 2==== 1 1 +2 +===case 3==== +1 +1 +2 +===case 4==== +1 +1 exception_code=60 exception_code=60 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 272eaf4e345..551e8b3c723 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -51,29 +51,42 @@ ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ddl_test_for_opentelemetry; " +case_no=1; + # -# Case 1, a normal case +# normal cases for ALL distributed_ddl_entry_format_version # -trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); -execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none" +for ddl_version in 1 2 3; do + # Echo a separator so that the reference file is more clear for reading + echo "===case ${case_no}====" -check_span $trace_id "count()" "HTTPHandler" -check_span $trace_id "count()" "%DDLWorker::processTask%" + trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); + execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER 
BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" -# There should be two 'query' spans, -# one is for the HTTPHandler, the other is for the DDL executing in DDLWorker -check_span $trace_id "count()" "query" + check_span $trace_id "count()" "HTTPHandler" + check_span $trace_id "count()" "%DDLWorker::processTask%" + # There should be two 'query' spans, + # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker + check_span $trace_id "count()" "query" + + # Remove table + ${CLICKHOUSE_CLIENT} -q " + DROP TABLE IF EXISTS ddl_test_for_opentelemetry; + " + + case_no=$(($case_no + 1)) +done + +# +# an exceptional case, DROP a non-exist table +# # Echo a separator so that the reference file is more clear for reading -echo "===case 2====" - -# -# Case 2, an exceptional case, DROP a non-exist table -# -trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); +echo "===case ${case_no}====" # Since this query is supposed to fail, we redirect the error message to /dev/null to discard the error message so that it won't pollute the reference file. # The exception will be checked in the span log +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER test_shard_localhost" "distributed_ddl_output_mode=none" "/dev/null" check_span $trace_id "count()" "HTTPHandler" From b056bc1021f3c3f2e7a6ad79690be7afb8f8c955 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 19 Sep 2022 13:43:40 +0800 Subject: [PATCH 43/84] Add span for executeDDLQueryOnCluster Signed-off-by: Frank Chen --- src/Interpreters/executeDDLQueryOnCluster.cpp | 4 ++++ .../queries/0_stateless/02423_ddl_for_opentelemetry.reference | 4 ++++ tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh | 2 ++ 3 files changed, 10 insertions(+) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 06a6512e21b..016a740a7bc 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -55,6 +55,8 @@ bool isSupportedAlterType(int type) BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, const DDLQueryOnClusterParams & params) { + OpenTelemetry::SpanHolder span(__FUNCTION__); + if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ON CLUSTER queries inside transactions are not supported"); @@ -88,6 +90,8 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, cluster = context->getCluster(query->cluster); } + span.addAttribute("clickhouse.cluster", query->cluster); + /// TODO: support per-cluster grant context->checkAccess(AccessType::CLUSTER); diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index 68152d602cf..09c15e5098e 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,17 +1,21 @@ ===case 1==== 1 1 +test_shard_localhost 2 ===case 2==== 1 1 +test_shard_localhost 2 ===case 3==== 1 1 +test_shard_localhost 2 ===case 4==== 1 1 +test_shard_localhost exception_code=60 exception_code=60 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 
551e8b3c723..043a968104d 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -65,6 +65,7 @@ for ddl_version in 1 2 3; do check_span $trace_id "count()" "HTTPHandler" check_span $trace_id "count()" "%DDLWorker::processTask%" + check_span $trace_id "attribute['clickhouse.cluster']" "%executeDDLQueryOnCluster%" # There should be two 'query' spans, # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker @@ -91,6 +92,7 @@ execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUS check_span $trace_id "count()" "HTTPHandler" check_span $trace_id "count()" "%DDLWorker::processTask%" +check_span $trace_id "attribute['clickhouse.cluster']" "%executeDDLQueryOnCluster%" # There should be two 'query' spans, # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. From 6f956329d5a96ee786a1d3aa34d902534b5ab424 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Mon, 19 Sep 2022 15:26:11 +0200 Subject: [PATCH 44/84] Remove obsolete comment from the config.xml Remove obsolete comment, see commit c059d0a0ee1e13c73cdefb821cb40aa01f6981c1 --- programs/server/config.xml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index a1e139d9e76..fef45c19d37 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1106,10 +1106,6 @@ system asynchronous_metric_log
- 7000
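The tracing added around DDL in PATCH 43 above follows a simple RAII pattern: a span holder opens the span on construction, attributes are attached as facts become known (the cluster name, in that patch), and the destructor finishes the span so every return path is covered. A self-contained sketch of the pattern follows (plain C++; the real OpenTelemetry::SpanHolder records into system.opentelemetry_span_log rather than printing, so the class body here is an illustrative assumption — only the constructor/addAttribute usage mirrors the diff):

#include <chrono>
#include <iostream>
#include <map>
#include <string>
#include <utility>

/// Illustrative stand-in for OpenTelemetry::SpanHolder: same call shape,
/// simplified recording (prints instead of writing to the span log).
class SpanHolder
{
public:
    explicit SpanHolder(std::string operation_name_)
        : operation_name(std::move(operation_name_))
        , start(std::chrono::steady_clock::now())
    {
    }

    void addAttribute(const std::string & name, const std::string & value)
    {
        attributes[name] = value;
    }

    /// Finishing in the destructor guarantees the span is recorded on every
    /// return path, including early returns and exceptions.
    ~SpanHolder()
    {
        const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::steady_clock::now() - start);
        std::cout << operation_name << " finished in " << elapsed.count() << " us\n";
        for (const auto & [name, value] : attributes)
            std::cout << "    " << name << " = " << value << '\n';
    }

private:
    std::string operation_name;
    std::chrono::steady_clock::time_point start;
    std::map<std::string, std::string> attributes;
};

/// Mirrors the instrumentation added to executeDDLQueryOnCluster in PATCH 43:
/// open the span at function entry, tag it with the cluster name.
void executeDDLQueryOnCluster(const std::string & cluster)
{
    SpanHolder span(__FUNCTION__);
    span.addAttribute("clickhouse.cluster", cluster);
    /// ... the actual DDL distribution work would happen here ...
}

int main()
{
    executeDDLQueryOnCluster("test_shard_localhost");
}

This is why the test in PATCH 43 can assert on attribute['clickhouse.cluster']: the attribute is attached to the same span that wraps the whole on-cluster execution.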
From a89140ae98dc187c354471d91ded302da31e1d9c Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Tue, 20 Sep 2022 10:37:54 +0800 Subject: [PATCH 45/84] Fix style Signed-off-by: Frank Chen --- src/Interpreters/DDLWorker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 3dc390785ef..8873d851de1 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -517,8 +517,8 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) chassert(!task.completely_processed); /// Setup tracing context on current thread for current DDL - OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , - task.entry.tracing_context, + OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , + task.entry.tracing_context, this->context->getOpenTelemetrySpanLog()); String active_node_path = task.getActiveNodePath(); From 21afe65e8e10cd0a2f916ca0562fd3152c3042dc Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 21 Sep 2022 10:54:40 +0800 Subject: [PATCH 46/84] Print content if diff says inputs are binary files Signed-off-by: Frank Chen --- tests/clickhouse-test | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 14cf4d0674a..f59ce0fa046 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -864,7 +864,13 @@ class TestCase: ], stdout=PIPE, universal_newlines=True, - ).communicate()[0] + ).communicate()[0] + if diff.startswith("Binary files "): + diff += "Content of stdout:\n===================\n" + file = open(self.stdout_file, "r") + diff += str(file.read()) + file.close() + diff += "===================" description += f"\n{diff}\n" if debug_log: description += "\n" From 4c1a062375367a64e306204a721ac42d10e1c62f Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 21 Sep 2022 14:11:10 +0800 Subject: [PATCH 47/84] Fix style Signed-off-by: Frank Chen --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f59ce0fa046..79428d74c47 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -864,7 +864,7 @@ class TestCase: ], stdout=PIPE, universal_newlines=True, - ).communicate()[0] + ).communicate()[0] if diff.startswith("Binary files "): diff += "Content of stdout:\n===================\n" file = open(self.stdout_file, "r") From 020f30950f14de51e4b2b7579444b49f1aef3097 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 21 Sep 2022 19:56:32 +0800 Subject: [PATCH 48/84] Suppress the output to see if the test passes Signed-off-by: Frank Chen --- tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 043a968104d..84aa747fc56 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -61,7 +61,7 @@ for ddl_version in 1 2 3; do echo "===case ${case_no}====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); - execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" + execute_query $trace_id "CREATE TABLE 
ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" "/dev/null"
 
     check_span $trace_id "count()" "HTTPHandler"
     check_span $trace_id "count()" "%DDLWorker::processTask%"

From 6798b500e9e05cdfbc22ac86830833248890e8df Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Wed, 21 Sep 2022 15:12:16 +0000
Subject: [PATCH 49/84] Wait on startup for Keeper

---
 programs/server/Server.cpp                            | 12 +++++++++++-
 src/Coordination/KeeperServer.cpp                     |  2 +-
 .../test_keeper_and_access_storage/test.py            |  2 --
 tests/integration/test_keeper_auth/test.py            |  3 ---
 tests/integration/test_keeper_back_to_back/test.py    |  2 --
 .../integration/test_keeper_incorrect_config/test.py  |  2 --
 .../integration/test_keeper_internal_secure/test.py   |  3 ---
 tests/integration/test_keeper_mntr_pressure/test.py   |  1 -
 .../test_keeper_multinode_blocade_leader/test.py      |  7 ++++++-
 .../integration/test_keeper_multinode_simple/test.py  | 10 +++++++++-
 tests/integration/test_keeper_nodes_move/test.py      |  2 --
 tests/integration/test_keeper_nodes_remove/test.py    |  2 --
 tests/integration/test_keeper_persistent_log/test.py  |  4 ----
 .../test_keeper_restore_from_snapshot/test.py         |  1 -
 tests/integration/test_keeper_secure_client/test.py   |  1 -
 .../integration/test_keeper_snapshot_on_exit/test.py  |  2 --
 tests/integration/test_keeper_snapshots/test.py       |  1 -
 .../test_keeper_snapshots_multinode/test.py           |  1 -
 .../test_keeper_three_nodes_start/test.py             |  2 --
 .../test_keeper_three_nodes_two_alive/test.py         |  9 ++++-----
 .../test_keeper_two_nodes_cluster/test.py             |  7 ++++++-
 .../test_keeper_zookeeper_converter/test.py           |  1 -
 22 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 40b4b646b6e..8a0ce75ca70 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1282,8 +1282,18 @@ int Server::main(const std::vector & /*args*/)
     if (config().has("keeper_server"))
     {
 #if USE_NURAFT
+        //// If we don't have a configured connection, probably someone is trying to use clickhouse-server instead
+        //// of clickhouse-keeper, so start synchronously.
+        bool can_initialize_keeper_async = false;
+
+        if (has_zookeeper) /// We have a configured connection to some zookeeper cluster
+        {
+            /// If we cannot connect to some other node from our cluster, then we have to wait for our Keeper to start
+            /// synchronously.
+            can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster();
+        }
         /// Initialize keeper RAFT.
- global_context->initializeKeeperDispatcher(/* start_async */ true); + global_context->initializeKeeperDispatcher(can_initialize_keeper_async); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 42d7d967b1f..08092cf68f1 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -705,7 +705,7 @@ void KeeperServer::waitInit() int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds(); if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); })) - throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization"); + LOG_WARNING(log, "Failed to wait for RAFT initialization in {}ms, will continue in background", timeout); } std::vector KeeperServer::getDeadSessions() diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index 72e3582979b..6ec307f7082 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -3,7 +3,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) @@ -16,7 +15,6 @@ node1 = cluster.add_instance( def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node1) yield cluster finally: diff --git a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py index e1331c35eeb..364d93dfc53 100644 --- a/tests/integration/test_keeper_auth/test.py +++ b/tests/integration/test_keeper_auth/test.py @@ -1,6 +1,5 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -26,7 +25,6 @@ SUPERAUTH = "super:admin" def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) yield cluster @@ -457,7 +455,6 @@ def test_auth_snapshot(started_cluster): ) node.restart_clickhouse() - keeper_utils.wait_until_connected(cluster, node) connection = get_fake_zk() diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 5ae71841004..73fface02b4 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -1,6 +1,5 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -62,7 +61,6 @@ def stop_zk(zk): def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) yield cluster diff --git a/tests/integration/test_keeper_incorrect_config/test.py b/tests/integration/test_keeper_incorrect_config/test.py index ec8b14a01e9..95482745b31 100644 --- a/tests/integration/test_keeper_incorrect_config/test.py +++ b/tests/integration/test_keeper_incorrect_config/test.py @@ -2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( @@ -225,6 +224,5 @@ def test_invalid_configs(started_cluster): 
"/etc/clickhouse-server/config.d/enable_keeper1.xml", NORMAL_CONFIG ) node1.start_clickhouse() - keeper_utils.wait_until_connected(cluster, node1) assert node1.query("SELECT 1") == "1\n" diff --git a/tests/integration/test_keeper_internal_secure/test.py b/tests/integration/test_keeper_internal_secure/test.py index 2448a426fe2..2d45e95e4ff 100644 --- a/tests/integration/test_keeper_internal_secure/test.py +++ b/tests/integration/test_keeper_internal_secure/test.py @@ -2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -48,8 +47,6 @@ def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) - yield cluster finally: diff --git a/tests/integration/test_keeper_mntr_pressure/test.py b/tests/integration/test_keeper_mntr_pressure/test.py index 1468aa01896..d351b238ead 100644 --- a/tests/integration/test_keeper_mntr_pressure/test.py +++ b/tests/integration/test_keeper_mntr_pressure/test.py @@ -31,7 +31,6 @@ NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving request def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster diff --git a/tests/integration/test_keeper_multinode_blocade_leader/test.py b/tests/integration/test_keeper_multinode_blocade_leader/test.py index 06a5cd8dc5a..a7a80d90a58 100644 --- a/tests/integration/test_keeper_multinode_blocade_leader/test.py +++ b/tests/integration/test_keeper_multinode_blocade_leader/test.py @@ -45,7 +45,6 @@ TODO remove this when jepsen tests will be written. def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -65,10 +64,15 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + # in extremely rare case it can take more than 5 minutes in debug build with sanitizer @pytest.mark.timeout(600) def test_blocade_leader(started_cluster): for i in range(100): + wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( @@ -272,6 +276,7 @@ def restart_replica_for_sure(node, table_name, zk_replica_path): @pytest.mark.timeout(600) def test_blocade_leader_twice(started_cluster): for i in range(100): + wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index b8bdb098c0d..1dcbb290fa8 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -33,7 +33,6 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -45,6 +44,10 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -55,6 +58,7 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_multinode(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -96,6 +100,7 @@ def test_read_write_multinode(started_cluster): def 
test_watch_on_follower(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -152,6 +157,7 @@ def test_watch_on_follower(started_cluster): def test_session_expiration(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3", timeout=3.0) @@ -193,6 +199,7 @@ def test_session_expiration(started_cluster): def test_follower_restart(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node1_zk.create("/test_restart_node", b"hello") @@ -217,6 +224,7 @@ def test_follower_restart(started_cluster): def test_simple_replicated_table(started_cluster): + wait_nodes() for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format( diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index c816d69e2d1..1e3bd95c5e7 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -34,8 +34,6 @@ def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) - yield cluster finally: diff --git a/tests/integration/test_keeper_nodes_remove/test.py b/tests/integration/test_keeper_nodes_remove/test.py index 03536f07064..59bdaadf2e2 100644 --- a/tests/integration/test_keeper_nodes_remove/test.py +++ b/tests/integration/test_keeper_nodes_remove/test.py @@ -2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import time import os from kazoo.client import KazooClient, KazooState @@ -25,7 +24,6 @@ node3 = cluster.add_instance( def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index d7cc79836a7..70cc14fe26d 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -33,8 +32,6 @@ def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) - yield cluster finally: @@ -51,7 +48,6 @@ def get_connection_zk(nodename, timeout=30.0): def restart_clickhouse(): node.restart_clickhouse(kill=True) - keeper_utils.wait_until_connected(cluster, node) def test_state_after_restart(started_cluster): diff --git a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index 7f2c2e89703..bc33689dd20 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -25,7 +25,6 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster diff --git a/tests/integration/test_keeper_secure_client/test.py b/tests/integration/test_keeper_secure_client/test.py index 81584129052..2a17afac75b 100644 --- a/tests/integration/test_keeper_secure_client/test.py +++ b/tests/integration/test_keeper_secure_client/test.py @@ -1,7 +1,6 @@ #!/usr/bin/env 
python3 import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import string import os import time diff --git a/tests/integration/test_keeper_snapshot_on_exit/test.py b/tests/integration/test_keeper_snapshot_on_exit/test.py index 933e83414a4..1ca5888ab4d 100644 --- a/tests/integration/test_keeper_snapshot_on_exit/test.py +++ b/tests/integration/test_keeper_snapshot_on_exit/test.py @@ -1,6 +1,5 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import os from kazoo.client import KazooClient @@ -28,7 +27,6 @@ def get_fake_zk(node, timeout=30.0): def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index a27ca6f92a5..ce57a852dca 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -36,7 +36,6 @@ def create_random_path(prefix="", depth=1): def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) yield cluster diff --git a/tests/integration/test_keeper_snapshots_multinode/test.py b/tests/integration/test_keeper_snapshots_multinode/test.py index 52d4ae71e33..a68a34dae2e 100644 --- a/tests/integration/test_keeper_snapshots_multinode/test.py +++ b/tests/integration/test_keeper_snapshots_multinode/test.py @@ -29,7 +29,6 @@ def wait_nodes(): def started_cluster(): try: cluster.start() - wait_nodes() yield cluster diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py index c8476568786..e451f969b37 100644 --- a/tests/integration/test_keeper_three_nodes_start/test.py +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -3,7 +3,6 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -32,7 +31,6 @@ def get_fake_zk(nodename, timeout=30.0): def test_smoke(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2]) node1_zk = get_fake_zk("node1") node1_zk.create("/test_alive", b"aaaa") diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index 591dde6a70a..bd29ded357f 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -40,7 +40,6 @@ def get_fake_zk(nodename, timeout=30.0): def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -77,10 +76,10 @@ def test_start_offline(started_cluster): p.map(start, [node2, node3]) assert node2.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) assert node3.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") @@ -113,10 +112,10 @@ def test_start_non_existing(started_cluster): p.map(start, [node2, node1]) assert node1.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) assert node2.contains_in_log( - "Connected 
to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index b87dcf6e758..c6bc0ebd33a 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -30,7 +30,6 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster @@ -42,6 +41,10 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2]) + + def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -52,6 +55,7 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_two_nodes(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") @@ -83,6 +87,7 @@ def test_read_write_two_nodes(started_cluster): def test_read_write_two_nodes_with_blocade(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1", timeout=5.0) node2_zk = get_fake_zk("node2", timeout=5.0) diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index e459078f8ef..af8d1ca4bf9 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -12,7 +12,6 @@ from kazoo.exceptions import ( ) import os import time -import socket cluster = ClickHouseCluster(__file__) From 558aed814295f836e3df89d0dc1dd8bd0a7b7109 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 22 Sep 2022 14:30:38 +0800 Subject: [PATCH 50/84] Tag test case not executed under replicated database Signed-off-by: Frank Chen --- tests/queries/0_stateless/01455_opentelemetry_distributed.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index b2b5ae89105..50248cf01a1 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: distributed +# Tags: distributed, no-replicated-database set -ue From cf97827b81173c3a678ae8bfec3b2aeececb596f Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 22 Sep 2022 23:03:28 +0800 Subject: [PATCH 51/84] Revert "Tag test case not executed under replicated database" This reverts commit 558aed814295f836e3df89d0dc1dd8bd0a7b7109. 
--- tests/queries/0_stateless/01455_opentelemetry_distributed.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index 50248cf01a1..b2b5ae89105 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: distributed, no-replicated-database +# Tags: distributed set -ue From 40f9e0b69a13ecacc97e35e83b4bf29573d7102a Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 23 Sep 2022 10:08:42 +0800 Subject: [PATCH 52/84] Address review comments Signed-off-by: Frank Chen --- src/Common/OpenTelemetryTraceContext.cpp | 21 +++++++++------------ src/Common/OpenTelemetryTraceContext.h | 4 ++-- src/Interpreters/DDLTask.cpp | 3 ++- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index 0a64900db9b..3e7a172bdb2 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -228,18 +228,15 @@ String TracingContext::composeTraceparentHeader() const void TracingContext::deserialize(ReadBuffer & buf) { - if (!buf.eof() && *buf.position() == 't') - { - buf >> "tracing: " - >> this->trace_id - >> "\n" - >> this->span_id - >> "\n" - >> this->tracestate - >> "\n" - >> this->trace_flags - >> "\n"; - } + buf >> "tracing: " + >> this->trace_id + >> "\n" + >> this->span_id + >> "\n" + >> this->tracestate + >> "\n" + >> this->trace_flags + >> "\n"; } void TracingContext::serialize(WriteBuffer & buf) const diff --git a/src/Common/OpenTelemetryTraceContext.h b/src/Common/OpenTelemetryTraceContext.h index 20090960814..03bac2891fc 100644 --- a/src/Common/OpenTelemetryTraceContext.h +++ b/src/Common/OpenTelemetryTraceContext.h @@ -160,7 +160,7 @@ struct SpanHolder : public Span void finish() noexcept; }; -} // End of namespace OpenTelemetry +} inline WriteBuffer & operator<<(WriteBuffer & buf, const OpenTelemetry::TracingContext & context) { @@ -174,4 +174,4 @@ inline ReadBuffer & operator>> (ReadBuffer & buf, OpenTelemetry::TracingContext return buf; } -} // End of namespace DB +} diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index e33617d59f5..aff47db8242 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -136,7 +136,8 @@ void DDLLogEntry::parse(const String & data) } } - rb >> this->tracing_context; + if (!rb.eof() && *rb.position() == 't') + rb >> this->tracing_context; if (!host_id_strings.empty()) { From 2344e0738e9ed65061273103f791dca56d9f42ab Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 23 Sep 2022 11:32:21 +0800 Subject: [PATCH 53/84] Keep compatibility during upgrading --- src/Common/OpenTelemetryTraceContext.cpp | 6 ++--- src/Interpreters/DDLTask.cpp | 32 ++++++++++++++++-------- src/Interpreters/DDLTask.h | 5 ++++ 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index 3e7a172bdb2..515060803d6 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -228,8 +228,7 @@ String TracingContext::composeTraceparentHeader() const void TracingContext::deserialize(ReadBuffer & buf) { - buf >> "tracing: " - >> this->trace_id + buf >> this->trace_id >> "\n" >> this->span_id >> "\n" @@ -241,8 +240,7 @@ void 
TracingContext::deserialize(ReadBuffer & buf)
 
 void TracingContext::serialize(WriteBuffer & buf) const
 {
-    buf << "tracing: "
-        << this->trace_id
+    buf << this->trace_id
         << "\n"
         << this->span_id
         << "\n"
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index aff47db8242..73105ae003e 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -50,21 +50,26 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const
 
 void DDLLogEntry::assertVersion() const
 {
-    constexpr UInt64 max_version = 2;
-    if (version == 0 || max_version < version)
+    if (version == 0
+    /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses versioin 2, so there shouldn't be version 3
+        || version == NORMALIZE_CREATE_ON_INITIATOR_VERSION
+        || version > MAX_VERSION)
         throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}."
-                        "Maximum supported version is {}", version, max_version);
+                        "Maximum supported version is {}", version, MAX_VERSION);
 }
 
 void DDLLogEntry::setSettingsIfRequired(ContextPtr context)
 {
     version = context->getSettingsRef().distributed_ddl_entry_format_version;
+    if (version <= 0 || version > MAX_VERSION)
+        throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown distributed_ddl_entry_format_version: {}."
+                        "Maximum supported version is {}.", version, MAX_VERSION);
 
     /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not affect entry format in ZooKeeper
     if (version == NORMALIZE_CREATE_ON_INITIATOR_VERSION)
         version = SETTINGS_IN_ZK_VERSION;
 
-    if (version == SETTINGS_IN_ZK_VERSION)
+    if (version >= SETTINGS_IN_ZK_VERSION)
         settings.emplace(context->getSettingsRef().changes());
 }
@@ -94,7 +99,8 @@ String DDLLogEntry::toString() const
         wb << "settings: " << serializeAST(ast) << "\n";
     }
 
-    wb << this->tracing_context;
+    if (version >= OPENTELEMETRY_ENABLED_VERSION)
+        wb << "tracing: " << this->tracing_context;
 
     return wb.str();
 }
@@ -108,7 +114,7 @@ void DDLLogEntry::parse(const String & data)
     Strings host_id_strings;
     rb >> "query: " >> escape >> query >> "\n";
-    if (version == 1)
+    if (version == OLDEST_VERSION)
     {
         rb >> "hosts: " >> host_id_strings >> "\n";
 
@@ -117,9 +123,8 @@ void DDLLogEntry::parse(const String & data)
     else
         initiator.clear();
     }
-    else if (version == 2)
+    else if (version >= SETTINGS_IN_ZK_VERSION)
     {
-        if (!rb.eof() && *rb.position() == 'h')
         rb >> "hosts: " >> host_id_strings >> "\n";
 
         if (!rb.eof() && *rb.position() == 'i')
@@ -136,8 +141,13 @@ void DDLLogEntry::parse(const String & data)
         }
     }
 
-    if (!rb.eof() && *rb.position() == 't')
-        rb >> this->tracing_context;
+    if (version >= OPENTELEMETRY_ENABLED_VERSION)
+    {
+        if (!rb.eof() && *rb.position() == 't')
+            rb >> "tracing: " >> this->tracing_context;
+    }
+
+    assertEOF(rb);
 
     if (!host_id_strings.empty())
     {
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index fc85188a865..7217ee2b98b 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -70,6 +70,11 @@ struct DDLLogEntry
     static constexpr const UInt64 OLDEST_VERSION = 1;
     static constexpr const UInt64 SETTINGS_IN_ZK_VERSION = 2;
     static constexpr const UInt64 NORMALIZE_CREATE_ON_INITIATOR_VERSION = 3;
+    static constexpr const UInt64 OPENTELEMETRY_ENABLED_VERSION = 4;
+    /// Add new version here
+
+    /// Remember to update the value below once new version is added
+    static constexpr const UInt64 MAX_VERSION = 4;
 
     UInt64 version = 1;
     String query;

From 7489a95f0be86a0ba26236a99744ee42ed3d5e91 Mon Sep 17 00:00:00 2001
From: Frank Chen Date: Fri, 23 Sep 2022 11:33:00 +0800 Subject: [PATCH 54/84] Update test case to satisfy the ddl_format_version --- .../02423_ddl_for_opentelemetry.reference | 47 +++++++++-------- .../02423_ddl_for_opentelemetry.sh | 50 ++++++++++++------- 2 files changed, 57 insertions(+), 40 deletions(-) diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index 09c15e5098e..b6fb5738337 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,21 +1,26 @@ -===case 1==== -1 -1 -test_shard_localhost -2 -===case 2==== -1 -1 -test_shard_localhost -2 -===case 3==== -1 -1 -test_shard_localhost -2 -===case 4==== -1 -1 -test_shard_localhost -exception_code=60 -exception_code=60 +===ddl_format_version 1==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=0 +query=1 +===ddl_format_version 2==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=0 +query=1 +===ddl_format_version 3==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=0 +query=1 +===ddl_format_version 4==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=1 +query=2 +===exception==== +httpHandler=1 +executeDDLQueryOnCluster=1 +processTask=1 +exceptionCode=60 +exceptionCode=60 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 84aa747fc56..3f1dc53a20b 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -31,8 +31,15 @@ function execute_query() # $1 - OpenTelemetry Trace Id # $2 - Fields # $3 - operation_name pattern +# $4 - extra condition function check_span() { + if [ -n "$4" ]; then + extra_condition=" AND ${4}" + else + extra_condition="" + fi + ${CLICKHOUSE_CLIENT} -nq " SYSTEM FLUSH LOGS; @@ -41,6 +48,8 @@ ${CLICKHOUSE_CLIENT} -nq " WHERE finish_date >= yesterday() AND lower(hex(trace_id)) = '${1}' AND operation_name like '${3}' + ${extra_condition} + Format TSKV ;" } @@ -51,53 +60,56 @@ ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ddl_test_for_opentelemetry; " -case_no=1; +# Support Replicated database engine +cluster_name=$($CLICKHOUSE_CLIENT -q "select if(engine = 'Replicated', name, 'test_shard_localhost') from system.databases where name='$CLICKHOUSE_DATABASE'") # -# normal cases for ALL distributed_ddl_entry_format_version +# Normal cases for ALL distributed_ddl_entry_format_version. 
+# Only format_version 4 enables the tracing # -for ddl_version in 1 2 3; do +for ddl_version in 1 2 3 4; do # Echo a separator so that the reference file is more clear for reading - echo "===case ${case_no}====" + echo "===ddl_format_version ${ddl_version}====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); - execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER test_shard_localhost (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" "/dev/null" + execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" "/dev/null" - check_span $trace_id "count()" "HTTPHandler" - check_span $trace_id "count()" "%DDLWorker::processTask%" - check_span $trace_id "attribute['clickhouse.cluster']" "%executeDDLQueryOnCluster%" + check_span $trace_id "count() AS httpHandler" "HTTPHandler" + check_span $trace_id "count() AS executeDDLQueryOnCluster" "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" + check_span $trace_id "count() AS processTask" "%DDLWorker::processTask%" - # There should be two 'query' spans, - # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker - check_span $trace_id "count()" "query" + # For format_version 4, there should be two 'query' spans, + # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. + # + # For other format, there should be only one 'query' span + # + check_span $trace_id "count() AS query" "query" # Remove table ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ddl_test_for_opentelemetry; " - - case_no=$(($case_no + 1)) done # # an exceptional case, DROP a non-exist table # # Echo a separator so that the reference file is more clear for reading -echo "===case ${case_no}====" +echo "===exception====" # Since this query is supposed to fail, we redirect the error message to /dev/null to discard the error message so that it won't pollute the reference file. # The exception will be checked in the span log trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); -execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER test_shard_localhost" "distributed_ddl_output_mode=none" "/dev/null" +execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" "/dev/null" -check_span $trace_id "count()" "HTTPHandler" -check_span $trace_id "count()" "%DDLWorker::processTask%" -check_span $trace_id "attribute['clickhouse.cluster']" "%executeDDLQueryOnCluster%" +check_span $trace_id "count() AS httpHandler" "HTTPHandler" +check_span $trace_id "count() AS executeDDLQueryOnCluster" "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" +check_span $trace_id "count() AS processTask" "%DDLWorker::processTask%" # There should be two 'query' spans, # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. 
# Both of these two spans contain exception -check_span $trace_id "concat('exception_code=', attribute['clickhouse.exception_code'])" "query" +check_span $trace_id "attribute['clickhouse.exception_code'] AS exceptionCode" "query" # # Tear down From 45e3d7d7889c50ce4ee6910974100d6c26e54e13 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 23 Sep 2022 11:36:06 +0800 Subject: [PATCH 55/84] Update name/comments --- src/Interpreters/DDLTask.cpp | 12 ++++++------ src/Interpreters/DDLTask.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 73105ae003e..c4c8ceae454 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -51,19 +51,19 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const void DDLLogEntry::assertVersion() const { if (version == 0 - /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses versioin 2, so there shouldn't be version 3 + /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses versioin 2, so there shouldn't be such version || version == NORMALIZE_CREATE_ON_INITIATOR_VERSION - || version > MAX_VERSION) + || version > DDL_ENTRY_FORMAT_MAX_VERSION) throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}." - "Maximum supported version is {}", version, MAX_VERSION); + "Maximum supported version is {}", version, DDL_ENTRY_FORMAT_MAX_VERSION); } void DDLLogEntry::setSettingsIfRequired(ContextPtr context) { - version = context->getSettingsRef(). ; - if (version <= 0 || version > MAX_VERSION) + version = context->getSettingsRef().distributed_ddl_entry_format_version; + if (version <= 0 || version > DDL_ENTRY_FORMAT_MAX_VERSION) throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown distributed_ddl_entry_format_version: {}." 
- "Maximum supported version is {}.", version, MAX_VERSION); + "Maximum supported version is {}.", version, DDL_ENTRY_FORMAT_MAX_VERSION); /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not affect entry format in ZooKeeper if (version == NORMALIZE_CREATE_ON_INITIATOR_VERSION) diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index 7217ee2b98b..661cee84a45 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -74,7 +74,7 @@ struct DDLLogEntry /// Add new version here /// Remember to update the value below once new version is added - static constexpr const UInt64 MAX_VERSION = 4; + static constexpr const UInt64 DDL_ENTRY_FORMAT_MAX_VERSION = 4; UInt64 version = 1; String query; From 34bcb6a82bbe11a9e57f36984dd29b82cdb57cde Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 23 Sep 2022 11:48:50 +0800 Subject: [PATCH 56/84] Fix style Signed-off-by: Frank Chen --- src/Interpreters/DDLTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index c4c8ceae454..2d609c00406 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -50,7 +50,7 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const void DDLLogEntry::assertVersion() const { - if (version == 0 + if (version == 0 /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses versioin 2, so there shouldn't be such version || version == NORMALIZE_CREATE_ON_INITIATOR_VERSION || version > DDL_ENTRY_FORMAT_MAX_VERSION) From a999212082600c8d78d7466215c97fa8393bd78a Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Sat, 24 Sep 2022 11:34:42 +0800 Subject: [PATCH 57/84] Update test cases to support both Replicated and non-Replicated database engine Signed-off-by: Frank Chen --- src/Databases/DatabaseReplicated.cpp | 2 + .../02423_ddl_for_opentelemetry.reference | 41 +++-- .../02423_ddl_for_opentelemetry.sh | 141 ++++++++++++------ 3 files changed, 114 insertions(+), 70 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index f1bf56e2beb..507320fffde 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -642,6 +643,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex entry.query = queryToString(query); entry.initiator = ddl_worker->getCommonHostID(); entry.setSettingsIfRequired(query_context); + entry.tracing_context = OpenTelemetry::CurrentContext(); String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry, query_context); Strings hosts_to_wait = getZooKeeper()->getChildren(zookeeper_path + "/replicas"); diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index b6fb5738337..348dc062885 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,26 +1,25 @@ ===ddl_format_version 1==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=0 -query=1 +1 +1 +1 +1 ===ddl_format_version 2==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=0 -query=1 +1 +1 +1 +1 ===ddl_format_version 3==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=0 -query=1 +1 +1 +1 +1 ===ddl_format_version 4==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=1 -query=2 +1 +1 +1 +1 
===exception==== -httpHandler=1 -executeDDLQueryOnCluster=1 -processTask=1 -exceptionCode=60 -exceptionCode=60 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 3f1dc53a20b..e313da78354 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -5,31 +5,28 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# The test cases in this file cover DDLs running on both Replicated database engine and non-Replicated database engine. +# Since the processing flow is a little bit different from each other, in order to share same reference file, +# we compare the expected result and actual result by ourselves. See check_span method below for more detail. + # This function takes following arguments: # $1 - OpenTelemetry Trace Id # $2 - Query # $3 - Query Settings -# $4 - Output device, default is stdout function execute_query() { - if [ -n "${4}" ]; then - output=$4 - else - output="/dev/stdout" - fi - - echo $2 | ${CLICKHOUSE_CURL} \ + # Some queries are supposed to fail, use -f to suppress error messages + echo $2 | ${CLICKHOUSE_CURL} -f \ -X POST \ -H "traceparent: 00-$1-5150000000000515-01" \ -H "tracestate: a\nb cd" \ "${CLICKHOUSE_URL}?${3}" \ - --data @- \ - > $output + --data @- } -# This function takes 3 argument: -# $1 - OpenTelemetry Trace Id -# $2 - Fields +# This function takes following argument: +# $1 - expected +# $2 - OpenTelemetry Trace Id # $3 - operation_name pattern # $4 - extra condition function check_span() @@ -40,24 +37,38 @@ function check_span() extra_condition="" fi -${CLICKHOUSE_CLIENT} -nq " - SYSTEM FLUSH LOGS; + ret=$(${CLICKHOUSE_CLIENT} -nq " + SYSTEM FLUSH LOGS; - SELECT ${2} - FROM system.opentelemetry_span_log - WHERE finish_date >= yesterday() - AND lower(hex(trace_id)) = '${1}' - AND operation_name like '${3}' - ${extra_condition} - Format TSKV - ;" + SELECT count() + FROM system.opentelemetry_span_log + WHERE finish_date >= yesterday() + AND lower(hex(trace_id)) = '${2}' + AND operation_name like '${3}' + ${extra_condition};") + + if [ $ret = $1 ]; then + echo 1 + else + echo "[operation_name like '${3}' ${extra_condition}]=$ret, expected: ${1}" + + # echo the span logs to help analyze + ${CLICKHOUSE_CLIENT} -q " + SELECT operation_name, attribute + FROM system.opentelemetry_span_log + WHERE finish_date >= yesterday() + AND lower(hex(trace_id)) ='${2}' + ORDER BY start_time_us + Format PrettyCompact + " + fi } # # Set up # ${CLICKHOUSE_CLIENT} -q " -DROP TABLE IF EXISTS ddl_test_for_opentelemetry; +DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry; " # Support Replicated database engine @@ -72,22 +83,50 @@ for ddl_version in 1 2 3 4; do echo "===ddl_format_version ${ddl_version}====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); - execute_query $trace_id "CREATE TABLE ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" "/dev/null" + execute_query $trace_id "CREATE TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" - check_span $trace_id "count() AS httpHandler" "HTTPHandler" - 
check_span $trace_id "count() AS executeDDLQueryOnCluster" "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'"
-    check_span $trace_id "count() AS processTask" "%DDLWorker::processTask%"
+    check_span 1 $trace_id "HTTPHandler"
 
-    # For format_version 4, there should be two 'query' spans,
+    # For Replicated database engine, it does not call the 'executeDDLQueryOnCluster' method, so we don't need to check it
+    if [ $cluster_name = "test_shard_localhost" ]; then
+        check_span 1 $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'"
+    else
+        # Only echo a value so that comparison with the reference is correct
+        echo 1
+    fi
+
+    if [ $cluster_name = "test_shard_localhost" ]; then
+        # The tracing is only enabled when entry format version is 4
+        if [ $ddl_version = "4" ]; then
+            expected=1
+        else
+            expected=0
+        fi
+    else
+        # For Replicated database engine, the tracing is always enabled because it calls DDLWorker::processTask directly
+        expected=1
+    fi
+    check_span $expected $trace_id "%DDLWorker::processTask%"
+
+    # For queries where tracing is enabled (format version 4 or the Replicated database engine), there should be two 'query' spans,
     # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker.
     #
     # For other format, there should be only one 'query' span
-    #
-    check_span $trace_id "count() AS query" "query"
+    if [ $cluster_name = "test_shard_localhost" ]; then
+        if [ $ddl_version = "4" ]; then
+            expected=2
+        else
+            expected=1
+        fi
+    else
+        expected=2
+    fi
+    check_span $expected $trace_id "query"
 
     # Remove table
-    ${CLICKHOUSE_CLIENT} -q "
-    DROP TABLE IF EXISTS ddl_test_for_opentelemetry;
+    # Under Replicated database engine, the DDL is executed as ON CLUSTER DDL, so distributed_ddl_output_mode is needed to suppress output
+    ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode none -q "
+    DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry;
     "
 done
 
@@ -97,23 +136,27 @@ done
 #
 # an exceptional case, DROP a non-exist table
 #
 # Echo a separator so that the reference file is more clear for reading
 echo "===exception===="
 
-# Since this query is supposed to fail, we redirect the error message to /dev/null to discard the error message so that it won't pollute the reference file.
-# The exception will be checked in the span log
 trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))");
-execute_query $trace_id "DROP TABLE ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" "/dev/null"
+execute_query $trace_id "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4"
 
-check_span $trace_id "count() AS httpHandler" "HTTPHandler"
-check_span $trace_id "count() AS executeDDLQueryOnCluster" "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'"
-check_span $trace_id "count() AS processTask" "%DDLWorker::processTask%"
+check_span 1 $trace_id "HTTPHandler"
+
+if [ $cluster_name = "test_shard_localhost" ]; then
+    expected=1
+else
+    # For Replicated database, executeDDLQueryOnCluster is not called
+    expected=0
+fi
+check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'"
+check_span $expected $trace_id "%DDLWorker::processTask%"
 
-# There should be two 'query' spans,
-# one is for the HTTPHandler, the other is for the DDL executing in DDLWorker.
-# Both of these two spans contain exception
-check_span $trace_id "attribute['clickhouse.exception_code'] AS exceptionCode" "query"
+if [ $cluster_name = "test_shard_localhost" ]; then
+    # There should be two 'query' spans, one is for the HTTPHandler, the other is for the DDL executing in DDLWorker.
+    # Both of these two spans contain exception
+    expected=2
+else
+    # For Replicated database, there should be only one query span
+    expected=1
+fi
+# We don't care about the exact value of exception_code, just check that it's there.
+check_span $expected $trace_id "query" "attribute['clickhouse.exception_code']<>''"

From 5b72de031aabf1a0f975181e8b3447f48c947250 Mon Sep 17 00:00:00 2001
From: Frank Chen
Date: Sat, 24 Sep 2022 13:53:02 +0800
Subject: [PATCH 58/84] Update test case

Signed-off-by: Frank Chen
---
 tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh
index e313da78354..b055a155acf 100755
--- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh
+++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh
@@ -20,7 +20,7 @@ function execute_query()
         -X POST \
         -H "traceparent: 00-$1-5150000000000515-01" \
         -H "tracestate: a\nb cd" \
-        "${CLICKHOUSE_URL}?${3}" \
+        "${CLICKHOUSE_URL}&${3}" \
         --data @-
 }

From 6acdeb84be96cba2df0ae6e8e9db28cdfadb981b Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 22 Sep 2022 23:19:57 +0200
Subject: [PATCH 59/84] clickhouse-client: refactor editor execution

Signed-off-by: Azat Khuzhin
---
 base/base/ReplxxLineReader.cpp | 236 +++++++++++++++++++--------------
 1 file changed, 137 insertions(+), 99 deletions(-)

diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp
index 75c48f690f8..ef8787bc0a3 100644
--- a/base/base/ReplxxLineReader.cpp
+++ b/base/base/ReplxxLineReader.cpp
@@ -1,6 +1,7 @@
 #include
 
 #include
+#include
 #include
 #include
 #include
@@ -15,7 +16,6 @@
 #include
 #include
 
-
 namespace
 {
 
@@ -35,6 +35,132 @@ std::string getEditor()
     return editor;
 }
 
+/// See comments in ShellCommand::executeImpl()
+/// (for the vfork via dlsym())
+int executeCommand(char * const argv[])
+{
+    static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
+    if (!real_vfork)
+        throw std::runtime_error("Cannot find vfork symbol");
+
+    pid_t pid = reinterpret_cast(real_vfork)();
+
+    if (-1 == pid)
+        throw std::runtime_error(fmt::format("Cannot vfork {}: {}", argv[0], errnoToString()));
+
+    /// Child
+    if (0 == pid)
+    {
+        sigset_t mask;
+        sigemptyset(&mask);
+        sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
+        sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
+
+        execvp(argv[0], argv);
+        _exit(-1);
+    }
+
+    int status = 0;
+    do
+    {
+        int exited_pid = waitpid(pid, &status, 0);
+        if (exited_pid != -1)
+            break;
+
+        if (errno == EINTR)
+            continue;
+
+        throw std::runtime_error(fmt::format("Cannot waitpid {}: {}", pid, 
errnoToString())); + } while (true); + + return status; +} + +void writeRetry(int fd, const std::string & data) +{ + size_t bytes_written = 0; + const char * begin = data.c_str(); + size_t offset = data.size(); + + while (bytes_written != offset) + { + ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); + if ((-1 == res || 0 == res) && errno != EINTR) + throw std::runtime_error(fmt::format("Cannot write to {}: {}", fd, errnoToString())); + bytes_written += res; + } +} +std::string readFile(const std::string & path) +{ + std::ifstream t(path); + std::string str; + t.seekg(0, std::ios::end); + str.reserve(t.tellg()); + t.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); + return str; +} + +/// Simple wrapper for temporary files. +class TemporaryFile +{ +private: + std::string path; + int fd = -1; + +public: + explicit TemporaryFile(const char * pattern) + : path(pattern) + { + size_t dot_pos = path.rfind('.'); + if (dot_pos != std::string::npos) + fd = ::mkstemps(path.data(), path.size() - dot_pos); + else + fd = ::mkstemp(path.data()); + + if (-1 == fd) + throw std::runtime_error(fmt::format("Cannot create temporary file {}: {}", path, errnoToString())); + } + ~TemporaryFile() + { + try + { + close(); + unlink(); + } + catch (const std::runtime_error & e) + { + fmt::print(stderr, "{}", e.what()); + } + } + + void close() + { + if (fd == -1) + return; + + if (0 != ::close(fd)) + throw std::runtime_error(fmt::format("Cannot close temporary file {}: {}", path, errnoToString())); + fd = -1; + } + + void write(const std::string & data) + { + if (fd == -1) + throw std::runtime_error(fmt::format("Cannot write to uninitialized file {}", path)); + + writeRetry(fd, data); + } + + void unlink() + { + if (0 != ::unlink(path.c_str())) + throw std::runtime_error(fmt::format("Cannot remove temporary file {}: {}", path, errnoToString())); + } + + std::string & getPath() { return path; } +}; + /// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx. 
 /// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx.
 /// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org)
 /// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com)
@@ -293,116 +419,28 @@ void ReplxxLineReader::addToHistory(const String & line)
         rx.print("Unlock of history file failed: %s\n", errnoToString().c_str());
 }

-/// See comments in ShellCommand::executeImpl()
-/// (for the vfork via dlsym())
-int ReplxxLineReader::executeEditor(const std::string & path)
-{
-    std::vector<char> argv0(editor.data(), editor.data() + editor.size() + 1);
-    std::vector<char> argv1(path.data(), path.data() + path.size() + 1);
-    char * const argv[] = {argv0.data(), argv1.data(), nullptr};
-
-    static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
-    if (!real_vfork)
-    {
-        rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString().c_str());
-        return -1;
-    }
-
-    pid_t pid = reinterpret_cast<pid_t (*)()>(real_vfork)();
-
-    if (-1 == pid)
-    {
-        rx.print("Cannot vfork: %s\n", errnoToString().c_str());
-        return -1;
-    }
-
-    /// Child
-    if (0 == pid)
-    {
-        sigset_t mask;
-        sigemptyset(&mask);
-        sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
-        sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
-
-        execvp(editor.c_str(), argv);
-        rx.print("Cannot execute %s: %s\n", editor.c_str(), errnoToString().c_str());
-        _exit(-1);
-    }
-
-    int status = 0;
-    do
-    {
-        int exited_pid = waitpid(pid, &status, 0);
-        if (exited_pid == -1)
-        {
-            if (errno == EINTR)
-                continue;
-
-            rx.print("Cannot waitpid: %s\n", errnoToString().c_str());
-            return -1;
-        }
-        else
-            break;
-    } while (true);
-    return status;
-}
-
 void ReplxxLineReader::openEditor()
 {
-    char filename[] = "clickhouse_replxx_XXXXXX.sql";
-    int fd = ::mkstemps(filename, 4);
-    if (-1 == fd)
-    {
-        rx.print("Cannot create temporary file to edit query: %s\n", errnoToString().c_str());
-        return;
-    }
+    TemporaryFile editor_file("clickhouse_client_editor_XXXXXX.sql");
+    editor_file.write(rx.get_state().text());
+    editor_file.close();

-    replxx::Replxx::State state(rx.get_state());
-
-    size_t bytes_written = 0;
-    const char * begin = state.text();
-    size_t offset = strlen(state.text());
-    while (bytes_written != offset)
+    char * const argv[] = {editor.data(), editor_file.getPath().data(), nullptr};
+    try
     {
-        ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written);
-        if ((-1 == res || 0 == res) && errno != EINTR)
+        if (executeCommand(argv) == 0)
         {
-            rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString().c_str());
-            break;
+            const std::string & new_query = readFile(editor_file.getPath());
+            rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size()));
         }
-        bytes_written += res;
     }
-
-    if (0 != ::close(fd))
+    catch (const std::runtime_error & e)
     {
-        rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString().c_str());
-        return;
-    }
-
-    if (0 == executeEditor(filename))
-    {
-        try
-        {
-            std::ifstream t(filename);
-            std::string str;
-            t.seekg(0, std::ios::end);
-            str.reserve(t.tellg());
-            t.seekg(0, std::ios::beg);
-            str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
-            rx.set_state(replxx::Replxx::State(str.c_str(), str.size()));
-        }
-        catch (...)
- { - rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString().c_str()); - return; - } + rx.print(e.what()); } if (bracketed_paste_enabled) enableBracketedPaste(); - - if (0 != ::unlink(filename)) - rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString().c_str()); } void ReplxxLineReader::enableBracketedPaste() From 58b61d8207c21c15e591aa4793d0d7ba6e889c6c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Sep 2022 14:09:53 +0200 Subject: [PATCH 60/84] clickhouse-client: add interactive history search with fzf-like utility Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 44 ++++++++++++++++++++++++++++++++++ base/base/ReplxxLineReader.h | 1 + 2 files changed, 45 insertions(+) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index ef8787bc0a3..32d3d9aafe7 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -375,6 +375,14 @@ ReplxxLineReader::ReplxxLineReader( return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); + + /// interactive search in history (ctrlp/fzf/skim) + auto interactive_history_search = [this](char32_t code) + { + openInteractiveHistorySearch(); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); } ReplxxLineReader::~ReplxxLineReader() @@ -443,6 +451,42 @@ void ReplxxLineReader::openEditor() enableBracketedPaste(); } +void ReplxxLineReader::openInteractiveHistorySearch() +{ + TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); + auto hs(rx.history_scan()); + while (hs.next()) + { + history_file.write(hs.get().text()); + history_file.write(std::string(1, '\0')); + } + history_file.close(); + + TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql"); + output_file.close(); + + char sh[] = "sh"; + char sh_c[] = "-c"; + std::string fzf = fmt::format("fzf --read0 --height=30% < {} > {}", history_file.getPath(), output_file.getPath()); + char * const argv[] = {sh, sh_c, fzf.data(), nullptr}; + + try + { + if (executeCommand(argv) == 0) + { + const std::string & new_query = readFile(output_file.getPath()); + rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); + } + } + catch (const std::runtime_error & e) + { + rx.print(e.what()); + } + + if (bracketed_paste_enabled) + enableBracketedPaste(); +} + void ReplxxLineReader::enableBracketedPaste() { bracketed_paste_enabled = true; diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index b9ec214d02c..ba2ccf903b6 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -27,6 +27,7 @@ private: void addToHistory(const String & line) override; int executeEditor(const std::string & path); void openEditor(); + void openInteractiveHistorySearch(); replxx::Replxx rx; replxx::Replxx::highlighter_callback_t highlighter; From aaa36e2b259f43a4336d4094069afb460cd322c2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Sep 2022 14:23:14 +0200 Subject: [PATCH 61/84] clickhouse-client: add support of sk (fzf-like in rust) Signed-off-by: Azat Khuzhin Co-authored-by: Antonio Andelic --- base/base/ReplxxLineReader.cpp | 53 +++++++++++++++++++++++++++++----- base/base/ReplxxLineReader.h | 1 + 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 32d3d9aafe7..04b7ed2bca7 100644 --- 
a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -14,7 +14,10 @@ #include #include #include +#include #include +#include +#include /// is_any_of namespace { @@ -35,6 +38,30 @@ std::string getEditor() return editor; } +std::string getFuzzyFinder() +{ + const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe) + + if (!env_path || !*env_path) + return {}; + + std::vector paths; + boost::split(paths, env_path, boost::is_any_of(":")); + for (const auto & path_str : paths) + { + std::filesystem::path path(path_str); + std::filesystem::path sk_bin_path = path / "sk"; + if (!access(sk_bin_path.c_str(), X_OK)) + return sk_bin_path; + + std::filesystem::path fzf_bin_path = path / "fzf"; + if (!access(fzf_bin_path.c_str(), X_OK)) + return fzf_bin_path; + } + + return {}; +} + /// See comments in ShellCommand::executeImpl() /// (for the vfork via dlsym()) int executeCommand(char * const argv[]) @@ -268,6 +295,7 @@ ReplxxLineReader::ReplxxLineReader( replxx::Replxx::highlighter_callback_t highlighter_) : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_)) , editor(getEditor()) + , fuzzy_finder(getFuzzyFinder()) { using namespace std::placeholders; using Replxx = replxx::Replxx; @@ -376,13 +404,16 @@ ReplxxLineReader::ReplxxLineReader( }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); - /// interactive search in history (ctrlp/fzf/skim) - auto interactive_history_search = [this](char32_t code) + /// interactive search in history (requires fzf/sk) + if (!fuzzy_finder.empty()) { - openInteractiveHistorySearch(); - return rx.invoke(Replxx::ACTION::REPAINT, code); - }; - rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + auto interactive_history_search = [this](char32_t code) + { + openInteractiveHistorySearch(); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + } } ReplxxLineReader::~ReplxxLineReader() @@ -453,6 +484,7 @@ void ReplxxLineReader::openEditor() void ReplxxLineReader::openInteractiveHistorySearch() { + assert(!fuzzy_finder.empty()); TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); auto hs(rx.history_scan()); while (hs.next()) @@ -467,8 +499,13 @@ void ReplxxLineReader::openInteractiveHistorySearch() char sh[] = "sh"; char sh_c[] = "-c"; - std::string fzf = fmt::format("fzf --read0 --height=30% < {} > {}", history_file.getPath(), output_file.getPath()); - char * const argv[] = {sh, sh_c, fzf.data(), nullptr}; + /// NOTE: You can use one of the following to configure the behaviour additionally: + /// - SKIM_DEFAULT_OPTIONS + /// - FZF_DEFAULT_OPTS + std::string fuzzy_finder_command = fmt::format( + "{} --read0 --height=30% < {} > {}", + fuzzy_finder, history_file.getPath(), output_file.getPath()); + char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; try { diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index ba2ccf903b6..fea1405a208 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -37,4 +37,5 @@ private: bool bracketed_paste_enabled = false; std::string editor; + std::string fuzzy_finder; }; From d0f14e1255480dfb7f0b6f31668a1069e99bdf6c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Sep 2022 17:39:03 +0200 Subject: [PATCH 62/84] clickhouse-client: proper support of vfork() w/o dlsym() in musl Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 10 
++++++++++ 1 file changed, 10 insertions(+) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 04b7ed2bca7..e1b97e936c2 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -66,7 +66,17 @@ std::string getFuzzyFinder() /// (for the vfork via dlsym()) int executeCommand(char * const argv[]) { +#if !defined(USE_MUSL) + /** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs, + * because of the resolving of symbols in the shared library + * http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html + * Therefore, separate the resolving of the symbol from the call. + */ static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); +#else + /// If we use Musl with static linking, there is no dlsym and no issue with vfork. + static void * real_vfork = reinterpret_cast(&vfork); +#endif if (!real_vfork) throw std::runtime_error("Cannot find vfork symbol"); From 8cc53a48ae99a765085f44a75fa49314d1f1cc7d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 26 Sep 2022 13:32:53 +0200 Subject: [PATCH 63/84] clickhouse-client: tune fzf/sk options to be a real reverse search Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index e1b97e936c2..916d4f9a74d 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -513,7 +513,7 @@ void ReplxxLineReader::openInteractiveHistorySearch() /// - SKIM_DEFAULT_OPTIONS /// - FZF_DEFAULT_OPTS std::string fuzzy_finder_command = fmt::format( - "{} --read0 --height=30% < {} > {}", + "{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}", fuzzy_finder, history_file.getPath(), output_file.getPath()); char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; From 287d1e68b1f5e190629ed39db1369eea0608e46b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 Sep 2022 12:22:23 +0000 Subject: [PATCH 64/84] Fix KeeperMap drop again --- src/Storages/StorageKeeperMap.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f6b110bbad0..11b6fe1b8dc 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -456,9 +456,9 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) { - zookeeper->removeChildrenRecursive(data_path); + zookeeper->tryRemoveChildrenRecursive(data_path, true); - bool completely_removed = false; + bool drop_done = false; Coordination::Requests ops; ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); @@ -473,20 +473,33 @@ bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::E case ZOK: { metadata_drop_lock->setAlreadyRemoved(); - completely_removed = true; + drop_done = true; LOG_INFO(log, "Metadata ({}) and data ({}) was successfully removed from ZooKeeper", metadata_path, data_path); break; } case ZNONODE: throw Exception(ErrorCodes::LOGICAL_ERROR, "There is a race condition between creation and removal of metadata. 
It's a bug"); case ZNOTEMPTY: - LOG_ERROR(log, "Metadata was not completely removed from ZooKeeper"); + { + // valid case when this can happen is if a table checked "dropped" path just before it was created. + // new table will create data/metadata paths again while drop is in progress + // only bad thing that can happen is if we start inserting data into new table while + // we remove data here (some data can be lost) + LOG_WARNING(log, "Metadata was not completely removed from ZooKeeper. Maybe some other table is using the same path"); + + // we need to remove at least "dropped" nodes + Coordination::Requests requests; + ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); + ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); + zookeeper->multi(requests); + drop_done = true; break; + } default: zkutil::KeeperMultiException::check(code, ops, responses); break; } - return completely_removed; + return drop_done; } void StorageKeeperMap::drop() From 2384761063ac455bf784382d680ecd9f3abe56cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 26 Sep 2022 15:38:10 +0200 Subject: [PATCH 65/84] Fix drop of completely dropped table --- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cc0ace576ce..3aabd1a02a7 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7487,6 +7487,10 @@ void StorageReplicatedMergeTree::createTableSharedID() const id = zookeeper->get(zookeeper_table_id_path); LOG_DEBUG(log, "Shared ID on path {} concurrently created, will set ID {}", zookeeper_table_id_path, id); } + else if (code == Coordination::Error::ZNONODE) + { + LOG_WARNING(log, "Shared ID on path {} is impossible to create because table was completely dropped, parts can be dropped without checks (using id {})", zookeeper_table_id_path, id); + } else if (code != Coordination::Error::ZOK) { throw zkutil::KeeperException(code, zookeeper_table_id_path); From e20d3803c43128f11bffd7adef5d0e7118fc3a63 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 26 Sep 2022 15:40:25 +0200 Subject: [PATCH 66/84] Better fix --- src/Storages/StorageReplicatedMergeTree.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3aabd1a02a7..552035f478c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7451,8 +7451,9 @@ String StorageReplicatedMergeTree::getTableSharedID() const /// can be called only during table initialization std::lock_guard lock(table_shared_id_mutex); + bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper; /// Can happen if table was partially initialized before drop by DatabaseCatalog - if (table_shared_id == UUIDHelpers::Nil) + if (maybe_has_metadata_in_zookeeper && table_shared_id == UUIDHelpers::Nil) createTableSharedID(); return toString(table_shared_id); @@ -7487,10 +7488,6 @@ void StorageReplicatedMergeTree::createTableSharedID() const id = zookeeper->get(zookeeper_table_id_path); LOG_DEBUG(log, "Shared ID on path {} concurrently created, will set ID {}", zookeeper_table_id_path, id); } - else if (code == Coordination::Error::ZNONODE) - { - LOG_WARNING(log, "Shared ID on path {} is impossible to create because table was completely dropped, parts can 
be dropped without checks (using id {})", zookeeper_table_id_path, id); - } else if (code != Coordination::Error::ZOK) { throw zkutil::KeeperException(code, zookeeper_table_id_path); From ec35ff9cd6f1c0e9d8190c64226a1ea42782f2a1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 13:32:12 +0000 Subject: [PATCH 67/84] Log git hash during startup We currently only log a compiler-generated "build id" at startup which is different for each build. That makes it useless to determine the exact source code state in tests (e.g. BC test) and from user log files (e.g. if someone compiled an intermediate version of ClickHouse). Current log message: Starting ClickHouse 22.10.1.1 with revision 54467, build id: 6F35820328F89C9F36E91C447FF9E61CAF0EF019, PID 42633 New log message: Starting ClickHouse 22.10.1.1 (revision 54467, git hash: b6b1f7f763f94ffa12133679a6f80342dd1c3afe, build id: 47B12BE61151926FBBD230DE42F3B7A6652AC482), PID 981813 --- CMakeLists.txt | 39 ++++++++++++++++++++++++++++- cmake/git_status.cmake | 22 ---------------- src/Daemon/BaseDaemon.cpp | 26 +++++++++++-------- src/Daemon/BaseDaemon.h | 3 ++- src/Daemon/CMakeLists.txt | 4 +++ src/Daemon/GitHash.generated.cpp.in | 10 ++++++++ src/Storages/System/CMakeLists.txt | 36 +++----------------------- 7 files changed, 72 insertions(+), 68 deletions(-) delete mode 100644 cmake/git_status.cmake create mode 100644 src/Daemon/GitHash.generated.cpp.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 64fb870b61b..b0accceddc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,44 @@ include (cmake/target.cmake) include (cmake/tools.cmake) include (cmake/ccache.cmake) include (cmake/clang_tidy.cmake) -include (cmake/git_status.cmake) + +find_package(Git) +# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for view SYSTEM.BUILD_OPTIONS +if (Git_FOUND) + # Commit hash + whether the building workspace was dirty or not + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_HASH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Branch name + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Date of the commit + SET(ENV{TZ} "UTC") + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Subject of the commit + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + message(STATUS "HEAD's commit hash ${GIT_HASH}") + + execute_process( + COMMAND ${GIT_EXECUTABLE} status + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) +else() + message(STATUS "Git could not be found.") +endif() # Ignore export() since we don't use it, # but it gets broken with a global targets via link_libraries() diff --git a/cmake/git_status.cmake b/cmake/git_status.cmake deleted file mode 100644 index c1047c0ccbf..00000000000 --- a/cmake/git_status.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Print the status of the git repository (if git is available). 
-# This is useful for troubleshooting build failure reports - -find_package(Git) - -if (Git_FOUND) - - execute_process( - COMMAND ${GIT_EXECUTABLE} rev-parse HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMIT_ID - OUTPUT_STRIP_TRAILING_WHITESPACE) - - message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}") - - execute_process( - COMMAND ${GIT_EXECUTABLE} status - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) - -else() - message(STATUS "Git could not be found.") -endif() diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index d449768935a..157255bba12 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -266,8 +266,8 @@ private: { size_t pos = message.find('\n'); - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message.substr(0, pos)); + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) {}", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, message.substr(0, pos)); /// Print trace from std::terminate exception line-by-line to make it easy for grep. while (pos != std::string_view::npos) @@ -315,14 +315,14 @@ private: if (query_id.empty()) { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (no query) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context } else { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, query_id, query, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context) } @@ -838,6 +838,7 @@ static void blockSignals(const std::vector & signals) throw Poco::Exception("Cannot block signal."); } +extern String getGitHash(); void BaseDaemon::initializeTerminationAndSignalProcessing() { @@ -870,13 +871,15 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() #if defined(__ELF__) && !defined(OS_FREEBSD) String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex(); if (build_id_hex.empty()) - build_id_info = "no build id"; + build_id = ""; else - build_id_info = "build id: " + build_id_hex; + build_id = build_id_hex; #else - build_id_info = "no build id"; + build_id = ""; #endif + git_hash = getGitHash(); + #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -888,8 +891,9 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() void BaseDaemon::logRevision() const { Poco::Logger::root().information("Starting " + std::string{VERSION_FULL} - + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", " + build_id_info + + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) + + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", build id: " + (build_id.empty() ? 
"" : build_id) + ")" + ", PID " + std::to_string(getpid())); } diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index 1b67ca986a8..d248ad9cec9 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -172,7 +172,8 @@ protected: DB::ConfigProcessor::LoadedConfig loaded_config; Poco::Util::AbstractConfiguration * last_configuration = nullptr; - String build_id_info; + String build_id; + String git_hash; String stored_binary_hash; std::vector handled_signals; diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 78c133d9893..7499d75d514 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,7 +1,11 @@ +set (GENERATED_GIT_HASH_CPP "${CMAKE_CURRENT_BINARY_DIR}/GitHash.generated.cpp") +configure_file(GitHash.generated.cpp.in ${GENERATED_GIT_HASH_CPP}) + add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp + ${GENERATED_GIT_HASH_CPP} ) if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/src/Daemon/GitHash.generated.cpp.in b/src/Daemon/GitHash.generated.cpp.in new file mode 100644 index 00000000000..833e9304b29 --- /dev/null +++ b/src/Daemon/GitHash.generated.cpp.in @@ -0,0 +1,10 @@ +// .cpp autogenerated by cmake + +#include + +static const String GIT_HASH = "@GIT_HASH@"; + +String getGitHash() +{ + return GIT_HASH; +} diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index efc4c0ed37b..d2f7a5426db 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -2,49 +2,18 @@ # You can also regenerate it manually this way: # execute_process(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh") -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - set (CONFIG_BUILD "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp") + get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) - get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) - -find_package(Git) -if(Git_FOUND) - # The commit's git hash, and whether the building workspace was dirty or not - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Git branch name - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_BRANCH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The date of the commit - SET(ENV{TZ} "UTC") - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_DATE - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The subject of the commit - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_COMMIT_SUBJECT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) endfunction() + generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") @@ -78,6 +47,7 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings 
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) +include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) clickhouse_embed_binaries( TARGET information_schema_metadata RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" From 5c8ce2f543dd27eb623a1009ec7d040bdd78bdb5 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 Sep 2022 14:28:03 +0000 Subject: [PATCH 68/84] More correct --- src/Storages/StorageKeeperMap.cpp | 101 +++++++++++++++--------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 11b6fe1b8dc..bde6c4df80b 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -316,6 +316,36 @@ StorageKeeperMap::StorageKeeperMap( for (size_t i = 0; i < 1000; ++i) { + std::string stored_metadata_string; + auto exists = client->tryGet(metadata_path, stored_metadata_string); + + if (exists) + { + // this requires same name for columns + // maybe we can do a smarter comparison for columns and primary key expression + if (stored_metadata_string != metadata_string) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path {} is already used but the stored table definition doesn't match. Stored metadata: {}", + root_path, + stored_metadata_string); + + auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); + + // tables_path was removed with drop + if (code == Coordination::Error::ZNONODE) + { + LOG_INFO(log, "Metadata nodes were removed by another server, will retry"); + continue; + } + else if (code != Coordination::Error::ZOK) + { + throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", root_path); + } + + return; + } + if (client->exists(dropped_path)) { LOG_INFO(log, "Removing leftover nodes"); @@ -342,45 +372,29 @@ StorageKeeperMap::StorageKeeperMap( } } - std::string stored_metadata_string; - auto exists = client->tryGet(metadata_path, stored_metadata_string); + Coordination::Requests create_requests + { + zkutil::makeCreateRequest(metadata_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(data_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(tables_path, "", zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(table_path, "", zkutil::CreateMode::Persistent), + }; - if (exists) + Coordination::Responses create_responses; + auto code = client->tryMulti(create_requests, create_responses); + if (code == Coordination::Error::ZNODEEXISTS) { - // this requires same name for columns - // maybe we can do a smarter comparison for columns and primary key expression - if (stored_metadata_string != metadata_string) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Path {} is already used but the stored table definition doesn't match. 
Stored metadata: {}", - root_path, - stored_metadata_string); + LOG_WARNING(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); + continue; } - else + else if (code != Coordination::Error::ZOK) { - auto code = client->tryCreate(metadata_path, metadata_string, zkutil::CreateMode::Persistent); - if (code == Coordination::Error::ZNODEEXISTS) - continue; - else if (code != Coordination::Error::ZOK) - throw Coordination::Exception(code, metadata_path); + zkutil::KeeperMultiException::check(code, create_requests, create_responses); } - client->createIfNotExists(tables_path, ""); - auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); - - if (code == Coordination::Error::ZOK) - { - // metadata now should be guaranteed to exist because we added our UUID to the tables_path - client->createIfNotExists(data_path, ""); - table_is_valid = true; - return; - } - - if (code == Coordination::Error::ZNONODE) - LOG_INFO(log, "Metadata nodes were deleted in background, will retry"); - else - throw Coordination::Exception(code, table_path); + table_is_valid = true; + return; } throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot create metadata for table, because it is removed concurrently or because of wrong root_path ({})", root_path); @@ -456,9 +470,9 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) { - zookeeper->tryRemoveChildrenRecursive(data_path, true); + zookeeper->removeChildrenRecursive(data_path); - bool drop_done = false; + bool completely_removed = false; Coordination::Requests ops; ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); @@ -473,33 +487,20 @@ bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::E case ZOK: { metadata_drop_lock->setAlreadyRemoved(); - drop_done = true; + completely_removed = true; LOG_INFO(log, "Metadata ({}) and data ({}) was successfully removed from ZooKeeper", metadata_path, data_path); break; } case ZNONODE: throw Exception(ErrorCodes::LOGICAL_ERROR, "There is a race condition between creation and removal of metadata. It's a bug"); case ZNOTEMPTY: - { - // valid case when this can happen is if a table checked "dropped" path just before it was created. - // new table will create data/metadata paths again while drop is in progress - // only bad thing that can happen is if we start inserting data into new table while - // we remove data here (some data can be lost) - LOG_WARNING(log, "Metadata was not completely removed from ZooKeeper. 
Maybe some other table is using the same path");
-
-            // we need to remove at least "dropped" nodes
-            Coordination::Requests requests;
-            ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1));
-            ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1));
-            zookeeper->multi(requests);
-            drop_done = true;
+            LOG_ERROR(log, "Metadata was not completely removed from ZooKeeper");
             break;
-        }
         default:
             zkutil::KeeperMultiException::check(code, ops, responses);
             break;
     }
-    return drop_done;
+    return completely_removed;
 }

 void StorageKeeperMap::drop()

From 9711950c35edfe6f5eadb9c96a08a26150d41939 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 26 Sep 2022 15:04:56 +0000
Subject: [PATCH 69/84] Fix build

---
 programs/keeper/Keeper.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 6d487a68111..fdfe0cef2b3 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -490,8 +490,9 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
 void Keeper::logRevision() const
 {
     Poco::Logger::root().information("Starting ClickHouse Keeper " + std::string{VERSION_STRING}
-        + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision())
-        + ", " + build_id_info
+        + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision())
+        + ", git hash: " + (git_hash.empty() ? "" : git_hash)
+        + ", build id: " + (build_id.empty() ? "" : build_id) + ")"
         + ", PID " + std::to_string(getpid()));
 }

From 922834ccde612f81ac5da61c72ef2f0afdadd64c Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Mon, 26 Sep 2022 19:31:08 +0200
Subject: [PATCH 70/84] minor fixes

---
 src/Databases/DatabaseReplicatedWorker.cpp      |  4 ++++
 .../02423_ddl_for_opentelemetry.reference       | 10 ----------
 .../0_stateless/02423_ddl_for_opentelemetry.sh  | 15 ++++++---------
 3 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index a63235b3db0..8c2983e1939 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -221,6 +221,10 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
     /// NOTE Possibly it would be better to execute initial query on the most up-to-date node,
     /// but it requires more complex logic around /try node.
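The lines added just below are the initiator's half of trace propagation for Replicated databases: open a span, tag it with the cluster name, and copy the current tracing context into the DDL log entry, so the replica that later picks the entry up from ZooKeeper can continue the same trace. A self-contained toy model of that hand-off (all types here are illustrative stand-ins, not the real OpenTelemetry classes from this patch):

    #include <iostream>
    #include <string>

    // Stand-ins for the real tracing types; only the shape of the hand-off matters.
    struct TracingContext { std::string trace_id; std::string span_id; };

    struct DDLLogEntry
    {
        std::string query;
        TracingContext tracing_context; // travels through ZooKeeper together with the entry
    };

    int main()
    {
        // Initiator: the active span's context is captured at enqueue time...
        TracingContext current{"0af7651916cd43dd8448eb211c80319c", "b7ad6b7169203331"};
        DDLLogEntry entry{"CREATE TABLE t ON CLUSTER c (x UInt8) ...", current};

        // ...and the worker, possibly on another server, restores it before executing,
        // so the spans it emits become children of the initiator's trace.
        std::cout << "worker continues trace " << entry.tracing_context.trace_id << '\n';
    }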
+ OpenTelemetry::SpanHolder span(__FUNCTION__); + span.addAttribute("clickhouse.cluster", database->getDatabaseName()); + entry.tracing_context = OpenTelemetry::CurrentContext(); + auto zookeeper = getAndSetZooKeeper(); UInt32 our_log_ptr = getLogPointer(); UInt32 max_log_ptr = parse(zookeeper->get(database->zookeeper_path + "/max_log_ptr")); diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference index 348dc062885..9c440ab4c67 100644 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference @@ -1,13 +1,3 @@ -===ddl_format_version 1==== -1 -1 -1 -1 -===ddl_format_version 2==== -1 -1 -1 -1 ===ddl_format_version 3==== 1 1 diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index b055a155acf..6164ff97d9f 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: distributed +# Tags: zookeeper CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -16,7 +16,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function execute_query() { # Some queries are supposed to fail, use -f to suppress error messages - echo $2 | ${CLICKHOUSE_CURL} -f \ + echo $2 | ${CLICKHOUSE_CURL_COMMAND} -q -s --max-time 180 \ -X POST \ -H "traceparent: 00-$1-5150000000000515-01" \ -H "tracestate: a\nb cd" \ @@ -75,10 +75,9 @@ DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry; cluster_name=$($CLICKHOUSE_CLIENT -q "select if(engine = 'Replicated', name, 'test_shard_localhost') from system.databases where name='$CLICKHOUSE_DATABASE'") # -# Normal cases for ALL distributed_ddl_entry_format_version. 
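The check_span helper used throughout this test (its definition sits earlier in the script, outside the quoted hunks) boils down to counting the rows in system.opentelemetry_span_log that match a given trace, an operation-name pattern, and an optional attribute filter, then comparing that count with the expected value. A hedged sketch of such a check, using the standard columns of that system table; the helper's real body may differ, and in practice the span log also has to be flushed (for example via SYSTEM FLUSH LOGS) before counting:

    # Sketch only: count spans for one trace and compare with an expectation.
    expected=1
    trace_id="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"   # placeholder; the test generates one per case

    count=$(${CLICKHOUSE_CLIENT} -q "
        SELECT count()
        FROM system.opentelemetry_span_log
        WHERE lower(hex(trace_id)) = '${trace_id}'
          AND operation_name LIKE '%executeDDLQueryOnCluster%'
          AND attribute['clickhouse.cluster'] = 'test_shard_localhost'")

    if [ "$count" -eq "$expected" ]; then echo 1; else echo "expected $expected span(s), got $count"; fi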
# Only format_version 4 enables the tracing # -for ddl_version in 1 2 3 4; do +for ddl_version in 3 4; do # Echo a separator so that the reference file is more clear for reading echo "===ddl_format_version ${ddl_version}====" @@ -87,12 +86,10 @@ for ddl_version in 1 2 3 4; do check_span 1 $trace_id "HTTPHandler" - # For Replcated database engine, it does not call 'executeDDLQueryOnCluster' method, we don't need to check it if [ $cluster_name = "test_shard_localhost" ]; then check_span 1 $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" else - # Only echo a value so that comparison of reference is correct - echo 1 + check_span 1 $trace_id "%tryEnqueueAndExecuteEntry%" "attribute['clickhouse.cluster']='${cluster_name}'" fi if [ $cluster_name = "test_shard_localhost" ]; then @@ -137,14 +134,14 @@ done echo "===exception====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); -execute_query $trace_id "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" +execute_query $trace_id "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" 2>&1| grep -Fv "UNKNOWN_TABLE" check_span 1 $trace_id "HTTPHandler" if [ $cluster_name = "test_shard_localhost" ]; then expected=1 else - # For Replicated database, executeDDLQueryOnCluster is not called + # For Replicated database it will fail on initiator before enqueueing distributed DDL expected=0 fi check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" From 0f6a44efef4ca21cc922da0b1a67bf40f108b9db Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 26 Sep 2022 20:11:30 +0200 Subject: [PATCH 71/84] fix missing metadata_version for old tables --- .../ReplicatedMergeTreeAttachThread.cpp | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index ba4979e57f2..90a28c373c7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int REPLICA_STATUS_CHANGED; } ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_) @@ -54,6 +55,8 @@ void ReplicatedMergeTreeAttachThread::run() { if (const auto * coordination_exception = dynamic_cast(&e)) needs_retry = Coordination::isHardwareError(coordination_exception->code); + else if (e.code() == ErrorCodes::REPLICA_STATUS_CHANGED) + needs_retry = true; if (needs_retry) { @@ -84,14 +87,14 @@ void ReplicatedMergeTreeAttachThread::run() void ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const String & replica_path) { - /// Since 20.4 and until 22.9 "/metadata" and "/metadata_version" nodes were created on replica startup. + /// Since 20.4 and until 22.9 "/metadata" node was created on replica startup and "/metadata_version" was created on ALTER. 
/// Since 21.12 we could use "/metadata" to check if replica is dropped (see StorageReplicatedMergeTree::dropReplica), /// but it did not work correctly, because "/metadata" node was re-created on server startup. /// Since 22.9 we do not recreate these nodes and use "/host" to check if replica is dropped. String replica_metadata; const bool replica_metadata_exists = zookeeper->tryGet(replica_path + "/metadata", replica_metadata); - if (!replica_metadata_exists || replica_metadata.empty() || !zookeeper->exists(replica_path + "/metadata_version")) + if (!replica_metadata_exists || replica_metadata.empty()) { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Upgrade from 20.3 and older to 22.9 and newer " "should be done through an intermediate version (failed to get metadata or metadata_version for {}," @@ -139,11 +142,36 @@ void ReplicatedMergeTreeAttachThread::runImpl() checkHasReplicaMetadataInZooKeeper(zookeeper, replica_path); + String replica_metadata_version; + const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); + if (replica_metadata_version_exists) + { + storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); + } + else + { + /// Table was created before 20.4 and was never altered, + /// let's initialize replica metadata version from global metadata version. + Coordination::Stat table_metadata_version_stat; + zookeeper->get(zookeeper_path + "/metadata", &table_metadata_version_stat); + + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/metadata", table_metadata_version_stat.version)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", toString(table_metadata_version_stat.version), zkutil::CreateMode::Persistent)); + + Coordination::Responses res; + auto code = zookeeper->tryMulti(ops, res); + + if (code == Coordination::Error::ZBADVERSION) + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Failed to initialize metadata_version " + "because table was concurrently altered, will retry"); + + zkutil::KeeperMultiException::check(code, ops, res); + } + storage.checkTableStructure(replica_path, metadata_snapshot); storage.checkParts(skip_sanity_checks); - storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); - /// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart), /// don't allow to reinitialize them, delete each of them immediately. 
storage.clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}); From 99725e68d1ccf68df4b6ed05af5823cc407a40ed Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 19:28:27 +0000 Subject: [PATCH 72/84] Fix standalone keeper build --- programs/keeper/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index a5ad506abe6..ac8f3b667f6 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -92,6 +92,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp Keeper.cpp TinyContext.cpp From 540729119184db0565015f61ac298605c11b310a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Sep 2022 22:36:12 +0200 Subject: [PATCH 73/84] Revert "ColumnVector: optimize UInt8 index with AVX512VBMI (#41247)" This reverts commit 8de524cb7371ee2f0245239c798e95008f3eb0e8. --- src/Columns/ColumnVector.h | 136 +------------------------------------ 1 file changed, 2 insertions(+), 134 deletions(-) diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index f967b2b4039..70a8a9bce4b 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -7,15 +7,11 @@ #include #include #include -#include #include #include #include "config_core.h" -#if USE_MULTITARGET_CODE -# include -#endif namespace DB { @@ -395,124 +391,6 @@ protected: Container data; }; -DECLARE_DEFAULT_CODE( -template -inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) -{ - for (size_t i = 0; i < limit; ++i) - res_data[i] = data[indexes[i]]; -} -); - -DECLARE_AVX512VBMI_SPECIFIC_CODE( -template -inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) -{ - static constexpr UInt64 MASK64 = 0xffffffffffffffff; - const size_t limit64 = limit & ~63; - size_t pos = 0; - size_t data_size = data.size(); - - auto data_pos = reinterpret_cast(data.data()); - auto indexes_pos = reinterpret_cast(indexes.data()); - auto res_pos = reinterpret_cast(res_data.data()); - - if (data_size <= 64) - { - /// one single mask load for table size <= 64 - __mmask64 last_mask = MASK64 >> (64 - data_size); - __m512i table1 = _mm512_maskz_loadu_epi8(last_mask, data_pos); - - /// 64 bytes table lookup using one single permutexvar_epi8 - while (pos < limit64) - { - __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); - __m512i out = _mm512_permutexvar_epi8(vidx, table1); - _mm512_storeu_epi8(res_pos + pos, out); - pos += 64; - } - /// tail handling - if (limit > limit64) - { - __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); - __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); - __m512i out = _mm512_permutexvar_epi8(vidx, table1); - _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); - } - } - else if (data_size <= 128) - { - /// table size (64, 128] requires 2 zmm load - __mmask64 last_mask = MASK64 >> (128 - data_size); - __m512i table1 = _mm512_loadu_epi8(data_pos); - __m512i table2 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 64); - - /// 128 bytes table lookup using one single permute2xvar_epi8 - while (pos < limit64) - { - __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); - __m512i out = 
_mm512_permutex2var_epi8(table1, vidx, table2); - _mm512_storeu_epi8(res_pos + pos, out); - pos += 64; - } - if (limit > limit64) - { - __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); - __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); - __m512i out = _mm512_permutex2var_epi8(table1, vidx, table2); - _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); - } - } - else - { - if (data_size > 256) - { - /// byte index will not exceed 256 boundary. - data_size = 256; - } - - __m512i table1 = _mm512_loadu_epi8(data_pos); - __m512i table2 = _mm512_loadu_epi8(data_pos + 64); - __m512i table3, table4; - if (data_size <= 192) - { - /// only 3 tables need to load if size <= 192 - __mmask64 last_mask = MASK64 >> (192 - data_size); - table3 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 128); - table4 = _mm512_setzero_si512(); - } - else - { - __mmask64 last_mask = MASK64 >> (256 - data_size); - table3 = _mm512_loadu_epi8(data_pos + 128); - table4 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 192); - } - - /// 256 bytes table lookup can use: 2 permute2xvar_epi8 plus 1 blender with MSB - while (pos < limit64) - { - __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); - __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); - __m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4); - __mmask64 msb = _mm512_movepi8_mask(vidx); - __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); - _mm512_storeu_epi8(res_pos + pos, out); - pos += 64; - } - if (limit > limit64) - { - __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); - __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); - __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); - __m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4); - __mmask64 msb = _mm512_movepi8_mask(vidx); - __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); - _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); - } - } -} -); - template template ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_t limit) const @@ -521,18 +399,8 @@ ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_ auto res = this->create(limit); typename Self::Container & res_data = res->getData(); -#if USE_MULTITARGET_CODE - if constexpr (sizeof(T) == 1 && sizeof(Type) == 1) - { - /// VBMI optimization only applicable for (U)Int8 types - if (isArchSupported(TargetArch::AVX512VBMI)) - { - TargetSpecific::AVX512VBMI::vectorIndexImpl(data, indexes, limit, res_data); - return res; - } - } -#endif - TargetSpecific::Default::vectorIndexImpl(data, indexes, limit, res_data); + for (size_t i = 0; i < limit; ++i) + res_data[i] = data[indexes[i]]; return res; } From 6d7de37e3d4c01d9169750f98a131cdb4238e8f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Sep 2022 23:52:14 +0200 Subject: [PATCH 74/84] Small fix in dashboard --- programs/server/dashboard.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index e63a277497a..f013e3ac064 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -820,7 +820,7 @@ async function draw(idx, chart, url_params, query) { sync.sub(plots[idx]); /// Set title - const title = queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ); + const title = queries[idx].title ? 
queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : ''; chart.querySelector('.title').firstChild.data = title; } From 588a5e5a42224e2e85f7878ed6fd1b0e881c85b4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 07:29:18 +0000 Subject: [PATCH 75/84] Simplify a bit --- programs/keeper/CMakeLists.txt | 2 +- src/Daemon/CMakeLists.txt | 5 ++--- src/Daemon/GitHash.cpp.in | 8 ++++++++ src/Daemon/GitHash.generated.cpp.in | 10 ---------- src/Storages/System/CMakeLists.txt | 6 ++---- ...nerated.cpp.in => StorageSystemBuildOptions.cpp.in} | 2 +- 6 files changed, 14 insertions(+), 19 deletions(-) create mode 100644 src/Daemon/GitHash.cpp.in delete mode 100644 src/Daemon/GitHash.generated.cpp.in rename src/Storages/System/{StorageSystemBuildOptions.generated.cpp.in => StorageSystemBuildOptions.cpp.in} (98%) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index ac8f3b667f6..ce176ccade5 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -33,7 +33,7 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) - # Sraight list of all required sources + # Straight list of all required sources set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 7499d75d514..f02fd69aa79 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,11 +1,10 @@ -set (GENERATED_GIT_HASH_CPP "${CMAKE_CURRENT_BINARY_DIR}/GitHash.generated.cpp") -configure_file(GitHash.generated.cpp.in ${GENERATED_GIT_HASH_CPP}) +configure_file(GitHash.cpp.in GitHash.generated.cpp) add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp - ${GENERATED_GIT_HASH_CPP} + GitHash.generated.cpp ) if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/src/Daemon/GitHash.cpp.in b/src/Daemon/GitHash.cpp.in new file mode 100644 index 00000000000..4a2da793fc2 --- /dev/null +++ b/src/Daemon/GitHash.cpp.in @@ -0,0 +1,8 @@ +// File was generated by CMake + +#include + +String getGitHash() +{ + return "@GIT_HASH@"; +} diff --git a/src/Daemon/GitHash.generated.cpp.in b/src/Daemon/GitHash.generated.cpp.in deleted file mode 100644 index 833e9304b29..00000000000 --- a/src/Daemon/GitHash.generated.cpp.in +++ /dev/null @@ -1,10 +0,0 @@ -// .cpp autogenerated by cmake - -#include - -static const String GIT_HASH = "@GIT_HASH@"; - -String getGitHash() -{ - return GIT_HASH; -} diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index d2f7a5426db..6bc080045f8 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -2,8 +2,6 @@ # You can also regenerate it manually this way: # execute_process(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh") -set (CONFIG_BUILD "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp") - get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) @@ -11,14 +9,14 @@ function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) - 
configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) + configure_file(StorageSystemBuildOptions.cpp.in StorageSystemBuildOptions.generated.cpp) endfunction() generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(storages_system .) -list (APPEND storages_system_sources ${CONFIG_BUILD}) +list (APPEND storages_system_sources StorageSystemBuildOptions.generated.cpp) add_custom_target(generate-contributors ./StorageSystemContributors.sh diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in similarity index 98% rename from src/Storages/System/StorageSystemBuildOptions.generated.cpp.in rename to src/Storages/System/StorageSystemBuildOptions.cpp.in index dde90ce459a..117d97d2cfd 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -1,4 +1,4 @@ -// .cpp autogenerated by cmake +// File was generated by CMake const char * auto_config_build[] { From 19062e9d9743f6a926d24fa26abe1f3b56cd2354 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 27 Sep 2022 14:26:45 +0300 Subject: [PATCH 76/84] Update src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp Co-authored-by: Antonio Andelic --- src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 90a28c373c7..7f91ffee1fe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -146,7 +146,7 @@ void ReplicatedMergeTreeAttachThread::runImpl() const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); if (replica_metadata_version_exists) { - storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); + storage.metadata_version = parse(replica_metadata_version); } else { From 823d8fb6cd4bf900564e68caedffdfa57b359ac6 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 11:43:31 +0000 Subject: [PATCH 77/84] Move git calls back into git.cmake + renamed the file from originally "git_status.cmake" to "git.cmake" (because we not longer run only "git status") --- CMakeLists.txt | 39 +-------------------------------------- cmake/git.cmake | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 38 deletions(-) create mode 100644 cmake/git.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index b0accceddc3..c737046a5f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,44 +18,7 @@ include (cmake/target.cmake) include (cmake/tools.cmake) include (cmake/ccache.cmake) include (cmake/clang_tidy.cmake) - -find_package(Git) -# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. 
for view SYSTEM.BUILD_OPTIONS -if (Git_FOUND) - # Commit hash + whether the building workspace was dirty or not - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Branch name - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_BRANCH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Date of the commit - SET(ENV{TZ} "UTC") - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_DATE - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Subject of the commit - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMIT_SUBJECT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - - message(STATUS "HEAD's commit hash ${GIT_HASH}") - - execute_process( - COMMAND ${GIT_EXECUTABLE} status - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) -else() - message(STATUS "Git could not be found.") -endif() +include (cmake/git.cmake) # Ignore export() since we don't use it, # but it gets broken with a global targets via link_libraries() diff --git a/cmake/git.cmake b/cmake/git.cmake new file mode 100644 index 00000000000..93f38fd389c --- /dev/null +++ b/cmake/git.cmake @@ -0,0 +1,42 @@ +find_package(Git) + +# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for view SYSTEM.BUILD_OPTIONS. +if (Git_FOUND) + # Commit hash + whether the building workspace was dirty or not + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_HASH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Branch name + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Date of the commit + SET(ENV{TZ} "UTC") + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Subject of the commit + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + message(STATUS "HEAD's commit hash ${GIT_HASH}") + + execute_process( + COMMAND ${GIT_EXECUTABLE} status + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) +else() + message(STATUS "Git could not be found.") +endif() + From 1f3f86e5bfd5c1358e24a7b423495ec3e312bb68 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 11:46:56 +0000 Subject: [PATCH 78/84] Cosmetics --- cmake/git.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/git.cmake b/cmake/git.cmake index 93f38fd389c..397ec3cd081 100644 --- a/cmake/git.cmake +++ b/cmake/git.cmake @@ -31,7 +31,7 @@ if (Git_FOUND) OUTPUT_VARIABLE GIT_COMMIT_SUBJECT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "HEAD's commit hash ${GIT_HASH}") + message(STATUS "Git HEAD commit hash: ${GIT_HASH}") execute_process( COMMAND ${GIT_EXECUTABLE} status From 2f237a8a2c73009699bf176d74acc0a38eb7d72b Mon Sep 17 00:00:00 2001 From: Kseniia 
Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 27 Sep 2022 15:30:18 +0200 Subject: [PATCH 79/84] Update registerStorageMergeTree.cpp --- src/Storages/MergeTree/registerStorageMergeTree.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 4274386e393..6982521f76a 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -683,8 +683,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replicated) { - auto storage_policy = args.getContext()->getStoragePolicy(storage_settings->storage_policy); - return std::make_shared( zookeeper_path, replica_name, From 728fe5d06fcb4e0866b8db584dcd5b6475473a4b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 27 Sep 2022 16:00:44 +0200 Subject: [PATCH 80/84] Change log level --- src/Storages/StorageKeeperMap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index bde6c4df80b..28061aaaf48 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -384,7 +384,7 @@ StorageKeeperMap::StorageKeeperMap( auto code = client->tryMulti(create_requests, create_responses); if (code == Coordination::Error::ZNODEEXISTS) { - LOG_WARNING(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); + LOG_INFO(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); continue; } else if (code != Coordination::Error::ZOK) From 4be153cbd326d47a22b3b1d13466bd02f30a7a6f Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 27 Sep 2022 10:21:35 -0400 Subject: [PATCH 81/84] fix link from intro --- docs/en/sql-reference/statements/create/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..972acac8aaa 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # CREATE VIEW -Creates a new view. Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). +Creates a new view. Views can be [normal](#normal), [materialized](#materialized-view), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). ## Normal View From 44d3eccf4ca99cb4210cb2e52226dfceafc377f6 Mon Sep 17 00:00:00 2001 From: mosinnik Date: Tue, 27 Sep 2022 19:13:40 +0300 Subject: [PATCH 82/84] Update external-data.md: fix lost double hyphens --- .../engines/table-engines/special/external-data.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md index 95ae1aa9059..b98039f768a 100644 --- a/docs/ru/engines/table-engines/special/external-data.md +++ b/docs/ru/engines/table-engines/special/external-data.md @@ -22,17 +22,17 @@ ClickHouse позволяет отправить на сервер данные, Таких секций может быть несколько - по числу передаваемых таблиц. -**–external** - маркер начала секции. -**–file** - путь к файлу с дампом таблицы, или -, что обозначает stdin.
-Из stdin может быть считана только одна таблица. +- **--external** - маркер начала секции. +- **--file** - путь к файлу с дампом таблицы, или `-`, что обозначает `stdin`. +Из `stdin` может быть считана только одна таблица. Следующие параметры не обязательные: -**–name** - имя таблицы. Если не указано - используется _data. -**–format** - формат данных в файле. Если не указано - используется TabSeparated. +- **--name** - имя таблицы. Если не указано - используется _data. +- **--format** - формат данных в файле. Если не указано - используется TabSeparated. Должен быть указан один из следующих параметров: -**–types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … -**–structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. +- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … +- **--structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. Файлы, указанные в file, будут разобраны форматом, указанным в format, с использованием типов данных, указанных в types или structure. Таблица будет загружена на сервер, и доступна там в качестве временной таблицы с именем name. From 4f23f6ef259d1f2b772f034670e63fab95abc376 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Tue, 27 Sep 2022 14:07:35 -0400 Subject: [PATCH 83/84] fix other links Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/statements/create/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 972acac8aaa..14c06ee0336 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # CREATE VIEW -Creates a new view. Views can be [normal](#normal), [materialized](#materialized-view), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). +Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features). ## Normal View From 134157df3d307f816eac6df77acd66edba4c8d3e Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 27 Sep 2022 22:42:39 +0200 Subject: [PATCH 84/84] Update storage_conf.xml --- tests/config/config.d/storage_conf.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index dcf4d8e9100..a2a7f5cc750 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -8,6 +8,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -15,6 +16,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -22,6 +24,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -29,6 +32,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -36,6 +40,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -43,6 +48,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000
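A usage note on the external-data options cleaned up in PATCH 82 above: --external, --file, --name, --format, --types, and --structure are clickhouse-client switches for shipping a local file to the server as a temporary table. Below is a minimal sketch of how they combine, assuming a locally running server; the file name, sample data, and queries are illustrative only, while the flags and their defaults (table name _data, format TabSeparated, columns _1, _2, ...) are exactly those listed in the patched documentation.

    # Illustrative data file: three UInt64 values, one per line
    # (TabSeparated is the default format).
    printf '1\n2\n3\n' > ids.tsv

    # Without --name/--structure the temporary table is called _data
    # and its columns _1, _2, ...
    clickhouse-client --query "SELECT sum(_1) FROM _data" \
        --external --file=ids.tsv --types=UInt64

    # The same file with an explicit table name and a typed, named column.
    clickhouse-client --query "SELECT count() FROM ids WHERE UserID > 1" \
        --external --file=ids.tsv --name=ids --structure="UserID UInt64"

Passing `--file=-` instead would read the single external table from stdin, as the patched Russian documentation describes.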