From 0951627b24fbd01bb1fdbbccd17157a4fd16ec54 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 01/87] Kusto-phase1: Add Support to Kusto Query Language This is the initial implement of Kusto Query Language. in this commit, we support the following features as MVP : Tabular expression statements Limit returned results Select Column (basic project) sort, order Perform string equality operations Filter using a list of elements Filter using common string operations Some string operators Aggregate by columns Base aggregate functions only support avg, count ,min, max, sum Aggregate by time intervals --- src/Client/ClientBase.cpp | 15 +- src/Core/Settings.h | 1 + src/Interpreters/executeQuery.cpp | 19 +- src/Parsers/CMakeLists.txt | 1 + src/Parsers/Kusto/ParserKQLFilter.cpp | 39 ++++ src/Parsers/Kusto/ParserKQLFilter.h | 16 ++ src/Parsers/Kusto/ParserKQLLimit.cpp | 58 ++++++ src/Parsers/Kusto/ParserKQLLimit.h | 17 ++ src/Parsers/Kusto/ParserKQLOperators.cpp | 239 +++++++++++++++++++++++ src/Parsers/Kusto/ParserKQLOperators.h | 103 ++++++++++ src/Parsers/Kusto/ParserKQLProject.cpp | 47 +++++ src/Parsers/Kusto/ParserKQLProject.h | 22 +++ src/Parsers/Kusto/ParserKQLQuery.cpp | 123 ++++++++++++ src/Parsers/Kusto/ParserKQLQuery.h | 25 +++ src/Parsers/Kusto/ParserKQLSort.cpp | 71 +++++++ src/Parsers/Kusto/ParserKQLSort.h | 16 ++ src/Parsers/Kusto/ParserKQLStatement.cpp | 61 ++++++ src/Parsers/Kusto/ParserKQLStatement.h | 45 +++++ src/Parsers/Kusto/ParserKQLSummarize.cpp | 162 +++++++++++++++ src/Parsers/Kusto/ParserKQLSummarize.h | 19 ++ src/Parsers/Kusto/ParserKQLTable.cpp | 68 +++++++ src/Parsers/Kusto/ParserKQLTable.h | 18 ++ src/Parsers/Lexer.cpp | 2 +- src/Parsers/Lexer.h | 1 + src/Parsers/tests/gtest_Parser.cpp | 179 +++++++++++++++++ 25 files changed, 1359 insertions(+), 8 deletions(-) create mode 100644 src/Parsers/Kusto/ParserKQLFilter.cpp create mode 100644 src/Parsers/Kusto/ParserKQLFilter.h create mode 100644 
src/Parsers/Kusto/ParserKQLLimit.cpp create mode 100644 src/Parsers/Kusto/ParserKQLLimit.h create mode 100644 src/Parsers/Kusto/ParserKQLOperators.cpp create mode 100644 src/Parsers/Kusto/ParserKQLOperators.h create mode 100644 src/Parsers/Kusto/ParserKQLProject.cpp create mode 100644 src/Parsers/Kusto/ParserKQLProject.h create mode 100644 src/Parsers/Kusto/ParserKQLQuery.cpp create mode 100644 src/Parsers/Kusto/ParserKQLQuery.h create mode 100644 src/Parsers/Kusto/ParserKQLSort.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSort.h create mode 100644 src/Parsers/Kusto/ParserKQLStatement.cpp create mode 100644 src/Parsers/Kusto/ParserKQLStatement.h create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.h create mode 100644 src/Parsers/Kusto/ParserKQLTable.cpp create mode 100644 src/Parsers/Kusto/ParserKQLTable.h diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index b586979b546..0da70193fea 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -69,7 +69,7 @@ #include #include #include - +#include namespace fs = std::filesystem; using namespace std::literals; @@ -299,7 +299,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); + std::shared_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -308,10 +308,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; + const String & sql_dialect = settings.sql_dialect; + + if (sql_dialect == "kusto") + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + else + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + 
if (is_interactive || ignore_error) { String message; - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { @@ -321,7 +328,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9e3b60a8e54..a48bfefbcf4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -38,6 +38,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ + M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 24649128cee..cd257567cd5 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -70,6 +70,7 @@ #include +#include namespace ProfileEvents { @@ -406,10 +407,22 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + const String & sql_dialect = settings.sql_dialect; + assert(sql_dialect == 
"clickhouse" || sql_dialect == "kusto"); - /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + if (sql_dialect == "kusto" && !internal) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else + { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } if (auto txn = context->getCurrentTransaction()) { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73f300fd5f6..73d46593e04 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) +add_headers_and_sources(clickhouse_parsers ./Kusto) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp new file mode 100644 index 00000000000..ad7ad807d03 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + Pos begin = pos; + String expr; + + KQLOperators convetor; + + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + if (expr.empty()) + expr = "(" + convetor.getExprFromToken(pos) +")"; + else + expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + } + + Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(tokenFilter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLFilter.h b/src/Parsers/Kusto/ParserKQLFilter.h new file mode 100644 index 00000000000..19bb38a7fda --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLFilter : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL where"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp new file mode 100644 index 00000000000..7811ebba9ab --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp 
@@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + Int64 minLimit = -1; + auto final_pos = pos; + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + auto isNumber = [&] + { + for (auto ch = pos->begin ; ch < pos->end; ++ch) + { + if (!isdigit(*ch)) + return false; + } + return true; + }; + + if (!isNumber()) + return false; + + auto limitLength = std::strtol(pos->begin,nullptr, 10); + if (-1 == minLimit) + { + minLimit = limitLength; + final_pos = pos; + } + else + { + if (minLimit > limitLength) + { + minLimit = limitLength; + final_pos = pos; + } + } + } + + if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h new file mode 100644 index 00000000000..d425659499d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLLimit : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL limit"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp new file mode 100644 index 00000000000..1db05d3c07a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -0,0 +1,239 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +{ + String new_expr, leftWildcards= "", rightWildcards=""; + + switch (wildcardsPos) + { + case WildcardsPos::none: + break; + + 
case WildcardsPos::left: + leftWildcards ="%"; + break; + + case WildcardsPos::right: + rightWildcards = "%"; + break; + + case WildcardsPos::both: + leftWildcards ="%"; + rightWildcards = "%"; + break; + } + + if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) + new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + else + throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::getExprFromToken(IParser::Pos pos) +{ + String res; + std::vector tokens; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + KQLOperatorValue opValue = KQLOperatorValue::none; + + auto token = String(pos->begin,pos->end); + + String op = token; + if ( token == "!" ) + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + op ="!"+String(pos->begin,pos->end); + } + else if (token == "matches") + { + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "regex") + op +=" regex"; + else + --pos; + } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "~") + op +="~"; + else + --pos; + } + + if (KQLOperator.find(op) != KQLOperator.end()) + opValue = KQLOperator[op]; + + String new_expr; + if (opValue == KQLOperatorValue::none) + tokens.push_back(op); + else + { + switch (opValue) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains: + new_expr = 
genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + break; + + case KQLOperatorValue::not_equal: + break; + + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + + case KQLOperatorValue::has: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + break; + + case KQLOperatorValue::has_any: + break; + + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + break; + + case KQLOperatorValue::not_hasprefix: + break; + + case KQLOperatorValue::hasprefix_cs: + break; + + case KQLOperatorValue::not_hasprefix_cs: + break; + + case KQLOperatorValue::hassuffix: + break; + + case 
KQLOperatorValue::not_hassuffix: + break; + + case KQLOperatorValue::hassuffix_cs: + break; + + case KQLOperatorValue::not_hassuffix_cs: + break; + + case KQLOperatorValue::in_cs: + new_expr = "in"; + break; + + case KQLOperatorValue::not_in_cs: + new_expr = "not in"; + break; + + case KQLOperatorValue::in: + break; + + case KQLOperatorValue::not_in: + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + break; + + default: + break; + } + + tokens.push_back(new_expr); + } + ++pos; + } + + for (auto it=tokens.begin(); it!=tokens.end(); ++it) + res = res + *it + " "; + + return res; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h new file mode 100644 index 00000000000..9beeeda55ef --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class KQLOperators { +public: + String getExprFromToken(IParser::Pos pos); +protected: + + enum class WildcardsPos:uint8_t + { + none, + left, + right, + both + }; + + enum class KQLOperatorValue : uint16_t + { + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal,//!~ + equal_cs, //= + not_equal_cs,//!= + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + 
hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in ,//!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, + }; + + std::unordered_map KQLOperator = + { + {"contains" , KQLOperatorValue::contains}, + {"!contains" , KQLOperatorValue::not_contains}, + {"contains_cs" , KQLOperatorValue::contains_cs}, + {"!contains_cs" , KQLOperatorValue::not_contains_cs}, + {"endswith" , KQLOperatorValue::endswith}, + {"!endswith" , KQLOperatorValue::not_endswith}, + {"endswith_cs" , KQLOperatorValue::endswith_cs}, + {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, + {"=~" , KQLOperatorValue::equal}, + {"!~" , KQLOperatorValue::not_equal}, + {"==" , KQLOperatorValue::equal_cs}, + {"!=" , KQLOperatorValue::not_equal_cs}, + {"has" , KQLOperatorValue::has}, + {"!has" , KQLOperatorValue::not_has}, + {"has_all" , KQLOperatorValue::has_all}, + {"has_any" , KQLOperatorValue::has_any}, + {"has_cs" , KQLOperatorValue::has_cs}, + {"!has_cs" , KQLOperatorValue::not_has_cs}, + {"hasprefix" , KQLOperatorValue::hasprefix}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix" , KQLOperatorValue::hassuffix}, + {"!hassuffix" , KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, + {"in" , KQLOperatorValue::in_cs}, + {"!in" , KQLOperatorValue::not_in_cs}, + {"in~" , KQLOperatorValue::in}, + {"!in~" , KQLOperatorValue::not_in}, + {"matches regex" , KQLOperatorValue::matches_regex}, + {"startswith" , KQLOperatorValue::startswith}, + {"!startswith" , KQLOperatorValue::not_startswith}, + {"startswith_cs" , KQLOperatorValue::startswith_cs}, + {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, + }; + String genHaystackOpExpr(std::vector 
&tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp new file mode 100644 index 00000000000..fee8cdb612b --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + String expr; + if (op_pos.empty()) + expr = "*"; + else + { + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it ; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + if (pos->type == TokenType::BareWord) + { + String tmp(pos->begin,pos->end); + + if (it != op_pos.begin() && columns.find(tmp) == columns.end()) + return false; + columns.insert(tmp); + } + ++pos; + } + } + expr = getExprFromToken(op_pos.back()); + } + + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + + + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h new file mode 100644 index 00000000000..3ab3c82f1be --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProject : public ParserKQLBase +{ +public: + void addColumn(String column) {columns.insert(column);} + +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::unordered_set columns; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp new file mode 100644 index 00000000000..0a9fa1fc4df --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -0,0 +1,123 @@ +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLBase :: parsePrepare(Pos & pos) +{ + op_pos.push_back(pos); + return true; +} + +String ParserKQLBase :: getExprFromToken(Pos pos) +{ + String res; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + res = res + String(pos->begin,pos->end) +" "; + ++pos; + } + return res; +} + +bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto select_query = std::make_shared(); + node = select_query; + + ParserKQLFilter KQLfilter_p; + ParserKQLLimit KQLlimit_p; + ParserKQLProject KQLproject_p; + ParserKQLSort KQLsort_p; + ParserKQLSummarize KQLsummarize_p; + ParserKQLTable KQLtable_p; + + ASTPtr select_expression_list; + ASTPtr tables; + ASTPtr where_expression; + ASTPtr group_expression_list; + ASTPtr order_expression_list; + ASTPtr limit_length; + + std::unordered_map KQLParser = { + { "filter",&KQLfilter_p}, + { "where",&KQLfilter_p}, + { "limit",&KQLlimit_p}, + { "take",&KQLlimit_p}, + { "project",&KQLproject_p}, + { "sort",&KQLsort_p}, + { "order",&KQLsort_p}, + { "summarize",&KQLsummarize_p}, + { "table",&KQLtable_p} + }; + + std::vector> operation_pos; + + operation_pos.push_back(std::make_pair("table",pos)); + + while (!pos->isEnd()) + { + ++pos; + if (pos->type == TokenType::PipeMark) + { + ++pos; + String KQLoperator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + return false; + ++pos; + operation_pos.push_back(std::make_pair(KQLoperator,pos)); + } + } + + for (auto &op_pos : operation_pos) + { + auto KQLoperator = op_pos.first; + auto npos = op_pos.second; + if (!npos.isValid()) + return false; + + if (!KQLParser[KQLoperator]->parsePrepare(npos)) + return false; + } + + if (!KQLtable_p.parse(pos, tables, expected)) + return false; + + if (!KQLproject_p.parse(pos, select_expression_list, expected)) + return false; + + if 
(!KQLlimit_p.parse(pos, limit_length, expected)) + return false; + + if (!KQLfilter_p.parse(pos, where_expression, expected)) + return false; + + if (!KQLsort_p.parse(pos, order_expression_list, expected)) + return false; + + if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + return false; + else + group_expression_list = KQLsummarize_p.group_expression_list; + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h new file mode 100644 index 00000000000..25aa4e6b83c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ +class ParserKQLBase : public IParserBase +{ +public: + virtual bool parsePrepare(Pos & pos) ; + +protected: + std::vector op_pos; + std::vector expresions; + virtual String getExprFromToken(Pos pos); +}; + +class ParserKQLQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp new file mode 100644 index 00000000000..9f226c2fc82 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSort :: parseImpl(Pos & pos, 
ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + bool has_dir = false; + std::vector has_directions; + ParserOrderByExpressionList order_list; + ASTPtr order_expression_list; + + ParserKeyword by("by"); + + pos = op_pos.back(); // sort only affected by last one + + if (!by.ignore(pos, expected)) + return false; + + if (!order_list.parse(pos,order_expression_list,expected)) + return false; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + return false; + + pos = op_pos.back(); + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + String tmp(pos->begin,pos->end); + if (tmp == "desc" or tmp == "asc") + has_dir = true; + + if (pos->type == TokenType::Comma) + { + has_directions.push_back(has_dir); + has_dir = false; + } + + ++pos; + } + has_directions.push_back(has_dir); + + for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + { + if (!has_directions[i]) + { + auto order_expr = order_expression_list->children[i]->as(); + order_expr->direction = -1; // default desc + if (!order_expr->nulls_direction_was_explicitly_specified) + order_expr->nulls_direction = -1; + else + order_expr->nulls_direction = order_expr->nulls_direction == 1 ? 
-1 : 1; + + } + } + + node = order_expression_list; + + pos =begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.h b/src/Parsers/Kusto/ParserKQLSort.h new file mode 100644 index 00000000000..d9afefc196c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSort : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL order by"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp new file mode 100644 index 00000000000..7dea87eef25 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserSetQuery set_p; + + bool res = query_with_output_p.parse(pos, node, expected) + || set_p.parse(pos, node, expected); + + return res; +} + +bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery KQL_p; + + ASTPtr query; + bool parsed = KQL_p.parse(pos, query, expected); + + if (!parsed) + return false; + + node = std::move(query); + return true; +} + +bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + // will support union next phase + ASTPtr KQLQuery; + + if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + return false; + + if (KQLQuery->as()) + { + node = std::move(KQLQuery); + return true; + } + + auto list_node = std::make_shared(); + list_node->children.push_back(KQLQuery); + + auto select_with_union_query = std::make_shared(); + node = select_with_union_query; + select_with_union_query->list_of_selects = list_node; + 
select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h new file mode 100644 index 00000000000..1eed2d00845 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLStatement : public IParserBase +{ +private: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL Statement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + + +class ParserKQLWithOutput : public IParserBase +{ +protected: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL with output"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + +class ParserKQLWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp new file mode 100644 index 00000000000..f7422c02bca --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -0,0 +1,162 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +namespace DB +{ +std::pair removeLastWord(String input) +{ + std::istringstream ss(input); + std::string token; + std::vector temp; + + while (std::getline(ss, token, ' ')) + { + temp.push_back(token); + } + + String firstPart; + for (std::size_t i = 0; i < temp.size() - 1; i++) + { + firstPart += temp[i]; + } + + return std::make_pair(firstPart, temp[temp.size() - 1]); +} + + +bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + if (op_pos.size() != 1) // now only support one summarize + return false; + + //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName + + //summarize has syntax : + + //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] + + //right now , we only support: + + //T | summarize Aggregation [, ...] [by GroupExpression [, ...]] + //Aggregation -> the Aggregation function on column + //GroupExpression - > columns + + auto begin = pos; + + pos = op_pos.back(); + String exprAggregation; + String exprGroupby; + String exprColumns; + + bool groupby = false; + bool bin_function = false; + String bin_column; + String last_string; + String column_name; + int character_passed = 0; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + groupby = true; + else + { + if (groupby) + { + if (String(pos->begin, pos->end) == "bin") + { + exprGroupby = exprGroupby + "round" + " "; + bin_function = true; + } + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + + if (bin_function && last_string == "(") + { + bin_column = String(pos->begin, pos->end); + bin_function = false; + } + + last_string = String(pos->begin, pos->end); + } + + else + { + if (String(pos->begin, pos->end) == "=") + { + std::pair temp = removeLastWord(exprAggregation); + 
exprAggregation = temp.first; + column_name = temp.second; + } + else + { + if (!column_name.empty()) + { + exprAggregation = exprAggregation + String(pos->begin, pos->end); + character_passed++; + if (String(pos->begin, pos->end) == ")") // was 4 + { + exprAggregation = exprAggregation + " AS " + column_name; + column_name = ""; + } + } + else + { + exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + } + } + } + } + ++pos; + } + + if(!bin_column.empty()) + exprGroupby = exprGroupby + " AS " + bin_column; + + if (exprGroupby.empty()) + exprColumns = exprAggregation; + else + { + if (exprAggregation.empty()) + exprColumns = exprGroupby; + else + exprColumns = exprGroupby + "," + exprAggregation; + } + Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); + IParser::Pos posColumns(tokenColumns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + return false; + + if (groupby) + { + Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); + IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + return false; + } + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h new file mode 100644 index 00000000000..426ac29fe6a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSummarize : public ParserKQLBase +{ +public: + ASTPtr group_expression_list; +protected: + const char * getName() const override { return "KQL summarize"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp new file mode 100644 index 00000000000..8d450799785 --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLTable :: parsePrepare(Pos & pos) +{ + if (!op_pos.empty()) + return false; + + op_pos.push_back(pos); + return true; +} + +bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::unordered_set sql_keywords + ( { + "SELECT", + "INSERT", + "CREATE", + "ALTER", + "SYSTEM", + "SHOW", + "GRANT", + "REVOKE", + "ATTACH", + "CHECK", + "DESCRIBE", + "DESC", + "DETACH", + "DROP", + "EXISTS", + "KILL", + "OPTIMIZE", + "RENAME", + "SET", + "TRUNCATE", + "USE", + "EXPLAIN" + } ); + + if (op_pos.empty()) + return false; + + auto begin = pos; + pos = op_pos.back(); + + String table_name(pos->begin,pos->end); + String table_name_upcase(table_name); + + std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); + + if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) + return false; + + if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + return false; + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h new file mode 100644 index 00000000000..1266b6e732d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTable : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL Table"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parsePrepare(Pos &pos) override; + +}; + +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 747a13d46f7..892c0ad4718 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl() ++pos; if (pos < end && *pos == '|') return Token(TokenType::Concatenation, token_begin, ++pos); - return Token(TokenType::ErrorSinglePipeMark, 
token_begin, pos); + return Token(TokenType::PipeMark, token_begin, pos); } case '@': { diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index ec472fb1a36..0c439ca0677 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ M(At) /** @. Used for specifying user names and also for MySQL-style variables. */ \ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5b6d49e2741..8ffc5f77f90 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,181 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, "^$" } }))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + }, + { + "Customers | 
project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", + "throws Syntax error" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC" + }, + { + "Customers | sort by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | order by LastName", + "SELECT *\nFROM Customers\nORDER BY LastName DESC" + }, + { + "Customers | order by Age desc , FirstName asc ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY Age DESC" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + 
"SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "throws Syntax error" + }, + { + "Customers |summarize count() by Occupation", + "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE 
hasToken(Education, 'School')" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers|summarize count() by bin(Age, 10) ", + "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" + } +}))); From cb4c45340238a148b4a942f145f66c82a9c1e7b9 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 02/87] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 104 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f7422c02bca..24473118dc0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,7 +1,9 @@ #include #include -#include +//#include #include +#include +#include #include #include #include @@ -19,16 +21,21 @@ #include #include #include + namespace DB { -std::pair removeLastWord(String input) +std::pair ParserKQLSummarize::removeLastWord(String input) { - std::istringstream ss(input); - std::string token; + ReadBufferFromString in(input); + String token; std::vector temp; - while (std::getline(ss, token, ' ')) + while (!in.eof()) { + readStringUntilWhitespace(token, in); + if (in.eof()) + break; + skipWhitespaceIfAny(in); temp.push_back(token); } @@ -37,10 +44,65 @@ std::pair removeLastWord(String input) { firstPart += temp[i]; } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair("", ""); } +String ParserKQLSummarize::getBinGroupbyString(String exprBin) +{ + String column_name; + bool bracket_start = false; + bool comma_start = false; + String 
bin_duration; + + for (std::size_t i = 0; i < exprBin.size(); i++) + { + if (comma_start && exprBin[i] != ')') + bin_duration += exprBin[i]; + if (exprBin[i] == ',') + { + comma_start = true; + bracket_start = false; + } + if (bracket_start == true) + column_name += exprBin[i]; + if (exprBin[i] == '(') + bracket_start = true; + } + + + std::size_t len = bin_duration.size(); + char bin_type = bin_duration[len - 1]; // y, d, h, m, s + if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) + { + return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; + } + bin_duration = bin_duration.substr(0, len - 1); + + switch (bin_type) + { + case 'y': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" + + bin_duration + " * (12*30*86400))) AS bin_year"; + case 'd': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration + + " * 86400)) AS bin_day"; + case 'h': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration + + " * 3600)) AS bin_hour"; + case 'm': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration + + " * 60)) AS bin_minute"; + case 's': + return "toDateTime(" + column_name + ") AS bin_sec"; + default: + return ""; + } +} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -67,7 +129,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String exprAggregation; String exprGroupby; String exprColumns; - + String exprBin; bool groupby = false; bool bin_function = false; String bin_column; @@ -83,21 +145,20 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == 
"bin") + if (String(pos->begin, pos->end) == "bin" || bin_function == true) { - exprGroupby = exprGroupby + "round" + " "; bin_function = true; - } - else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; - - if (bin_function && last_string == "(") - { - bin_column = String(pos->begin, pos->end); - bin_function = false; + exprBin += String(pos->begin, pos->end); + if (String(pos->begin, pos->end) == ")") + { + exprBin = getBinGroupbyString(exprBin); + exprGroupby += exprBin; + bin_function = false; + } } - last_string = String(pos->begin, pos->end); + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; } else @@ -114,13 +175,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { exprAggregation = exprAggregation + String(pos->begin, pos->end); character_passed++; - if (String(pos->begin, pos->end) == ")") // was 4 + if (String(pos->begin, pos->end) == ")") { exprAggregation = exprAggregation + " AS " + column_name; column_name = ""; } } - else + else if (!bin_function) { exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; } @@ -130,9 +191,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if(!bin_column.empty()) - exprGroupby = exprGroupby + " AS " + bin_column; - if (exprGroupby.empty()) exprColumns = exprAggregation; else diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 426ac29fe6a..1420d5ce519 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,15 +5,16 @@ namespace DB { - class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; + protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - + std::pair removeLastWord(String input); + String getBinGroupbyString(String exprBin); }; } From 
766b1193d44ef0c1310fd606a1fba52661735154 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:18:49 -0700 Subject: [PATCH 03/87] added single unit test case for summarize bin() --- src/Parsers/tests/gtest_Parser.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 8ffc5f77f90..6d33ed20f33 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -430,6 +430,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers |summarize max(Age) by Occupation", "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, + { + "Customers |summarize count() by bin(Age, 10)", + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" + } { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" From 96bea2245b659b06c6c6a1f3ec9ddbc940d72969 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:29:51 -0700 Subject: [PATCH 04/87] removed unwanted comments --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 24473118dc0..0260902c937 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,6 +1,5 @@ #include #include -//#include #include #include #include @@ -111,17 +110,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName - - //summarize has syntax : - - //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] - - //right now , we only support: - - //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] - //Aggregation -> the Aggregation function on column - //GroupExpression - > columns auto begin = pos; From 61543683ecc09878e8855aabb3f36299637c0df7 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 12:06:15 -0700 Subject: [PATCH 05/87] corrected unit test --- src/Parsers/tests/gtest_Parser.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 6d33ed20f33..1ce82cab3bd 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -432,8 +432,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" - } + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + }, { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" @@ -469,9 +469,5 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" - }, - { - "Customers|summarize count() by bin(Age, 10) ", - "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From 7163b4359e506abaf0da50c1b26688b5aba2f275 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 18:49:22 -0700 Subject: [PATCH 06/87] Kusto-phase1 : Add new test cases --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1ce82cab3bd..ee1e5fa6d8c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -469,5 +469,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE 
match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); From 44bbbd8b9f64901b828eac074f0047f5d565b0c8 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 22:17:58 -0700 Subject: [PATCH 07/87] Kusto-phase1: Fixed the bug for KQL filter with multiple operations --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 ++ src/Parsers/tests/gtest_Parser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1db05d3c07a..726f28308ee 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -84,6 +84,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; } + else + --pos; if (KQLOperator.find(op) != KQLOperator.end()) opValue = KQLOperator[op]; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index ee1e5fa6d8c..cb0b49aecbb 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -408,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "throws Syntax error" + "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" }, { "Customers |summarize count() by Occupation", From 35207909e946de2fa30ab643dcadebb5286f10c2 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 08/87] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLFilter.cpp | 11 ++-- src/Parsers/Kusto/ParserKQLLimit.cpp | 25 ++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 34 +++++------ src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 56 ++++++++---------
src/Parsers/Kusto/ParserKQLSort.cpp | 4 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 14 ++--- src/Parsers/Kusto/ParserKQLSummarize.cpp | 76 ++++++++++++------------ src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- 9 files changed, 111 insertions(+), 115 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index ad7ad807d03..466370f5d80 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -17,17 +17,16 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLOperators convetor; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po : op_pos) { - pos = *it; if (expr.empty()) - expr = "(" + convetor.getExprFromToken(pos) +")"; + expr = "(" + convetor.getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; } - Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(tokenFilter, pos.max_depth); + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 7811ebba9ab..4f7eddd9662 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,14 +13,13 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; auto begin = pos; - Int64 minLimit = -1; + Int64 min_limit = -1; auto final_pos = pos; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po: op_pos) { - pos = *it; - auto isNumber = [&] + auto is_number = [&] { - for (auto ch = pos->begin ; ch < pos->end; ++ch) + for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) { if (!isdigit(*ch)) 
return false; @@ -28,21 +27,21 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!isNumber()) + if (!is_number()) return false; - auto limitLength = std::strtol(pos->begin,nullptr, 10); - if (-1 == minLimit) + auto limit_length = std::strtol(op_po->begin,nullptr, 10); + if (-1 == min_limit) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } else { - if (minLimit > limitLength) + if (min_limit > limit_length) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 726f28308ee..90b37ba8aea 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -10,33 +10,33 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) { - String new_expr, leftWildcards= "", rightWildcards=""; + String new_expr, left_wildcards, right_wildcards; - switch (wildcardsPos) + switch (wildcards_pos) { case WildcardsPos::none: break; case WildcardsPos::left: - leftWildcards ="%"; + left_wildcards ="%"; break; case WildcardsPos::right: - rightWildcards = "%"; + right_wildcards = "%"; break; case WildcardsPos::both: - leftWildcards ="%"; - rightWildcards = "%"; + left_wildcards ="%"; + right_wildcards = "%"; break; } - if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) - new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + if (!tokens.empty() && ((++token_pos)->type == 
TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else - throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } @@ -48,7 +48,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue opValue = KQLOperatorValue::none; + KQLOperatorValue op_value = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); @@ -88,14 +88,14 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) --pos; if (KQLOperator.find(op) != KQLOperator.end()) - opValue = KQLOperator[op]; + op_value = KQLOperator[op]; String new_expr; - if (opValue == KQLOperatorValue::none) + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { - switch (opValue) + switch (op_value) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -192,7 +192,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; @@ -232,8 +232,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) ++pos; } - for (auto it=tokens.begin(); it!=tokens.end(); ++it) - res = res + *it + " "; + for (auto & token : tokens) + res = res + token + " "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9beeeda55ef..4a9a13cf14f 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -97,7 +97,7 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - 
String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0a9fa1fc4df..55aade6b2b9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -35,12 +35,12 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto select_query = std::make_shared(); node = select_query; - ParserKQLFilter KQLfilter_p; - ParserKQLLimit KQLlimit_p; - ParserKQLProject KQLproject_p; - ParserKQLSort KQLsort_p; - ParserKQLSummarize KQLsummarize_p; - ParserKQLTable KQLtable_p; + ParserKQLFilter kql_filter_p; + ParserKQLLimit kql_limit_p; + ParserKQLProject kql_project_p; + ParserKQLSort kql_sort_p; + ParserKQLSummarize kql_summarize_p; + ParserKQLTable kql_table_p; ASTPtr select_expression_list; ASTPtr tables; @@ -49,16 +49,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr order_expression_list; ASTPtr limit_length; - std::unordered_map KQLParser = { - { "filter",&KQLfilter_p}, - { "where",&KQLfilter_p}, - { "limit",&KQLlimit_p}, - { "take",&KQLlimit_p}, - { "project",&KQLproject_p}, - { "sort",&KQLsort_p}, - { "order",&KQLsort_p}, - { "summarize",&KQLsummarize_p}, - { "table",&KQLtable_p} + std::unordered_map kql_parser = { + { "filter",&kql_filter_p}, + { "where",&kql_filter_p}, + { "limit",&kql_limit_p}, + { "take",&kql_limit_p}, + { "project",&kql_project_p}, + { "sort",&kql_sort_p}, + { "order",&kql_sort_p}, + { "summarize",&kql_summarize_p}, + { "table",&kql_table_p} }; std::vector> operation_pos; @@ -71,44 +71,44 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark) { ++pos; - String KQLoperator(pos->begin,pos->end); - 
if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + String kql_operator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(KQLoperator,pos)); + operation_pos.push_back(std::make_pair(kql_operator,pos)); } } for (auto &op_pos : operation_pos) { - auto KQLoperator = op_pos.first; + auto kql_operator = op_pos.first; auto npos = op_pos.second; if (!npos.isValid()) return false; - if (!KQLParser[KQLoperator]->parsePrepare(npos)) + if (!kql_parser[kql_operator]->parsePrepare(npos)) return false; } - if (!KQLtable_p.parse(pos, tables, expected)) + if (!kql_table_p.parse(pos, tables, expected)) return false; - if (!KQLproject_p.parse(pos, select_expression_list, expected)) + if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; - if (!KQLlimit_p.parse(pos, limit_length, expected)) + if (!kql_limit_p.parse(pos, limit_length, expected)) return false; - if (!KQLfilter_p.parse(pos, where_expression, expected)) + if (!kql_filter_p.parse(pos, where_expression, expected)) return false; - if (!KQLsort_p.parse(pos, order_expression_list, expected)) + if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; - if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else - group_expression_list = KQLsummarize_p.group_expression_list; + group_expression_list = kql_summarize_p.group_expression_list; select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 9f226c2fc82..70e3283ee3e 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ 
-48,11 +48,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } has_directions.push_back(has_dir); - for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto order_expr = order_expression_list->children[i]->as(); + auto *order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 7dea87eef25..2afbad22131 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -21,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery KQL_p; + ParserKQLWithUnionQuery kql_p; ASTPtr query; - bool parsed = KQL_p.parse(pos, query, expected); + bool parsed = kql_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +36,19 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // will support union next phase - ASTPtr KQLQuery; + ASTPtr kql_query; - if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + if (!ParserKQLQuery().parse(pos, kql_query, expected)) return false; - if (KQLQuery->as()) + if (kql_query->as()) { - node = std::move(KQLQuery); + node = std::move(kql_query); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(KQLQuery); + list_node->children.push_back(kql_query); auto select_with_union_query = std::make_shared(); node = select_with_union_query; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp 
b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 0260902c937..48544a31104 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -38,42 +38,41 @@ std::pair ParserKQLSummarize::removeLastWord(String input) temp.push_back(token); } - String firstPart; + String first_part; for (std::size_t i = 0; i < temp.size() - 1; i++) { - firstPart += temp[i]; + first_part += temp[i]; } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); } -String ParserKQLSummarize::getBinGroupbyString(String exprBin) +String ParserKQLSummarize::getBinGroupbyString(String expr_bin) { String column_name; bool bracket_start = false; bool comma_start = false; String bin_duration; - for (std::size_t i = 0; i < exprBin.size(); i++) + for (char ch : expr_bin) { - if (comma_start && exprBin[i] != ')') - bin_duration += exprBin[i]; - if (exprBin[i] == ',') + if (comma_start && ch != ')') + bin_duration += ch; + if (ch == ',') { comma_start = true; bracket_start = false; } - if (bracket_start == true) - column_name += exprBin[i]; - if (exprBin[i] == '(') + if (bracket_start) + column_name += ch; + if (ch == '(') bracket_start = true; } - std::size_t len = bin_duration.size(); char bin_type = bin_duration[len - 1]; // y, d, h, m, s if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) @@ -110,14 +109,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - auto begin = pos; pos = op_pos.back(); - String exprAggregation; - String exprGroupby; - String exprColumns; - String exprBin; + String expr_aggregation; + String expr_groupby; + String expr_columns; + String expr_bin; bool groupby = false; bool bin_function = false; String bin_column; @@ -133,45 +131,45 @@ 
bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin" || bin_function == true) + if (String(pos->begin, pos->end) == "bin" || bin_function) { bin_function = true; - exprBin += String(pos->begin, pos->end); + expr_bin += String(pos->begin, pos->end); if (String(pos->begin, pos->end) == ")") { - exprBin = getBinGroupbyString(exprBin); - exprGroupby += exprBin; + expr_bin = getBinGroupbyString(expr_bin); + expr_groupby += expr_bin; bin_function = false; } } else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; } else { if (String(pos->begin, pos->end) == "=") { - std::pair temp = removeLastWord(exprAggregation); - exprAggregation = temp.first; + std::pair temp = removeLastWord(expr_aggregation); + expr_aggregation = temp.first; column_name = temp.second; } else { if (!column_name.empty()) { - exprAggregation = exprAggregation + String(pos->begin, pos->end); + expr_aggregation = expr_aggregation + String(pos->begin, pos->end); character_passed++; if (String(pos->begin, pos->end) == ")") { - exprAggregation = exprAggregation + " AS " + column_name; + expr_aggregation = expr_aggregation + " AS " + column_name; column_name = ""; } } else if (!bin_function) { - exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } } } @@ -179,25 +177,25 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if (exprGroupby.empty()) - exprColumns = exprAggregation; + if (expr_groupby.empty()) + expr_columns = expr_aggregation; else { - if (exprAggregation.empty()) - exprColumns = exprGroupby; + if (expr_aggregation.empty()) + expr_columns = expr_groupby; else - exprColumns = exprGroupby + "," + exprAggregation; + expr_columns = expr_groupby + "," + expr_aggregation; } - Tokens 
tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); - IParser::Pos posColumns(tokenColumns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) return false; if (groupby) { - Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); - IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1420d5ce519..b243f74d08f 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -13,8 +13,8 @@ public: protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - std::pair removeLastWord(String input); - String getBinGroupbyString(String exprBin); + static std::pair removeLastWord(String input); + static String getBinGroupbyString(String expr_bin); }; } From 31781601cb459589cb21fbf60d1139d7a3fc1652 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 12 Jun 2022 20:05:51 -0700 Subject: [PATCH 09/87] Kusto-pahse1: Fixed moy style issues. 
--- src/Parsers/Kusto/ParserKQLOperators.cpp | 8 ++++---- src/Parsers/Kusto/ParserKQLOperators.h | 3 ++- src/Parsers/Kusto/ParserKQLProject.cpp | 2 -- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLQuery.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 10 +++++----- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 90b37ba8aea..260c9070d51 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!" 
) + if ( token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -134,7 +134,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -142,7 +142,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 4a9a13cf14f..a780e18d333 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -6,7 +6,8 @@ namespace DB { -class KQLOperators { +class KQLOperators +{ public: String getExprFromToken(IParser::Pos pos); protected: diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fee8cdb612b..0e25c9c4a6c 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -42,6 +42,4 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected return true; } - - } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 55aade6b2b9..1a850e77f48 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include namespace DB @@ -15,8 +14,8 @@ namespace DB bool ParserKQLBase :: parsePrepare(Pos & pos) { - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } String ParserKQLBase :: getExprFromToken(Pos pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 25aa4e6b83c..0545cd00cd9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -11,7 +11,7 @@ 
public: protected: std::vector op_pos; - std::vector expresions; + std::vector expressions; virtual String getExprFromToken(Pos pos); }; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 8d450799785..a7ae7fef579 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -9,17 +9,17 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { - if (!op_pos.empty()) + if (!op_pos.empty()) return false; - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ( { + ({ "SELECT", "INSERT", "CREATE", @@ -42,7 +42,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - } ); + }); if (op_pos.empty()) return false; From c2b3aff3d7f54731dbbe93e89ec043d7699c9523 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 13 Jun 2022 06:26:02 -0700 Subject: [PATCH 10/87] Kusto-phase1: Fixed misleading indentation --- src/Parsers/Kusto/ParserKQLOperators.cpp | 4 ++-- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 260c9070d51..60fa022f9bb 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; else throw Exception("Syntax error 
near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!") + if (token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index a7ae7fef579..f1fc13d2c48 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { if (!op_pos.empty()) - return false; + return false; op_pos.push_back(pos); return true; From aa0c0cf2f976204981bf46b87f85d7ec06f8602f Mon Sep 17 00:00:00 2001 From: UnamedRus Date: Thu, 4 Aug 2022 00:02:44 +0300 Subject: [PATCH 11/87] Add test for ignore function in PARTITION KEY --- .../queries/1_stateful/00175_partition_by_ignore.sql | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 tests/queries/1_stateful/00175_partition_by_ignore.sql diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.sql b/tests/queries/1_stateful/00175_partition_by_ignore.sql new file mode 100644 index 00000000000..102ed6104d3 --- /dev/null +++ b/tests/queries/1_stateful/00175_partition_by_ignore.sql @@ -0,0 +1,11 @@ +SELECT '-- check that partition key with ignore works correctly'; + +DROP TABLE IF EXISTS partition_by_ignore SYNC; + +CREATE TABLE partition_by_ignore (ts DateTime, ts_2 DateTime) ENGINE=MergeTree PARTITION BY (toYYYYMM(ts), ignore(ts_2)) ORDER BY tuple(); +INSERT INTO partition_by_ignore SELECT toDateTime('2022-08-03 00:00:00') + toIntervalDay(number), toDateTime('2022-08-04 00:00:00') + toIntervalDay(number) FROM numbers(60); + +EXPLAIN ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00'); +EXPLAIN ESTIMATE SELECT count() FROM 
partition_by_ignore WHERE ts_2 BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00'); + +DROP TABLE IF EXISTS partition_by_ignore SYNC; From 61b7025981846fd7dc360986d5e98f9077f080e5 Mon Sep 17 00:00:00 2001 From: UnamedRus Date: Thu, 4 Aug 2022 00:05:24 +0300 Subject: [PATCH 12/87] Create 00175_partition_by_ignore.reference --- .../queries/1_stateful/00175_partition_by_ignore.reference | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 tests/queries/1_stateful/00175_partition_by_ignore.reference diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.reference b/tests/queries/1_stateful/00175_partition_by_ignore.reference new file mode 100644 index 00000000000..30efc31fec7 --- /dev/null +++ b/tests/queries/1_stateful/00175_partition_by_ignore.reference @@ -0,0 +1,7 @@ +-- check that partition key with ignore works correctly +┌─database─┬─table───────────────┬─parts─┬─rows─┬─marks─┐ +│ default │ partition_by_ignore │ 1 │ 29 │ 1 │ +└──────────┴─────────────────────┴───────┴──────┴───────┘ +┌─database─┬─table───────────────┬─parts─┬─rows─┬─marks─┐ +│ default │ partition_by_ignore │ 1 │ 29 │ 1 │ +└──────────┴─────────────────────┴───────┴──────┴───────┘ From d828e79ea1ee54c3fc43efc71b81542bdb9243f2 Mon Sep 17 00:00:00 2001 From: UnamedRus Date: Thu, 4 Aug 2022 15:45:06 +0300 Subject: [PATCH 13/87] Update 00175_partition_by_ignore.reference --- .../1_stateful/00175_partition_by_ignore.reference | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.reference b/tests/queries/1_stateful/00175_partition_by_ignore.reference index 30efc31fec7..53eb1f877a9 100644 --- a/tests/queries/1_stateful/00175_partition_by_ignore.reference +++ b/tests/queries/1_stateful/00175_partition_by_ignore.reference @@ -1,7 +1,3 @@ -- check that partition key with ignore works correctly -┌─database─┬─table───────────────┬─parts─┬─rows─┬─marks─┐ -│ default │ partition_by_ignore │ 1 │ 
29 │ 1 │ -└──────────┴─────────────────────┴───────┴──────┴───────┘ -┌─database─┬─table───────────────┬─parts─┬─rows─┬─marks─┐ -│ default │ partition_by_ignore │ 1 │ 29 │ 1 │ -└──────────┴─────────────────────┴───────┴──────┴───────┘ +default partition_by_ignore 1 29 1 +default partition_by_ignore 1 29 1 From 87182ccd51c1249f5b7d36f14ad8912aa3c43b30 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 14/87] Kusto-phase1: Add Support to Kusto Query Language This is the initial implement of Kusto Query Language. in this commit, we support the following features as MVP : Tabular expression statements Limit returned results Select Column (basic project) sort, order Perform string equality operations Filter using a list of elements Filter using common string operations Some string operators Aggregate by columns Base aggregate functions only support avg, count ,min, max, sum Aggregate by time intervals --- src/Client/ClientBase.cpp | 15 +- src/Core/Settings.h | 1 + src/Interpreters/executeQuery.cpp | 19 +- src/Parsers/CMakeLists.txt | 1 + src/Parsers/Kusto/ParserKQLFilter.cpp | 39 ++++ src/Parsers/Kusto/ParserKQLFilter.h | 16 ++ src/Parsers/Kusto/ParserKQLLimit.cpp | 58 ++++++ src/Parsers/Kusto/ParserKQLLimit.h | 17 ++ src/Parsers/Kusto/ParserKQLOperators.cpp | 239 +++++++++++++++++++++++ src/Parsers/Kusto/ParserKQLOperators.h | 103 ++++++++++ src/Parsers/Kusto/ParserKQLProject.cpp | 47 +++++ src/Parsers/Kusto/ParserKQLProject.h | 22 +++ src/Parsers/Kusto/ParserKQLQuery.cpp | 123 ++++++++++++ src/Parsers/Kusto/ParserKQLQuery.h | 25 +++ src/Parsers/Kusto/ParserKQLSort.cpp | 71 +++++++ src/Parsers/Kusto/ParserKQLSort.h | 16 ++ src/Parsers/Kusto/ParserKQLStatement.cpp | 61 ++++++ src/Parsers/Kusto/ParserKQLStatement.h | 45 +++++ src/Parsers/Kusto/ParserKQLSummarize.cpp | 162 +++++++++++++++ src/Parsers/Kusto/ParserKQLSummarize.h | 19 ++ src/Parsers/Kusto/ParserKQLTable.cpp | 68 +++++++ src/Parsers/Kusto/ParserKQLTable.h | 18 ++ 
src/Parsers/Lexer.cpp | 2 +- src/Parsers/Lexer.h | 1 + src/Parsers/tests/gtest_Parser.cpp | 179 +++++++++++++++++ 25 files changed, 1359 insertions(+), 8 deletions(-) create mode 100644 src/Parsers/Kusto/ParserKQLFilter.cpp create mode 100644 src/Parsers/Kusto/ParserKQLFilter.h create mode 100644 src/Parsers/Kusto/ParserKQLLimit.cpp create mode 100644 src/Parsers/Kusto/ParserKQLLimit.h create mode 100644 src/Parsers/Kusto/ParserKQLOperators.cpp create mode 100644 src/Parsers/Kusto/ParserKQLOperators.h create mode 100644 src/Parsers/Kusto/ParserKQLProject.cpp create mode 100644 src/Parsers/Kusto/ParserKQLProject.h create mode 100644 src/Parsers/Kusto/ParserKQLQuery.cpp create mode 100644 src/Parsers/Kusto/ParserKQLQuery.h create mode 100644 src/Parsers/Kusto/ParserKQLSort.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSort.h create mode 100644 src/Parsers/Kusto/ParserKQLStatement.cpp create mode 100644 src/Parsers/Kusto/ParserKQLStatement.h create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.h create mode 100644 src/Parsers/Kusto/ParserKQLTable.cpp create mode 100644 src/Parsers/Kusto/ParserKQLTable.h diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index c399f01c565..1407395bf89 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -70,7 +70,7 @@ #include #include #include - +#include namespace fs = std::filesystem; using namespace std::literals; @@ -299,7 +299,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); + std::shared_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -308,10 +308,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; + 
const String & sql_dialect = settings.sql_dialect; + + if (sql_dialect == "kusto") + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + else + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + if (is_interactive || ignore_error) { String message; - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { @@ -321,7 +328,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 08a3df0a3e3..9d5535aa923 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -42,6 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ + M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index cdddd28adeb..20f4fa559f9 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ 
-72,6 +72,7 @@ #include #include +#include namespace ProfileEvents { @@ -396,10 +397,22 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + const String & sql_dialect = settings.sql_dialect; + assert(sql_dialect == "clickhouse" || sql_dialect == "kusto"); - /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + if (sql_dialect == "kusto" && !internal) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else + { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } if (auto txn = context->getCurrentTransaction()) { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73f300fd5f6..73d46593e04 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) +add_headers_and_sources(clickhouse_parsers ./Kusto) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp new file mode 100644 index 00000000000..ad7ad807d03 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + Pos begin = pos; + String expr; + + KQLOperators convetor; + + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + if (expr.empty()) + expr = "(" + convetor.getExprFromToken(pos) +")"; + else + expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + } + + Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(tokenFilter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLFilter.h b/src/Parsers/Kusto/ParserKQLFilter.h new file mode 100644 index 00000000000..19bb38a7fda --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLFilter : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL where"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp new file mode 100644 index 00000000000..7811ebba9ab --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp 
@@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + Int64 minLimit = -1; + auto final_pos = pos; + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + auto isNumber = [&] + { + for (auto ch = pos->begin ; ch < pos->end; ++ch) + { + if (!isdigit(*ch)) + return false; + } + return true; + }; + + if (!isNumber()) + return false; + + auto limitLength = std::strtol(pos->begin,nullptr, 10); + if (-1 == minLimit) + { + minLimit = limitLength; + final_pos = pos; + } + else + { + if (minLimit > limitLength) + { + minLimit = limitLength; + final_pos = pos; + } + } + } + + if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h new file mode 100644 index 00000000000..d425659499d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLLimit : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL limit"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp new file mode 100644 index 00000000000..1db05d3c07a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -0,0 +1,239 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +{ + String new_expr, leftWildcards= "", rightWildcards=""; + + switch (wildcardsPos) + { + case WildcardsPos::none: + break; + + 
case WildcardsPos::left: + leftWildcards ="%"; + break; + + case WildcardsPos::right: + rightWildcards = "%"; + break; + + case WildcardsPos::both: + leftWildcards ="%"; + rightWildcards = "%"; + break; + } + + if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) + new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + else + throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::getExprFromToken(IParser::Pos pos) +{ + String res; + std::vector tokens; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + KQLOperatorValue opValue = KQLOperatorValue::none; + + auto token = String(pos->begin,pos->end); + + String op = token; + if ( token == "!" ) + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + op ="!"+String(pos->begin,pos->end); + } + else if (token == "matches") + { + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "regex") + op +=" regex"; + else + --pos; + } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "~") + op +="~"; + else + --pos; + } + + if (KQLOperator.find(op) != KQLOperator.end()) + opValue = KQLOperator[op]; + + String new_expr; + if (opValue == KQLOperatorValue::none) + tokens.push_back(op); + else + { + switch (opValue) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains: + new_expr = 
genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + break; + + case KQLOperatorValue::not_equal: + break; + + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + + case KQLOperatorValue::has: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + break; + + case KQLOperatorValue::has_any: + break; + + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + break; + + case KQLOperatorValue::not_hasprefix: + break; + + case KQLOperatorValue::hasprefix_cs: + break; + + case KQLOperatorValue::not_hasprefix_cs: + break; + + case KQLOperatorValue::hassuffix: + break; + + case 
KQLOperatorValue::not_hassuffix: + break; + + case KQLOperatorValue::hassuffix_cs: + break; + + case KQLOperatorValue::not_hassuffix_cs: + break; + + case KQLOperatorValue::in_cs: + new_expr = "in"; + break; + + case KQLOperatorValue::not_in_cs: + new_expr = "not in"; + break; + + case KQLOperatorValue::in: + break; + + case KQLOperatorValue::not_in: + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + break; + + default: + break; + } + + tokens.push_back(new_expr); + } + ++pos; + } + + for (auto it=tokens.begin(); it!=tokens.end(); ++it) + res = res + *it + " "; + + return res; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h new file mode 100644 index 00000000000..9beeeda55ef --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class KQLOperators { +public: + String getExprFromToken(IParser::Pos pos); +protected: + + enum class WildcardsPos:uint8_t + { + none, + left, + right, + both + }; + + enum class KQLOperatorValue : uint16_t + { + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal,//!~ + equal_cs, //= + not_equal_cs,//!= + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + 
hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in ,//!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, + }; + + std::unordered_map KQLOperator = + { + {"contains" , KQLOperatorValue::contains}, + {"!contains" , KQLOperatorValue::not_contains}, + {"contains_cs" , KQLOperatorValue::contains_cs}, + {"!contains_cs" , KQLOperatorValue::not_contains_cs}, + {"endswith" , KQLOperatorValue::endswith}, + {"!endswith" , KQLOperatorValue::not_endswith}, + {"endswith_cs" , KQLOperatorValue::endswith_cs}, + {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, + {"=~" , KQLOperatorValue::equal}, + {"!~" , KQLOperatorValue::not_equal}, + {"==" , KQLOperatorValue::equal_cs}, + {"!=" , KQLOperatorValue::not_equal_cs}, + {"has" , KQLOperatorValue::has}, + {"!has" , KQLOperatorValue::not_has}, + {"has_all" , KQLOperatorValue::has_all}, + {"has_any" , KQLOperatorValue::has_any}, + {"has_cs" , KQLOperatorValue::has_cs}, + {"!has_cs" , KQLOperatorValue::not_has_cs}, + {"hasprefix" , KQLOperatorValue::hasprefix}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix" , KQLOperatorValue::hassuffix}, + {"!hassuffix" , KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, + {"in" , KQLOperatorValue::in_cs}, + {"!in" , KQLOperatorValue::not_in_cs}, + {"in~" , KQLOperatorValue::in}, + {"!in~" , KQLOperatorValue::not_in}, + {"matches regex" , KQLOperatorValue::matches_regex}, + {"startswith" , KQLOperatorValue::startswith}, + {"!startswith" , KQLOperatorValue::not_startswith}, + {"startswith_cs" , KQLOperatorValue::startswith_cs}, + {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, + }; + String genHaystackOpExpr(std::vector 
&tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp new file mode 100644 index 00000000000..fee8cdb612b --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + String expr; + if (op_pos.empty()) + expr = "*"; + else + { + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it ; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + if (pos->type == TokenType::BareWord) + { + String tmp(pos->begin,pos->end); + + if (it != op_pos.begin() && columns.find(tmp) == columns.end()) + return false; + columns.insert(tmp); + } + ++pos; + } + } + expr = getExprFromToken(op_pos.back()); + } + + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + + + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h new file mode 100644 index 00000000000..3ab3c82f1be --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProject : public ParserKQLBase +{ +public: + void addColumn(String column) {columns.insert(column);} + +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::unordered_set columns; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp new file mode 100644 index 00000000000..0a9fa1fc4df --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -0,0 +1,123 @@ +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLBase :: parsePrepare(Pos & pos) +{ + op_pos.push_back(pos); + return true; +} + +String ParserKQLBase :: getExprFromToken(Pos pos) +{ + String res; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + res = res + String(pos->begin,pos->end) +" "; + ++pos; + } + return res; +} + +bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto select_query = std::make_shared(); + node = select_query; + + ParserKQLFilter KQLfilter_p; + ParserKQLLimit KQLlimit_p; + ParserKQLProject KQLproject_p; + ParserKQLSort KQLsort_p; + ParserKQLSummarize KQLsummarize_p; + ParserKQLTable KQLtable_p; + + ASTPtr select_expression_list; + ASTPtr tables; + ASTPtr where_expression; + ASTPtr group_expression_list; + ASTPtr order_expression_list; + ASTPtr limit_length; + + std::unordered_map KQLParser = { + { "filter",&KQLfilter_p}, + { "where",&KQLfilter_p}, + { "limit",&KQLlimit_p}, + { "take",&KQLlimit_p}, + { "project",&KQLproject_p}, + { "sort",&KQLsort_p}, + { "order",&KQLsort_p}, + { "summarize",&KQLsummarize_p}, + { "table",&KQLtable_p} + }; + + std::vector> operation_pos; + + operation_pos.push_back(std::make_pair("table",pos)); + + while (!pos->isEnd()) + { + ++pos; + if (pos->type == TokenType::PipeMark) + { + ++pos; + String KQLoperator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + return false; + ++pos; + operation_pos.push_back(std::make_pair(KQLoperator,pos)); + } + } + + for (auto &op_pos : operation_pos) + { + auto KQLoperator = op_pos.first; + auto npos = op_pos.second; + if (!npos.isValid()) + return false; + + if (!KQLParser[KQLoperator]->parsePrepare(npos)) + return false; + } + + if (!KQLtable_p.parse(pos, tables, expected)) + return false; + + if (!KQLproject_p.parse(pos, select_expression_list, expected)) + return false; + + if 
(!KQLlimit_p.parse(pos, limit_length, expected)) + return false; + + if (!KQLfilter_p.parse(pos, where_expression, expected)) + return false; + + if (!KQLsort_p.parse(pos, order_expression_list, expected)) + return false; + + if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + return false; + else + group_expression_list = KQLsummarize_p.group_expression_list; + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h new file mode 100644 index 00000000000..25aa4e6b83c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ +class ParserKQLBase : public IParserBase +{ +public: + virtual bool parsePrepare(Pos & pos) ; + +protected: + std::vector op_pos; + std::vector expresions; + virtual String getExprFromToken(Pos pos); +}; + +class ParserKQLQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp new file mode 100644 index 00000000000..9f226c2fc82 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSort :: parseImpl(Pos & pos, 
ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + bool has_dir = false; + std::vector has_directions; + ParserOrderByExpressionList order_list; + ASTPtr order_expression_list; + + ParserKeyword by("by"); + + pos = op_pos.back(); // sort only affected by last one + + if (!by.ignore(pos, expected)) + return false; + + if (!order_list.parse(pos,order_expression_list,expected)) + return false; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + return false; + + pos = op_pos.back(); + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + String tmp(pos->begin,pos->end); + if (tmp == "desc" or tmp == "asc") + has_dir = true; + + if (pos->type == TokenType::Comma) + { + has_directions.push_back(has_dir); + has_dir = false; + } + + ++pos; + } + has_directions.push_back(has_dir); + + for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + { + if (!has_directions[i]) + { + auto order_expr = order_expression_list->children[i]->as(); + order_expr->direction = -1; // default desc + if (!order_expr->nulls_direction_was_explicitly_specified) + order_expr->nulls_direction = -1; + else + order_expr->nulls_direction = order_expr->nulls_direction == 1 ? 
-1 : 1; + + } + } + + node = order_expression_list; + + pos =begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.h b/src/Parsers/Kusto/ParserKQLSort.h new file mode 100644 index 00000000000..d9afefc196c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSort : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL order by"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp new file mode 100644 index 00000000000..7dea87eef25 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserSetQuery set_p; + + bool res = query_with_output_p.parse(pos, node, expected) + || set_p.parse(pos, node, expected); + + return res; +} + +bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery KQL_p; + + ASTPtr query; + bool parsed = KQL_p.parse(pos, query, expected); + + if (!parsed) + return false; + + node = std::move(query); + return true; +} + +bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + // will support union next phase + ASTPtr KQLQuery; + + if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + return false; + + if (KQLQuery->as()) + { + node = std::move(KQLQuery); + return true; + } + + auto list_node = std::make_shared(); + list_node->children.push_back(KQLQuery); + + auto select_with_union_query = std::make_shared(); + node = select_with_union_query; + select_with_union_query->list_of_selects = list_node; + 
select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h new file mode 100644 index 00000000000..1eed2d00845 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLStatement : public IParserBase +{ +private: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL Statement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + + +class ParserKQLWithOutput : public IParserBase +{ +protected: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL with output"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + +class ParserKQLWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp new file mode 100644 index 00000000000..f7422c02bca --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -0,0 +1,162 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +namespace DB +{ +std::pair removeLastWord(String input) +{ + std::istringstream ss(input); + std::string token; + std::vector temp; + + while (std::getline(ss, token, ' ')) + { + temp.push_back(token); + } + + String firstPart; + for (std::size_t i = 0; i < temp.size() - 1; i++) + { + firstPart += temp[i]; + } + + return std::make_pair(firstPart, temp[temp.size() - 1]); +} + + +bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + if (op_pos.size() != 1) // now only support one summarize + return false; + + //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName + + //summarize has syntax : + + //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] + + //right now , we only support: + + //T | summarize Aggregation [, ...] [by GroupExpression [, ...]] + //Aggregation -> the Aggregation function on column + //GroupExpression - > columns + + auto begin = pos; + + pos = op_pos.back(); + String exprAggregation; + String exprGroupby; + String exprColumns; + + bool groupby = false; + bool bin_function = false; + String bin_column; + String last_string; + String column_name; + int character_passed = 0; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + groupby = true; + else + { + if (groupby) + { + if (String(pos->begin, pos->end) == "bin") + { + exprGroupby = exprGroupby + "round" + " "; + bin_function = true; + } + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + + if (bin_function && last_string == "(") + { + bin_column = String(pos->begin, pos->end); + bin_function = false; + } + + last_string = String(pos->begin, pos->end); + } + + else + { + if (String(pos->begin, pos->end) == "=") + { + std::pair temp = removeLastWord(exprAggregation); + 
exprAggregation = temp.first; + column_name = temp.second; + } + else + { + if (!column_name.empty()) + { + exprAggregation = exprAggregation + String(pos->begin, pos->end); + character_passed++; + if (String(pos->begin, pos->end) == ")") // was 4 + { + exprAggregation = exprAggregation + " AS " + column_name; + column_name = ""; + } + } + else + { + exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + } + } + } + } + ++pos; + } + + if(!bin_column.empty()) + exprGroupby = exprGroupby + " AS " + bin_column; + + if (exprGroupby.empty()) + exprColumns = exprAggregation; + else + { + if (exprAggregation.empty()) + exprColumns = exprGroupby; + else + exprColumns = exprGroupby + "," + exprAggregation; + } + Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); + IParser::Pos posColumns(tokenColumns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + return false; + + if (groupby) + { + Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); + IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + return false; + } + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h new file mode 100644 index 00000000000..426ac29fe6a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSummarize : public ParserKQLBase +{ +public: + ASTPtr group_expression_list; +protected: + const char * getName() const override { return "KQL summarize"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp new file mode 100644 index 00000000000..8d450799785 --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLTable :: parsePrepare(Pos & pos) +{ + if (!op_pos.empty()) + return false; + + op_pos.push_back(pos); + return true; +} + +bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::unordered_set sql_keywords + ( { + "SELECT", + "INSERT", + "CREATE", + "ALTER", + "SYSTEM", + "SHOW", + "GRANT", + "REVOKE", + "ATTACH", + "CHECK", + "DESCRIBE", + "DESC", + "DETACH", + "DROP", + "EXISTS", + "KILL", + "OPTIMIZE", + "RENAME", + "SET", + "TRUNCATE", + "USE", + "EXPLAIN" + } ); + + if (op_pos.empty()) + return false; + + auto begin = pos; + pos = op_pos.back(); + + String table_name(pos->begin,pos->end); + String table_name_upcase(table_name); + + std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); + + if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) + return false; + + if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + return false; + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h new file mode 100644 index 00000000000..1266b6e732d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTable : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL Table"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parsePrepare(Pos &pos) override; + +}; + +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 747a13d46f7..892c0ad4718 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl() ++pos; if (pos < end && *pos == '|') return Token(TokenType::Concatenation, token_begin, ++pos); - return Token(TokenType::ErrorSinglePipeMark, 
token_begin, pos); + return Token(TokenType::PipeMark, token_begin, pos); } case '@': { diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index ec472fb1a36..0c439ca0677 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ M(At) /** @. Used for specifying user names and also for MySQL-style variables. */ \ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5b6d49e2741..8ffc5f77f90 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,181 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, "^$" } }))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + }, + { + "Customers | 
project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", + "throws Syntax error" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC" + }, + { + "Customers | sort by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | order by LastName", + "SELECT *\nFROM Customers\nORDER BY LastName DESC" + }, + { + "Customers | order by Age desc , FirstName asc ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY Age DESC" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + 
"SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "throws Syntax error" + }, + { + "Customers |summarize count() by Occupation", + "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE 
hasToken(Education, 'School')" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers|summarize count() by bin(Age, 10) ", + "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" + } +}))); From 10f87612ebf599016e3b1ea47083f67363132ef8 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 15/87] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 104 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f7422c02bca..24473118dc0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,7 +1,9 @@ #include #include -#include +//#include #include +#include +#include #include #include #include @@ -19,16 +21,21 @@ #include #include #include + namespace DB { -std::pair removeLastWord(String input) +std::pair ParserKQLSummarize::removeLastWord(String input) { - std::istringstream ss(input); - std::string token; + ReadBufferFromString in(input); + String token; std::vector temp; - while (std::getline(ss, token, ' ')) + while (!in.eof()) { + readStringUntilWhitespace(token, in); + if (in.eof()) + break; + skipWhitespaceIfAny(in); temp.push_back(token); } @@ -37,10 +44,65 @@ std::pair removeLastWord(String input) { firstPart += temp[i]; } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair("", ""); } +String ParserKQLSummarize::getBinGroupbyString(String exprBin) +{ + String column_name; + bool bracket_start = false; + bool comma_start = false; + String 
bin_duration; + + for (std::size_t i = 0; i < exprBin.size(); i++) + { + if (comma_start && exprBin[i] != ')') + bin_duration += exprBin[i]; + if (exprBin[i] == ',') + { + comma_start = true; + bracket_start = false; + } + if (bracket_start == true) + column_name += exprBin[i]; + if (exprBin[i] == '(') + bracket_start = true; + } + + + std::size_t len = bin_duration.size(); + char bin_type = bin_duration[len - 1]; // y, d, h, m, s + if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) + { + return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; + } + bin_duration = bin_duration.substr(0, len - 1); + + switch (bin_type) + { + case 'y': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" + + bin_duration + " * (12*30*86400))) AS bin_year"; + case 'd': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration + + " * 86400)) AS bin_day"; + case 'h': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration + + " * 3600)) AS bin_hour"; + case 'm': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration + + " * 60)) AS bin_minute"; + case 's': + return "toDateTime(" + column_name + ") AS bin_sec"; + default: + return ""; + } +} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -67,7 +129,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String exprAggregation; String exprGroupby; String exprColumns; - + String exprBin; bool groupby = false; bool bin_function = false; String bin_column; @@ -83,21 +145,20 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == 
"bin") + if (String(pos->begin, pos->end) == "bin" || bin_function == true) { - exprGroupby = exprGroupby + "round" + " "; bin_function = true; - } - else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; - - if (bin_function && last_string == "(") - { - bin_column = String(pos->begin, pos->end); - bin_function = false; + exprBin += String(pos->begin, pos->end); + if (String(pos->begin, pos->end) == ")") + { + exprBin = getBinGroupbyString(exprBin); + exprGroupby += exprBin; + bin_function = false; + } } - last_string = String(pos->begin, pos->end); + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; } else @@ -114,13 +175,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { exprAggregation = exprAggregation + String(pos->begin, pos->end); character_passed++; - if (String(pos->begin, pos->end) == ")") // was 4 + if (String(pos->begin, pos->end) == ")") { exprAggregation = exprAggregation + " AS " + column_name; column_name = ""; } } - else + else if (!bin_function) { exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; } @@ -130,9 +191,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if(!bin_column.empty()) - exprGroupby = exprGroupby + " AS " + bin_column; - if (exprGroupby.empty()) exprColumns = exprAggregation; else diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 426ac29fe6a..1420d5ce519 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,15 +5,16 @@ namespace DB { - class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; + protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - + std::pair removeLastWord(String input); + String getBinGroupbyString(String exprBin); }; } From 
45e8d29542f3a373d0b436f82b40a0cd2d608403 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:18:49 -0700 Subject: [PATCH 16/87] added single unit test case for summarize bin() --- src/Parsers/tests/gtest_Parser.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 8ffc5f77f90..6d33ed20f33 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -430,6 +430,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers |summarize max(Age) by Occupation", "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, + { + "Customers |summarize count() by bin(Age, 10)", + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" + } { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" From e20b2ed6eb19c3f471e94a6d7cbdaecd4eeb7a66 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:29:51 -0700 Subject: [PATCH 17/87] removed unwanted comments --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 24473118dc0..0260902c937 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,6 +1,5 @@ #include #include -//#include #include #include #include @@ -111,17 +110,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName - - //summarize has syntax : - - //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] - - //right now , we only support: - - //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] - //Aggregation -> the Aggregation function on column - //GroupExpression - > columns auto begin = pos; From 844bd7c3d7975a571c6c28a6de77390aef16eb69 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 12:06:15 -0700 Subject: [PATCH 18/87] corrected unit test --- src/Parsers/tests/gtest_Parser.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 6d33ed20f33..1ce82cab3bd 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -432,8 +432,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" - } + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + }, { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" @@ -469,9 +469,5 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" - }, - { - "Customers|summarize count() by bin(Age, 10) ", - "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From fdaffac96b20c49c6ebed4c3babac2aa64e9fd9c Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 18:49:22 -0700 Subject: [PATCH 19/87] Kusto-phase1 : Add new test cases --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1ce82cab3bd..ee1e5fa6d8c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -469,5 +469,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE 
match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); From 20758da3947550dc41445dea09eb6c9d91ddd1a3 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 22:17:58 -0700 Subject: [PATCH 20/87] Kusto-phase1: Fixed the bug for KQL filer with multiple operations --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 ++ src/Parsers/tests/gtest_Parser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1db05d3c07a..726f28308ee 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -84,6 +84,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; } + else + --pos; if (KQLOperator.find(op) != KQLOperator.end()) opValue = KQLOperator[op]; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index ee1e5fa6d8c..cb0b49aecbb 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -408,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "throws Syntax error" + "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" }, { "Customers |summarize count() by Occupation", From 08022a818925c708807341c5631c6482bd17ef6e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 21/87] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLFilter.cpp | 11 ++-- src/Parsers/Kusto/ParserKQLLimit.cpp | 25 ++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 34 +++++------ src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 56 ++++++++--------- 
src/Parsers/Kusto/ParserKQLSort.cpp | 4 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 14 ++--- src/Parsers/Kusto/ParserKQLSummarize.cpp | 76 ++++++++++++------------ src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- 9 files changed, 111 insertions(+), 115 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index ad7ad807d03..466370f5d80 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -17,17 +17,16 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLOperators convetor; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po : op_pos) { - pos = *it; if (expr.empty()) - expr = "(" + convetor.getExprFromToken(pos) +")"; + expr = "(" + convetor.getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; } - Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(tokenFilter, pos.max_depth); + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 7811ebba9ab..4f7eddd9662 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,14 +13,13 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; auto begin = pos; - Int64 minLimit = -1; + Int64 min_limit = -1; auto final_pos = pos; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po: op_pos) { - pos = *it; - auto isNumber = [&] + auto is_number = [&] { - for (auto ch = pos->begin ; ch < pos->end; ++ch) + for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) { if (!isdigit(*ch)) 
return false; @@ -28,21 +27,21 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!isNumber()) + if (!is_number()) return false; - auto limitLength = std::strtol(pos->begin,nullptr, 10); - if (-1 == minLimit) + auto limit_length = std::strtol(op_po->begin,nullptr, 10); + if (-1 == min_limit) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } else { - if (minLimit > limitLength) + if (min_limit > limit_length) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 726f28308ee..90b37ba8aea 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -10,33 +10,33 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) { - String new_expr, leftWildcards= "", rightWildcards=""; + String new_expr, left_wildcards, right_wildcards; - switch (wildcardsPos) + switch (wildcards_pos) { case WildcardsPos::none: break; case WildcardsPos::left: - leftWildcards ="%"; + left_wildcards ="%"; break; case WildcardsPos::right: - rightWildcards = "%"; + right_wildcards = "%"; break; case WildcardsPos::both: - leftWildcards ="%"; - rightWildcards = "%"; + left_wildcards ="%"; + right_wildcards = "%"; break; } - if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) - new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + if (!tokens.empty() && ((++token_pos)->type == 
TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else - throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } @@ -48,7 +48,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue opValue = KQLOperatorValue::none; + KQLOperatorValue op_value = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); @@ -88,14 +88,14 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) --pos; if (KQLOperator.find(op) != KQLOperator.end()) - opValue = KQLOperator[op]; + op_value = KQLOperator[op]; String new_expr; - if (opValue == KQLOperatorValue::none) + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { - switch (opValue) + switch (op_value) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -192,7 +192,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; @@ -232,8 +232,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) ++pos; } - for (auto it=tokens.begin(); it!=tokens.end(); ++it) - res = res + *it + " "; + for (auto & token : tokens) + res = res + token + " "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9beeeda55ef..4a9a13cf14f 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -97,7 +97,7 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - 
String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0a9fa1fc4df..55aade6b2b9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -35,12 +35,12 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto select_query = std::make_shared(); node = select_query; - ParserKQLFilter KQLfilter_p; - ParserKQLLimit KQLlimit_p; - ParserKQLProject KQLproject_p; - ParserKQLSort KQLsort_p; - ParserKQLSummarize KQLsummarize_p; - ParserKQLTable KQLtable_p; + ParserKQLFilter kql_filter_p; + ParserKQLLimit kql_limit_p; + ParserKQLProject kql_project_p; + ParserKQLSort kql_sort_p; + ParserKQLSummarize kql_summarize_p; + ParserKQLTable kql_table_p; ASTPtr select_expression_list; ASTPtr tables; @@ -49,16 +49,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr order_expression_list; ASTPtr limit_length; - std::unordered_map KQLParser = { - { "filter",&KQLfilter_p}, - { "where",&KQLfilter_p}, - { "limit",&KQLlimit_p}, - { "take",&KQLlimit_p}, - { "project",&KQLproject_p}, - { "sort",&KQLsort_p}, - { "order",&KQLsort_p}, - { "summarize",&KQLsummarize_p}, - { "table",&KQLtable_p} + std::unordered_map kql_parser = { + { "filter",&kql_filter_p}, + { "where",&kql_filter_p}, + { "limit",&kql_limit_p}, + { "take",&kql_limit_p}, + { "project",&kql_project_p}, + { "sort",&kql_sort_p}, + { "order",&kql_sort_p}, + { "summarize",&kql_summarize_p}, + { "table",&kql_table_p} }; std::vector> operation_pos; @@ -71,44 +71,44 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark) { ++pos; - String KQLoperator(pos->begin,pos->end); - 
if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + String kql_operator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(KQLoperator,pos)); + operation_pos.push_back(std::make_pair(kql_operator,pos)); } } for (auto &op_pos : operation_pos) { - auto KQLoperator = op_pos.first; + auto kql_operator = op_pos.first; auto npos = op_pos.second; if (!npos.isValid()) return false; - if (!KQLParser[KQLoperator]->parsePrepare(npos)) + if (!kql_parser[kql_operator]->parsePrepare(npos)) return false; } - if (!KQLtable_p.parse(pos, tables, expected)) + if (!kql_table_p.parse(pos, tables, expected)) return false; - if (!KQLproject_p.parse(pos, select_expression_list, expected)) + if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; - if (!KQLlimit_p.parse(pos, limit_length, expected)) + if (!kql_limit_p.parse(pos, limit_length, expected)) return false; - if (!KQLfilter_p.parse(pos, where_expression, expected)) + if (!kql_filter_p.parse(pos, where_expression, expected)) return false; - if (!KQLsort_p.parse(pos, order_expression_list, expected)) + if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; - if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else - group_expression_list = KQLsummarize_p.group_expression_list; + group_expression_list = kql_summarize_p.group_expression_list; select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 9f226c2fc82..70e3283ee3e 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ 
-48,11 +48,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } has_directions.push_back(has_dir); - for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto order_expr = order_expression_list->children[i]->as(); + auto *order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 7dea87eef25..2afbad22131 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -21,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery KQL_p; + ParserKQLWithUnionQuery kql_p; ASTPtr query; - bool parsed = KQL_p.parse(pos, query, expected); + bool parsed = kql_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +36,19 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // will support union next phase - ASTPtr KQLQuery; + ASTPtr kql_query; - if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + if (!ParserKQLQuery().parse(pos, kql_query, expected)) return false; - if (KQLQuery->as()) + if (kql_query->as()) { - node = std::move(KQLQuery); + node = std::move(kql_query); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(KQLQuery); + list_node->children.push_back(kql_query); auto select_with_union_query = std::make_shared(); node = select_with_union_query; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp 
b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 0260902c937..48544a31104 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -38,42 +38,41 @@ std::pair ParserKQLSummarize::removeLastWord(String input) temp.push_back(token); } - String firstPart; + String first_part; for (std::size_t i = 0; i < temp.size() - 1; i++) { - firstPart += temp[i]; + first_part += temp[i]; } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); } -String ParserKQLSummarize::getBinGroupbyString(String exprBin) +String ParserKQLSummarize::getBinGroupbyString(String expr_bin) { String column_name; bool bracket_start = false; bool comma_start = false; String bin_duration; - for (std::size_t i = 0; i < exprBin.size(); i++) + for (char ch : expr_bin) { - if (comma_start && exprBin[i] != ')') - bin_duration += exprBin[i]; - if (exprBin[i] == ',') + if (comma_start && ch != ')') + bin_duration += ch; + if (ch == ',') { comma_start = true; bracket_start = false; } - if (bracket_start == true) - column_name += exprBin[i]; - if (exprBin[i] == '(') + if (bracket_start) + column_name += ch; + if (ch == '(') bracket_start = true; } - std::size_t len = bin_duration.size(); char bin_type = bin_duration[len - 1]; // y, d, h, m, s if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) @@ -110,14 +109,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - auto begin = pos; pos = op_pos.back(); - String exprAggregation; - String exprGroupby; - String exprColumns; - String exprBin; + String expr_aggregation; + String expr_groupby; + String expr_columns; + String expr_bin; bool groupby = false; bool bin_function = false; String bin_column; @@ -133,45 +131,45 @@ 
bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin" || bin_function == true) + if (String(pos->begin, pos->end) == "bin" || bin_function) { bin_function = true; - exprBin += String(pos->begin, pos->end); + expr_bin += String(pos->begin, pos->end); if (String(pos->begin, pos->end) == ")") { - exprBin = getBinGroupbyString(exprBin); - exprGroupby += exprBin; + expr_bin = getBinGroupbyString(expr_bin); + expr_groupby += expr_bin; bin_function = false; } } else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; } else { if (String(pos->begin, pos->end) == "=") { - std::pair temp = removeLastWord(exprAggregation); - exprAggregation = temp.first; + std::pair temp = removeLastWord(expr_aggregation); + expr_aggregation = temp.first; column_name = temp.second; } else { if (!column_name.empty()) { - exprAggregation = exprAggregation + String(pos->begin, pos->end); + expr_aggregation = expr_aggregation + String(pos->begin, pos->end); character_passed++; if (String(pos->begin, pos->end) == ")") { - exprAggregation = exprAggregation + " AS " + column_name; + expr_aggregation = expr_aggregation + " AS " + column_name; column_name = ""; } } else if (!bin_function) { - exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } } } @@ -179,25 +177,25 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if (exprGroupby.empty()) - exprColumns = exprAggregation; + if (expr_groupby.empty()) + expr_columns = expr_aggregation; else { - if (exprAggregation.empty()) - exprColumns = exprGroupby; + if (expr_aggregation.empty()) + expr_columns = expr_groupby; else - exprColumns = exprGroupby + "," + exprAggregation; + expr_columns = expr_groupby + "," + expr_aggregation; } - Tokens 
tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); - IParser::Pos posColumns(tokenColumns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) return false; if (groupby) { - Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); - IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1420d5ce519..b243f74d08f 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -13,8 +13,8 @@ public: protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - std::pair removeLastWord(String input); - String getBinGroupbyString(String exprBin); + static std::pair removeLastWord(String input); + static String getBinGroupbyString(String expr_bin); }; } From 516a6c0844543d44d34feca5314b74000dff4f87 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 12 Jun 2022 20:05:51 -0700 Subject: [PATCH 22/87] Kusto-pahse1: Fixed moy style issues. 
--- src/Parsers/Kusto/ParserKQLOperators.cpp | 8 ++++---- src/Parsers/Kusto/ParserKQLOperators.h | 3 ++- src/Parsers/Kusto/ParserKQLProject.cpp | 2 -- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLQuery.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 10 +++++----- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 90b37ba8aea..260c9070d51 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!" 
) + if ( token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -134,7 +134,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -142,7 +142,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 4a9a13cf14f..a780e18d333 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -6,7 +6,8 @@ namespace DB { -class KQLOperators { +class KQLOperators +{ public: String getExprFromToken(IParser::Pos pos); protected: diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fee8cdb612b..0e25c9c4a6c 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -42,6 +42,4 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected return true; } - - } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 55aade6b2b9..1a850e77f48 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include namespace DB @@ -15,8 +14,8 @@ namespace DB bool ParserKQLBase :: parsePrepare(Pos & pos) { - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } String ParserKQLBase :: getExprFromToken(Pos pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 25aa4e6b83c..0545cd00cd9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -11,7 +11,7 @@ 
public: protected: std::vector op_pos; - std::vector expresions; + std::vector expressions; virtual String getExprFromToken(Pos pos); }; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 8d450799785..a7ae7fef579 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -9,17 +9,17 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { - if (!op_pos.empty()) + if (!op_pos.empty()) return false; - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ( { + ({ "SELECT", "INSERT", "CREATE", @@ -42,7 +42,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - } ); + }); if (op_pos.empty()) return false; From 30ce50faff20570d379861286b85f46bc866070e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 13 Jun 2022 06:26:02 -0700 Subject: [PATCH 23/87] Kusto-phase1: Fixed misleading indentation --- src/Parsers/Kusto/ParserKQLOperators.cpp | 4 ++-- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 260c9070d51..60fa022f9bb 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; else throw Exception("Syntax error 
near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!") + if (token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index a7ae7fef579..f1fc13d2c48 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { if (!op_pos.empty()) - return false; + return false; op_pos.push_back(pos); return true; From 8ee2a40a4c49c10c76005e535ca295da5ee8e696 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Tue, 16 Aug 2022 20:10:44 -0400 Subject: [PATCH 24/87] adding missing headers --- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index a780e18d333..64af156f505 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,7 +2,7 @@ #include #include - +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index f1fc13d2c48..fadf5305e89 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,7 +3,7 @@ #include #include #include - +#include namespace DB { From c2c457ea0e44a2453474153a78a3a133772ae7f0 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 17 Aug 2022 06:03:41 -0700 Subject: [PATCH 25/87] Kusto-phase1: Change the dialect to Enum, rename sql_dialect to dialect, set limit to subquery --- src/Client/ClientBase.cpp | 4 ++-- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 4 +++- src/Core/SettingsEnums.h | 8 ++++++++ 
src/Interpreters/executeQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLLimit.cpp | 9 ++++++++- src/Parsers/Kusto/ParserKQLLimit.h | 5 +++++ src/Parsers/Kusto/ParserKQLOperators.h | 1 + src/Parsers/Kusto/ParserKQLQuery.cpp | 8 +++++++- src/Parsers/Kusto/ParserKQLTable.cpp | 1 + src/Parsers/tests/gtest_Parser.cpp | 12 ++++++------ 11 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 1407395bf89..871a7849d5b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -308,9 +308,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; - const String & sql_dialect = settings.sql_dialect; + const Dialect & dialect = settings.dialect; - if (sql_dialect == "kusto") + if (dialect == Dialect::kusto) parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); else parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9d5535aa923..24f6d610a81 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -42,7 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ - M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ + M(Dialect, dialect, Dialect::clickhouse, "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff 
--git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 616026520db..54e1f882d58 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -158,5 +158,7 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) - +IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, + {{"clickhouse", Dialect::clickhouse}, + {"kusto", Dialect::kusto}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 308d53ff690..3f52fa44237 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -183,4 +183,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) +enum class Dialect +{ + clickhouse, + kusto, + kusto_auto, +}; + +DECLARE_SETTING_ENUM(Dialect) } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 20f4fa559f9..8bd629f1adc 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -397,10 +397,9 @@ static std::tuple executeQueryImpl( String query_table; try { - const String & sql_dialect = settings.sql_dialect; - assert(sql_dialect == "clickhouse" || sql_dialect == "kusto"); + const Dialect & dialect = settings.dialect; - if (sql_dialect == "kusto" && !internal) + if (dialect == Dialect::kusto && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 4f7eddd9662..ece04f644cc 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include namespace DB { @@ -46,7 +48,12 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } 
} - if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + String sub_query = std::format("( SELECT * FROM {} LIMIT {} )", table_name, String(final_pos->begin, final_pos->end)); + + Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserTablesInSelectQuery().parse(pos_subquery, node, expected)) return false; pos = begin; diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h index d425659499d..c234985b0a6 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.h +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -8,10 +8,15 @@ namespace DB class ParserKQLLimit : public ParserKQLBase { +public: + void setTableName(String table_name_) {table_name = table_name_;} protected: const char * getName() const override { return "KQL limit"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + String table_name; }; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index ed6ebba2441..64af156f505 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 1a850e77f48..7f6fcbcdb70 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -63,6 +63,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) std::vector> operation_pos; operation_pos.push_back(std::make_pair("table",pos)); + String table_name(pos->begin,pos->end); while (!pos->isEnd()) { @@ -95,8 +96,14 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; + kql_limit_p.setTableName(table_name); if (!kql_limit_p.parse(pos, limit_length, expected)) 
return false; + else + { + if (limit_length) + tables = std::move(limit_length); + } if (!kql_filter_p.parse(pos, where_expression, expected)) return false; @@ -114,7 +121,6 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); - select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 29fabd5056c..fadf5305e89 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index cb0b49aecbb..3575e8ba175 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -308,23 +308,23 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | project FirstName,LastName,Occupation | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | limit 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | take 1 | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" }, { "Customers | project 
FirstName,LastName,Occupation | take 3 | take 1", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", - "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", @@ -336,7 +336,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | take 3 | order by FirstName desc", - "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC" }, { "Customers | sort by FirstName asc", From 6b57b219a4997eef0275c3b4e5bcfb2c0968c81f Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 20 Aug 2022 20:01:27 -0700 Subject: [PATCH 26/87] Kusto-phase1: remove unused variable --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 48544a31104..f3c402a80be 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -121,7 +121,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String bin_column; String last_string; String column_name; - int character_passed = 0; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { @@ -160,7 +159,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (!column_name.empty()) { expr_aggregation = expr_aggregation + String(pos->begin, pos->end); - character_passed++; + if (String(pos->begin, pos->end) == ")") { expr_aggregation = expr_aggregation + " AS " + 
column_name; From 77e15a24d05411e9d743492d070dc0fd098ae3e2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Aug 2022 22:17:49 +0200 Subject: [PATCH 27/87] Add missing tests for legacy geobase --- programs/server/config.d/legacy_geobase.xml | 1 + .../server/config.d/regions_hierarchy.txt | 1 + programs/server/config.d/regions_names_en.txt | 1 + tests/config/install.sh | 3 + tests/config/legacy_geobase.xml | 4 + tests/config/regions_hierarchy.txt | 12 + tests/config/regions_names_en.txt | 12 + .../02411_legacy_geobase.reference | 286 ++++++++++++++++++ .../0_stateless/02411_legacy_geobase.sql | 12 + 9 files changed, 332 insertions(+) create mode 120000 programs/server/config.d/legacy_geobase.xml create mode 120000 programs/server/config.d/regions_hierarchy.txt create mode 120000 programs/server/config.d/regions_names_en.txt create mode 100644 tests/config/legacy_geobase.xml create mode 100644 tests/config/regions_hierarchy.txt create mode 100644 tests/config/regions_names_en.txt create mode 100644 tests/queries/0_stateless/02411_legacy_geobase.reference create mode 100644 tests/queries/0_stateless/02411_legacy_geobase.sql diff --git a/programs/server/config.d/legacy_geobase.xml b/programs/server/config.d/legacy_geobase.xml new file mode 120000 index 00000000000..500f57b7006 --- /dev/null +++ b/programs/server/config.d/legacy_geobase.xml @@ -0,0 +1 @@ +../../../tests/config/legacy_geobase.xml \ No newline at end of file diff --git a/programs/server/config.d/regions_hierarchy.txt b/programs/server/config.d/regions_hierarchy.txt new file mode 120000 index 00000000000..7b48f46aa94 --- /dev/null +++ b/programs/server/config.d/regions_hierarchy.txt @@ -0,0 +1 @@ +../../../tests/config/regions_hierarchy.txt \ No newline at end of file diff --git a/programs/server/config.d/regions_names_en.txt b/programs/server/config.d/regions_names_en.txt new file mode 120000 index 00000000000..523a1077d9d --- /dev/null +++ b/programs/server/config.d/regions_names_en.txt @@ 
-0,0 +1 @@ +../../../tests/config/regions_names_en.txt \ No newline at end of file diff --git a/tests/config/install.sh b/tests/config/install.sh index 072787efbb3..bd2f7aa2080 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -48,6 +48,7 @@ ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/session_log.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/legacy_geobase.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ @@ -74,6 +75,8 @@ ln -sf $SRC_PATH/executable_pool_dictionary.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/test_function.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/top_level_domains $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/regions_hierarchy.txt $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/regions_names_en.txt $DEST_SERVER_PATH/ ln -sf $SRC_PATH/server.key $DEST_SERVER_PATH/ ln -sf $SRC_PATH/server.crt $DEST_SERVER_PATH/ diff --git a/tests/config/legacy_geobase.xml b/tests/config/legacy_geobase.xml new file mode 100644 index 00000000000..3973c55cb86 --- /dev/null +++ b/tests/config/legacy_geobase.xml @@ -0,0 +1,4 @@ + + config.d/regions_hierarchy.txt + config.d/ + diff --git a/tests/config/regions_hierarchy.txt b/tests/config/regions_hierarchy.txt new file mode 100644 index 00000000000..a111da4a825 --- /dev/null +++ b/tests/config/regions_hierarchy.txt @@ -0,0 +1,12 @@ +1 0 0 7000000000 +2 10 3 330000000 +3 2 4 5700000 +4 3 5 330000 +5 4 6 100000 +6 12 3 1500000000 +7 6 4 83000000 +8 7 6 20000000 +9 1 1 1000000000 +10 9 1 600000000 +11 1 1 5300000000 +12 11 1 4700000000 diff --git a/tests/config/regions_names_en.txt b/tests/config/regions_names_en.txt new file mode 100644 index 00000000000..ccd23678289 --- /dev/null +++ 
b/tests/config/regions_names_en.txt @@ -0,0 +1,12 @@ +1 World +2 USA +3 Colorado +4 Boulder County +5 Boulder +6 China +7 Sichuan +8 Chengdu +9 America +10 North America +11 Eurasia +12 Asia diff --git a/tests/queries/0_stateless/02411_legacy_geobase.reference b/tests/queries/0_stateless/02411_legacy_geobase.reference new file mode 100644 index 00000000000..4fc360d876c --- /dev/null +++ b/tests/queries/0_stateless/02411_legacy_geobase.reference @@ -0,0 +1,286 @@ + +World +USA +Colorado +Boulder County +Boulder +China +Sichuan +Chengdu +America +North America +Eurasia +Asia + 0 +World 0 +USA 0 +Colorado 0 +Boulder County 0 +Boulder 5 Boulder +China 0 +Sichuan 0 +Chengdu 8 Chengdu +America 0 +North America 0 +Eurasia 0 +Asia 0 + 0 +World 0 +USA 0 +Colorado 0 +Boulder County 4 Boulder County +Boulder 4 Boulder County +China 0 +Sichuan 0 +Chengdu 0 +America 0 +North America 0 +Eurasia 0 +Asia 0 + 0 +World 0 +USA 0 +Colorado 3 Colorado +Boulder County 3 Colorado +Boulder 3 Colorado +China 0 +Sichuan 7 Sichuan +Chengdu 7 Sichuan +America 0 +North America 0 +Eurasia 0 +Asia 0 + 0 +World 0 +USA 2 USA +Colorado 2 USA +Boulder County 2 USA +Boulder 2 USA +China 6 China +Sichuan 6 China +Chengdu 6 China +America 0 +North America 0 +Eurasia 0 +Asia 0 + 0 +World 0 +USA 10 North America +Colorado 10 North America +Boulder County 10 North America +Boulder 10 North America +China 12 Asia +Sichuan 12 Asia +Chengdu 12 Asia +America 9 America +North America 10 North America +Eurasia 11 Eurasia +Asia 12 Asia + 0 +World 0 +USA 9 America +Colorado 9 America +Boulder County 9 America +Boulder 9 America +China 11 Eurasia +Sichuan 11 Eurasia +Chengdu 11 Eurasia +America 9 America +North America 9 America +Eurasia 11 Eurasia +Asia 11 Eurasia + 0 +World 4294967295 +USA 330000000 +Colorado 5700000 +Boulder County 330000 +Boulder 100000 +China 1500000000 +Sichuan 83000000 +Chengdu 20000000 +America 1000000000 +North America 600000000 +Eurasia 4294967295 +Asia 4294967295 + is not in + is not in 
World + is not in USA + is not in Colorado + is not in Boulder County + is not in Boulder + is not in China + is not in Sichuan + is not in Chengdu + is not in America + is not in North America + is not in Eurasia + is not in Asia +World is not in +World is in World +World is not in USA +World is not in Colorado +World is not in Boulder County +World is not in Boulder +World is not in China +World is not in Sichuan +World is not in Chengdu +World is not in America +World is not in North America +World is not in Eurasia +World is not in Asia +USA is not in +USA is in World +USA is in USA +USA is not in Colorado +USA is not in Boulder County +USA is not in Boulder +USA is not in China +USA is not in Sichuan +USA is not in Chengdu +USA is in America +USA is in North America +USA is not in Eurasia +USA is not in Asia +Colorado is not in +Colorado is in World +Colorado is in USA +Colorado is in Colorado +Colorado is not in Boulder County +Colorado is not in Boulder +Colorado is not in China +Colorado is not in Sichuan +Colorado is not in Chengdu +Colorado is in America +Colorado is in North America +Colorado is not in Eurasia +Colorado is not in Asia +Boulder County is not in +Boulder County is in World +Boulder County is in USA +Boulder County is in Colorado +Boulder County is in Boulder County +Boulder County is not in Boulder +Boulder County is not in China +Boulder County is not in Sichuan +Boulder County is not in Chengdu +Boulder County is in America +Boulder County is in North America +Boulder County is not in Eurasia +Boulder County is not in Asia +Boulder is not in +Boulder is in World +Boulder is in USA +Boulder is in Colorado +Boulder is in Boulder County +Boulder is in Boulder +Boulder is not in China +Boulder is not in Sichuan +Boulder is not in Chengdu +Boulder is in America +Boulder is in North America +Boulder is not in Eurasia +Boulder is not in Asia +China is not in +China is in World +China is not in USA +China is not in Colorado +China is not in 
Boulder County +China is not in Boulder +China is in China +China is not in Sichuan +China is not in Chengdu +China is not in America +China is not in North America +China is in Eurasia +China is in Asia +Sichuan is not in +Sichuan is in World +Sichuan is not in USA +Sichuan is not in Colorado +Sichuan is not in Boulder County +Sichuan is not in Boulder +Sichuan is in China +Sichuan is in Sichuan +Sichuan is not in Chengdu +Sichuan is not in America +Sichuan is not in North America +Sichuan is in Eurasia +Sichuan is in Asia +Chengdu is not in +Chengdu is in World +Chengdu is not in USA +Chengdu is not in Colorado +Chengdu is not in Boulder County +Chengdu is not in Boulder +Chengdu is in China +Chengdu is in Sichuan +Chengdu is in Chengdu +Chengdu is not in America +Chengdu is not in North America +Chengdu is in Eurasia +Chengdu is in Asia +America is not in +America is in World +America is not in USA +America is not in Colorado +America is not in Boulder County +America is not in Boulder +America is not in China +America is not in Sichuan +America is not in Chengdu +America is in America +America is not in North America +America is not in Eurasia +America is not in Asia +North America is not in +North America is in World +North America is not in USA +North America is not in Colorado +North America is not in Boulder County +North America is not in Boulder +North America is not in China +North America is not in Sichuan +North America is not in Chengdu +North America is in America +North America is in North America +North America is not in Eurasia +North America is not in Asia +Eurasia is not in +Eurasia is in World +Eurasia is not in USA +Eurasia is not in Colorado +Eurasia is not in Boulder County +Eurasia is not in Boulder +Eurasia is not in China +Eurasia is not in Sichuan +Eurasia is not in Chengdu +Eurasia is not in America +Eurasia is not in North America +Eurasia is in Eurasia +Eurasia is not in Asia +Asia is not in +Asia is in World +Asia is not in USA +Asia 
is not in Colorado +Asia is not in Boulder County +Asia is not in Boulder +Asia is not in China +Asia is not in Sichuan +Asia is not in Chengdu +Asia is not in America +Asia is not in North America +Asia is in Eurasia +Asia is in Asia +[] [] +[1] ['World'] +[2,10,9,1] ['USA','North America','America','World'] +[3,2,10,9,1] ['Colorado','USA','North America','America','World'] +[4,3,2,10,9,1] ['Boulder County','Colorado','USA','North America','America','World'] +[5,4,3,2,10,9,1] ['Boulder','Boulder County','Colorado','USA','North America','America','World'] +[6,12,11,1] ['China','Asia','Eurasia','World'] +[7,6,12,11,1] ['Sichuan','China','Asia','Eurasia','World'] +[8,7,6,12,11,1] ['Chengdu','Sichuan','China','Asia','Eurasia','World'] +[9,1] ['America','World'] +[10,9,1] ['North America','America','World'] +[11,1] ['Eurasia','World'] +[12,11,1] ['Asia','Eurasia','World'] diff --git a/tests/queries/0_stateless/02411_legacy_geobase.sql b/tests/queries/0_stateless/02411_legacy_geobase.sql new file mode 100644 index 00000000000..bf34834bbaf --- /dev/null +++ b/tests/queries/0_stateless/02411_legacy_geobase.sql @@ -0,0 +1,12 @@ +SELECT regionToName(number::UInt32, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'xy') FROM numbers(13); -- { serverError 1000 } + +SELECT regionToName(number::UInt32, 'en'), regionToCity(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToArea(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToDistrict(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToCountry(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToContinent(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToTopContinent(number::UInt32) AS id, 
regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToPopulation(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(n1.number::UInt32, 'en') || (regionIn(n1.number::UInt32, n2.number::UInt32) ? ' is in ' : ' is not in ') || regionToName(n2.number::UInt32, 'en') FROM numbers(13) AS n1 CROSS JOIN numbers(13) AS n2; +SELECT regionHierarchy(number::UInt32) AS arr, arrayMap(id -> regionToName(id, 'en'), arr) FROM numbers(13); From 00bcfa830e292bd206c9b3f664a21f34dd51c514 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Aug 2022 22:42:08 +0200 Subject: [PATCH 28/87] Fix fasttest --- tests/queries/0_stateless/02411_legacy_geobase.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02411_legacy_geobase.sql b/tests/queries/0_stateless/02411_legacy_geobase.sql index bf34834bbaf..a7d82f3beb9 100644 --- a/tests/queries/0_stateless/02411_legacy_geobase.sql +++ b/tests/queries/0_stateless/02411_legacy_geobase.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + SELECT regionToName(number::UInt32, 'en') FROM numbers(13); SELECT regionToName(number::UInt32, 'xy') FROM numbers(13); -- { serverError 1000 } From 57a984b6a14826d41920f73afdb7c602fb484d6b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Aug 2022 02:54:45 +0300 Subject: [PATCH 29/87] Update install.sh --- tests/config/install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/config/install.sh b/tests/config/install.sh index bd2f7aa2080..2f483b92882 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -75,8 +75,8 @@ ln -sf $SRC_PATH/executable_pool_dictionary.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/test_function.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/top_level_domains $DEST_SERVER_PATH/ -ln -sf $SRC_PATH/regions_hierarchy.txt $DEST_SERVER_PATH/ -ln -sf $SRC_PATH/regions_names_en.txt $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/regions_hierarchy.txt 
$DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/regions_names_en.txt $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/server.key $DEST_SERVER_PATH/ ln -sf $SRC_PATH/server.crt $DEST_SERVER_PATH/ From b14bd43590ab9e458ace17ceb5581f2370610aa0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Aug 2022 20:25:06 +0200 Subject: [PATCH 30/87] Fix error --- programs/server/config.d/legacy_geobase.xml | 2 +- tests/config/{ => config.d}/legacy_geobase.xml | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/config/{ => config.d}/legacy_geobase.xml (100%) diff --git a/programs/server/config.d/legacy_geobase.xml b/programs/server/config.d/legacy_geobase.xml index 500f57b7006..8d09ccd69bc 120000 --- a/programs/server/config.d/legacy_geobase.xml +++ b/programs/server/config.d/legacy_geobase.xml @@ -1 +1 @@ -../../../tests/config/legacy_geobase.xml \ No newline at end of file +../../../tests/config/config.d/legacy_geobase.xml \ No newline at end of file diff --git a/tests/config/legacy_geobase.xml b/tests/config/config.d/legacy_geobase.xml similarity index 100% rename from tests/config/legacy_geobase.xml rename to tests/config/config.d/legacy_geobase.xml From a66710ca9dc396b52e1aadd2cc86078a69ea631d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Aug 2022 03:36:02 +0300 Subject: [PATCH 31/87] Update legacy_geobase.xml --- tests/config/config.d/legacy_geobase.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/config/config.d/legacy_geobase.xml b/tests/config/config.d/legacy_geobase.xml index 3973c55cb86..352825afd04 100644 --- a/tests/config/config.d/legacy_geobase.xml +++ b/tests/config/config.d/legacy_geobase.xml @@ -1,4 +1,4 @@ - config.d/regions_hierarchy.txt - config.d/ + /etc/clickhouse-server/config.d/regions_hierarchy.txt + /etc/clickhouse-server/config.d/ From 3139d4a79a72812ff8c921a5ff8a904c6fe3e342 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Aug 2022 02:38:46 +0200 Subject: [PATCH 32/87] Fix 
error --- programs/server/config.d/legacy_geobase.xml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) mode change 120000 => 100644 programs/server/config.d/legacy_geobase.xml diff --git a/programs/server/config.d/legacy_geobase.xml b/programs/server/config.d/legacy_geobase.xml deleted file mode 120000 index 8d09ccd69bc..00000000000 --- a/programs/server/config.d/legacy_geobase.xml +++ /dev/null @@ -1 +0,0 @@ -../../../tests/config/config.d/legacy_geobase.xml \ No newline at end of file diff --git a/programs/server/config.d/legacy_geobase.xml b/programs/server/config.d/legacy_geobase.xml new file mode 100644 index 00000000000..3973c55cb86 --- /dev/null +++ b/programs/server/config.d/legacy_geobase.xml @@ -0,0 +1,4 @@ + + config.d/regions_hierarchy.txt + config.d/ + From 93605bd88ae84d1cd0d02dcdd43fb8e68428cf09 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Aug 2022 04:56:35 +0300 Subject: [PATCH 33/87] Update install.sh --- tests/config/install.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/config/install.sh b/tests/config/install.sh index 2f483b92882..8f54967f610 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -48,7 +48,12 @@ ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/session_log.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/legacy_geobase.xml $DEST_SERVER_PATH/config.d/ + +# Not supported with fasttest. 
+if [ "${DST_PATH}" = "/etc/clickhouse-server" ] +then + ln -sf $SRC_PATH/config.d/legacy_geobase.xml $DEST_SERVER_PATH/config.d/ +fi ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ From eab8b7b42d72ee01aabe057290453ed8f21c2e5e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 5 Sep 2022 08:25:08 +0000 Subject: [PATCH 34/87] Always start embedded Keeper in async mode --- programs/server/Server.cpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d788270ecf9..5e5a1be0b8f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1279,18 +1279,8 @@ int Server::main(const std::vector & /*args*/) if (config().has("keeper_server")) { #if USE_NURAFT - //// If we don't have configured connection probably someone trying to use clickhouse-server instead - //// of clickhouse-keeper, so start synchronously. - bool can_initialize_keeper_async = false; - - if (has_zookeeper) /// We have configured connection to some zookeeper cluster - { - /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start - /// synchronously. - can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); - } /// Initialize keeper RAFT. 
- global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + global_context->initializeKeeperDispatcher(/* start_async */ true); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & From 74c958931b4f24df1705fda9b03ad8f9e0b344ed Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 5 Sep 2022 21:16:04 -0700 Subject: [PATCH 35/87] Kusto-phase1 : Updated kql pipe flow with some optimizations --- src/Parsers/Kusto/ParserKQLFilter.cpp | 20 +- src/Parsers/Kusto/ParserKQLLimit.cpp | 47 +-- src/Parsers/Kusto/ParserKQLLimit.h | 6 - src/Parsers/Kusto/ParserKQLOperators.cpp | 156 ++++++++-- src/Parsers/Kusto/ParserKQLOperators.h | 8 +- src/Parsers/Kusto/ParserKQLProject.cpp | 28 +- src/Parsers/Kusto/ParserKQLProject.h | 6 - src/Parsers/Kusto/ParserKQLQuery.cpp | 353 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLQuery.h | 19 +- src/Parsers/Kusto/ParserKQLSort.cpp | 31 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 43 ++- src/Parsers/Kusto/ParserKQLStatement.h | 7 + src/Parsers/Kusto/ParserKQLSummarize.cpp | 192 +++--------- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- src/Parsers/Kusto/ParserKQLTable.cpp | 21 +- src/Parsers/Kusto/ParserKQLTable.h | 3 +- src/Parsers/tests/gtest_Parser.cpp | 30 +- 17 files changed, 567 insertions(+), 408 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 466370f5d80..3a399bdccdb 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -10,27 +10,15 @@ namespace DB bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; - Pos begin = pos; - String expr; - - KQLOperators convetor; - - for (auto op_po : op_pos) - { - if (expr.empty()) - expr = "(" + convetor.getExprFromToken(op_po) +")"; - else - expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; - } 
+ String expr = getExprFromToken(pos); + ASTPtr where_expression; Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos pos_filter(token_filter, pos.max_depth); - if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) return false; - pos = begin; + node->as()->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index ece04f644cc..bb8e08fd378 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -11,52 +11,17 @@ namespace DB bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; + ASTPtr limit_length; - auto begin = pos; - Int64 min_limit = -1; - auto final_pos = pos; - for (auto op_po: op_pos) - { - auto is_number = [&] - { - for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) - { - if (!isdigit(*ch)) - return false; - } - return true; - }; + auto expr = getExprFromToken(pos); - if (!is_number()) - return false; + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); - auto limit_length = std::strtol(op_po->begin,nullptr, 10); - if (-1 == min_limit) - { - min_limit = limit_length; - final_pos = op_po; - } - else - { - if (min_limit > limit_length) - { - min_limit = limit_length; - final_pos = op_po; - } - } - } - - String sub_query = std::format("( SELECT * FROM {} LIMIT {} )", table_name, String(final_pos->begin, final_pos->end)); - - Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); - IParser::Pos pos_subquery(token_subquery, pos.max_depth); - - if (!ParserTablesInSelectQuery().parse(pos_subquery, node, expected)) + if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) 
return false; - pos = begin; + node->as()->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h index c234985b0a6..1585805f0fc 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.h +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -8,15 +8,9 @@ namespace DB class ParserKQLLimit : public ParserKQLBase { -public: - void setTableName(String table_name_) {table_name = table_name_;} - protected: const char * getName() const override { return "KQL limit"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - String table_name; }; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 60fa022f9bb..b250f5def60 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include namespace DB { @@ -10,9 +12,60 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) +String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Pos &token_pos,String kql_op, String ch_op) { - String new_expr, left_wildcards, right_wildcards; + String new_expr; + Expected expected; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + auto haystack = tokens.back(); + + String logic_op = (kql_op == "has_all") ? 
" and " : " or "; + + while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + { + auto tmp_arg = String(token_pos->begin, token_pos->end); + if (token_pos->type == TokenType::Comma ) + new_expr = new_expr + logic_op; + else + new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; + + ++token_pos; + if (token_pos->type == TokenType::ClosingRoundBracket) + break; + + } + + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ch_op) +{ + String new_expr; + + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ASTPtr select; + Expected expected; + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + --token_pos; + --token_pos; + return ch_op; + +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) +{ + String new_expr, left_wildcards, right_wildcards, left_space, right_space; switch (wildcards_pos) { @@ -33,20 +86,45 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; } - if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; + switch (space_pos) + { + case WildcardsPos::none: + break; + + case WildcardsPos::left: + left_space =" "; + break; + + case WildcardsPos::right: + right_space = " "; + break; + + case WildcardsPos::both: + left_space =" "; + right_space = " "; + break; + } + + ++token_pos; + + if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + 
left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; + else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + { + auto tmp_arg = String(token_pos->begin, token_pos->end); + new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; + } else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } -String KQLOperators::getExprFromToken(IParser::Pos pos) +bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) { - String res; - std::vector tokens; + auto begin = pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -87,14 +165,23 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; - if (KQLOperator.find(op) != KQLOperator.end()) - op_value = KQLOperator[op]; + if (KQLOperator.find(op) == KQLOperator.end()) + { + pos = begin; + return false; + } + + op_value = KQLOperator[op]; String new_expr; + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { + auto last_op = tokens.back(); + auto last_pos = pos; + switch (op_value) { case KQLOperatorValue::contains: @@ -142,7 +229,6 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; @@ -152,9 +238,11 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) break; case KQLOperatorValue::has_all: + new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_any: + new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", 
"hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_cs: @@ -166,35 +254,67 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) break; case KQLOperatorValue::hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + new_expr += " or "; + 
tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::in_cs: - new_expr = "in"; + new_expr = genInOpExpr(pos,op,"in"); break; case KQLOperatorValue::not_in_cs: - new_expr = "not in"; + new_expr = genInOpExpr(pos,op,"not in"); break; case KQLOperatorValue::in: @@ -229,13 +349,11 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) tokens.push_back(new_expr); } - ++pos; + return true; } - - for (auto & token : tokens) - res = res + token + " "; - - return res; + pos = begin; + return false; } } + diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 64af156f505..9796ae10c07 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -9,7 +9,7 @@ namespace DB class KQLOperators { public: - String getExprFromToken(IParser::Pos pos); + bool convert(std::vector &tokens,IParser::Pos &pos); protected: enum class WildcardsPos:uint8_t @@ -83,7 +83,7 @@ protected: {"hasprefix" , KQLOperatorValue::hasprefix}, {"!hasprefix" , KQLOperatorValue::not_hasprefix}, {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, {"hassuffix" , KQLOperatorValue::hassuffix}, {"!hassuffix" , KQLOperatorValue::not_hassuffix}, {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, @@ -98,7 +98,9 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, 
String ch_op, WildcardsPos wildcards_pos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); + static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); + static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); }; } diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index 0e25c9c4a6c..e978323d821 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -6,38 +6,18 @@ namespace DB bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - auto begin = pos; + ASTPtr select_expression_list; String expr; - if (op_pos.empty()) - expr = "*"; - else - { - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) - { - pos = *it ; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) - { - if (pos->type == TokenType::BareWord) - { - String tmp(pos->begin,pos->end); - if (it != op_pos.begin() && columns.find(tmp) == columns.end()) - return false; - columns.insert(tmp); - } - ++pos; - } - } - expr = getExprFromToken(op_pos.back()); - } + expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) return false; - pos = begin; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h index 3ab3c82f1be..b64675beed0 100644 --- a/src/Parsers/Kusto/ParserKQLProject.h +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -8,15 +8,9 @@ namespace DB class ParserKQLProject : public ParserKQLBase { 
-public: - void addColumn(String column) {columns.insert(column);} - protected: const char * getName() const override { return "KQL project"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - std::unordered_set columns; }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 7f6fcbcdb70..9fc32da7790 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -8,120 +8,339 @@ #include #include #include +#include +#include +#include +#include +#include +#include namespace DB { -bool ParserKQLBase :: parsePrepare(Pos & pos) +namespace ErrorCodes { - op_pos.push_back(pos); - return true; + extern const int UNKNOWN_FUNCTION; } -String ParserKQLBase :: getExprFromToken(Pos pos) +String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) +{ + Tokens tokens(text.c_str(), text.c_str() + text.size()); + IParser::Pos pos(tokens, max_depth); + + return getExprFromToken(pos); +} + +String ParserKQLBase :: getExprFromPipe(Pos & pos) +{ + uint16_t bracket_count = 0; + auto begin = pos; + auto end = pos; + while (!end->isEnd() && end->type != TokenType::Semicolon) + { + if (end->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (end->type == TokenType::OpeningRoundBracket) + --bracket_count; + + if (end->type == TokenType::PipeMark && bracket_count == 0) + break; + + ++end; + } + --end; + return String(begin->begin, end->end); +} + +String ParserKQLBase :: getExprFromToken(Pos & pos) { String res; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + std::vector tokens; + String alias; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - res = res + String(pos->begin,pos->end) +" "; + String token = String(pos->begin,pos->end); + + if (token == "=") + { + ++pos; + if (String(pos->begin,pos->end) != "~" ) + { + alias = tokens.back(); + tokens.pop_back(); + } + 
--pos; + } + else if (!KQLOperators().convert(tokens,pos)) + { + tokens.push_back(token); + } + + if (pos->type == TokenType::Comma && !alias.empty()) + { + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); + } ++pos; } + + if (!alias.empty()) + { + tokens.push_back("AS"); + tokens.push_back(alias); + } + + for (auto token:tokens) + res = res.empty()? token : res +" " + token; return res; } +std::unique_ptr ParserKQLQuery::getOperator(String & op_name) +{ + if (op_name == "filter" || op_name == "where") + return std::make_unique(); + else if (op_name == "limit" || op_name == "take") + return std::make_unique(); + else if (op_name == "project") + return std::make_unique(); + else if (op_name == "sort by" || op_name == "order by") + return std::make_unique(); + else if (op_name == "summarize") + return std::make_unique(); + else if (op_name == "table") + return std::make_unique(); + else + return nullptr; +} + bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + struct KQLOperatorDataFlowState + { + String operator_name; + bool need_input; + bool gen_output; + int8_t backspace_steps; // how many steps to last token of previous pipe + }; + auto select_query = std::make_shared(); node = select_query; - - ParserKQLFilter kql_filter_p; - ParserKQLLimit kql_limit_p; - ParserKQLProject kql_project_p; - ParserKQLSort kql_sort_p; - ParserKQLSummarize kql_summarize_p; - ParserKQLTable kql_table_p; - - ASTPtr select_expression_list; ASTPtr tables; - ASTPtr where_expression; - ASTPtr group_expression_list; - ASTPtr order_expression_list; - ASTPtr limit_length; - std::unordered_map kql_parser = { - { "filter",&kql_filter_p}, - { "where",&kql_filter_p}, - { "limit",&kql_limit_p}, - { "take",&kql_limit_p}, - { "project",&kql_project_p}, - { "sort",&kql_sort_p}, - { "order",&kql_sort_p}, - { "summarize",&kql_summarize_p}, - { "table",&kql_table_p} + std::unordered_map kql_parser = + { + { 
"filter", {"filter", false, false, 3}}, + { "where", {"filter", false, false, 3}}, + { "limit", {"limit", false, true, 3}}, + { "take", {"limit", false, true, 3}}, + { "project", {"project", false, false, 3}}, + { "sort by", {"order by", false, false, 4}}, + { "order by", {"order by", false, false, 4}}, + { "table", {"table", false, false, 3}}, + { "summarize", {"summarize", true, true, 3}} }; std::vector> operation_pos; - operation_pos.push_back(std::make_pair("table",pos)); - String table_name(pos->begin,pos->end); + String table_name(pos->begin, pos->end); - while (!pos->isEnd()) + operation_pos.push_back(std::make_pair("table", pos)); + ++pos; + uint16_t bracket_count = 0; + + while (!pos->isEnd() && pos->type != TokenType::Semicolon) { - ++pos; - if (pos->type == TokenType::PipeMark) + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + if (pos->type == TokenType::OpeningRoundBracket) + --bracket_count; + + if (pos->type == TokenType::PipeMark && bracket_count == 0) { ++pos; - String kql_operator(pos->begin,pos->end); + String kql_operator(pos->begin, pos->end); + if (kql_operator == "order" || kql_operator == "sort") + { + ++pos; + ParserKeyword s_by("by"); + if (s_by.ignore(pos,expected)) + { + kql_operator = "order by"; + --pos; + } + } if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(kql_operator,pos)); + operation_pos.push_back(std::make_pair(kql_operator, pos)); } + else + ++pos; } - for (auto &op_pos : operation_pos) - { - auto kql_operator = op_pos.first; - auto npos = op_pos.second; - if (!npos.isValid()) - return false; + auto kql_operator_str = operation_pos.back().first; + auto npos = operation_pos.back().second; + if (!npos.isValid()) + return false; - if (!kql_parser[kql_operator]->parsePrepare(npos)) + auto kql_operator_p = getOperator(kql_operator_str); + + if (!kql_operator_p) + return false; + + if (operation_pos.size() == 
1) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else if (operation_pos.size() == 2 && operation_pos.front().first == "table") + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + npos = operation_pos.front().second; + if (!ParserKQLTable().parse(npos, node, expected)) + return false; + } + else + { + String project_clause, order_clause, where_clause, limit_clause; + auto last_pos = operation_pos.back().second; + auto last_op = operation_pos.back().first; + + auto set_main_query_clause =[&](String & op, Pos & op_pos) + { + auto op_str = ParserKQLBase::getExprFromPipe(op_pos); + if (op == "project") + project_clause = op_str; + else if (op == "where" || op == "filter") + where_clause = where_clause.empty() ? std::format("({})", op_str) : where_clause + std::format("AND ({})", op_str); + else if (op == "limit" || op == "take") + limit_clause = op_str; + else if (op == "order by" || op == "sort by") + order_clause = order_clause.empty() ? 
op_str : order_clause + "," + op_str; + }; + + set_main_query_clause(last_op, last_pos); + + operation_pos.pop_back(); + + if (kql_parser[last_op].need_input) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else + { + while (operation_pos.size() > 0) + { + auto prev_op = operation_pos.back().first; + auto prev_pos = operation_pos.back().second; + + if (kql_parser[prev_op].gen_output) + break; + if (!project_clause.empty() && prev_op == "project") + break; + set_main_query_clause(prev_op, prev_pos); + operation_pos.pop_back(); + last_op = prev_op; + last_pos = prev_pos; + } + } + + if (operation_pos.size() > 0) + { + for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) + --last_pos; + + String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); + Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) + return false; + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + } + else + { + if (!ParserKQLTable().parse(last_pos, node, expected)) + return false; + } + + auto set_query_clasue =[&](String op_str, String op_calsue) + { + auto oprator = getOperator(op_str); + if (oprator) + { + Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); + IParser::Pos pos_clause(token_clause, pos.max_depth); + if (!oprator->parse(pos_clause, node, expected)) + return false; + } + return true; + }; + + if (!select_query->select()) + { + if (project_clause.empty()) + project_clause = "*"; + if (!set_query_clasue("project", project_clause)) + return false; + } + + if (!order_clause.empty()) + if (!set_query_clasue("order by", order_clause)) + return false; + + if (!where_clause.empty()) + if (!set_query_clasue("where", where_clause)) + return false; + + if (!limit_clause.empty()) + if 
(!set_query_clasue("limit", limit_clause)) + return false; + return true; + } + + if (!select_query->select()) + { + auto expr = String("*"); + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + if (!std::make_unique()->parse(new_pos, node, expected)) return false; } - if (!kql_table_p.parse(pos, tables, expected)) + return true; +} + +bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr select_node; + + if (!ParserKQLTaleFunction().parse(pos, select_node, expected)) return false; - if (!kql_project_p.parse(pos, select_expression_list, expected)) - return false; + ASTPtr node_subquery = std::make_shared(); + node_subquery->children.push_back(select_node); - kql_limit_p.setTableName(table_name); - if (!kql_limit_p.parse(pos, limit_length, expected)) - return false; - else - { - if (limit_length) - tables = std::move(limit_length); - } + ASTPtr node_table_expr = std::make_shared(); + node_table_expr->as()->subquery = node_subquery; - if (!kql_filter_p.parse(pos, where_expression, expected)) - return false; + node_table_expr->children.emplace_back(node_subquery); - if (!kql_sort_p.parse(pos, order_expression_list, expected)) - return false; + ASTPtr node_table_in_select_query_emlement = std::make_shared(); + node_table_in_select_query_emlement->as()->table_expression = node_table_expr; - if (!kql_summarize_p.parse(pos, select_expression_list, expected)) - return false; - else - group_expression_list = kql_summarize_p.group_expression_list; + ASTPtr res = std::make_shared(); - select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); - select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); - select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); - select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); - 
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + res->children.emplace_back(node_table_in_select_query_emlement); + node = res; return true; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 0545cd00cd9..42f5f84f031 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -1,25 +1,32 @@ #pragma once #include +#include namespace DB { class ParserKQLBase : public IParserBase { public: - virtual bool parsePrepare(Pos & pos) ; - -protected: - std::vector op_pos; - std::vector expressions; - virtual String getExprFromToken(Pos pos); + static String getExprFromToken(Pos & pos); + static String getExprFromPipe(Pos & pos); + static String getExprFromToken(const String & text, const uint32_t & max_depth); }; class ParserKQLQuery : public IParserBase { + protected: + static std::unique_ptr getOperator(String &op_name); const char * getName() const override { return "KQL query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserKQLSubquery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL subquery"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 70e3283ee3e..f7540d729fd 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -10,41 +10,32 @@ namespace DB bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; - - auto begin = pos; bool has_dir = false; std::vector has_directions; ParserOrderByExpressionList order_list; ASTPtr order_expression_list; - ParserKeyword by("by"); + auto expr = getExprFromToken(pos); - pos = op_pos.back(); // sort only affected by last one + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos 
new_pos(tokens, pos.max_depth); - if (!by.ignore(pos, expected)) + auto pos_backup = new_pos; + if (!order_list.parse(pos_backup, order_expression_list, expected)) return false; - if (!order_list.parse(pos,order_expression_list,expected)) - return false; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - return false; - - pos = op_pos.back(); - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) { - String tmp(pos->begin,pos->end); + String tmp(new_pos->begin, new_pos->end); if (tmp == "desc" or tmp == "asc") has_dir = true; - if (pos->type == TokenType::Comma) + if (new_pos->type == TokenType::Comma) { has_directions.push_back(has_dir); has_dir = false; } - - ++pos; + ++new_pos; } has_directions.push_back(has_dir); @@ -58,13 +49,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) order_expr->nulls_direction = -1; else order_expr->nulls_direction = order_expr->nulls_direction == 1 ? 
-1 : 1; - } } - node = order_expression_list; + node->as()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); - pos =begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 2afbad22131..573c953c313 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -35,7 +36,6 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - // will support union next phase ASTPtr kql_query; if (!ParserKQLQuery().parse(pos, kql_query, expected)) @@ -58,4 +58,45 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery kql_p; + ASTPtr select; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + auto begin = pos; + auto paren_count = 0 ; + String kql_statement; + + if (s_lparen.ignore(pos, expected)) + { + ++paren_count; + while (!pos->isEnd()) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + + kql_statement = kql_statement + " " + String(pos->begin,pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth); + + if (kql_p.parse(pos_kql, select, expected)) + { + node = select; + ++pos; + return true; + } + } + pos = begin; + return false; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index 1eed2d00845..ef44b2d6c8a 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ 
-41,5 +41,12 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserKQLTaleFunction : public IParserBase +{ +protected: + const char * getName() const override { return "KQL() function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f3c402a80be..4d8d7753178 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,8 +1,3 @@ -#include -#include -#include -#include -#include #include #include #include @@ -15,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -23,182 +17,64 @@ namespace DB { -std::pair ParserKQLSummarize::removeLastWord(String input) -{ - ReadBufferFromString in(input); - String token; - std::vector temp; - - while (!in.eof()) - { - readStringUntilWhitespace(token, in); - if (in.eof()) - break; - skipWhitespaceIfAny(in); - temp.push_back(token); - } - - String first_part; - for (std::size_t i = 0; i < temp.size() - 1; i++) - { - first_part += temp[i]; - } - if (!temp.empty()) - { - return std::make_pair(first_part, temp[temp.size() - 1]); - } - - return std::make_pair("", ""); -} - -String ParserKQLSummarize::getBinGroupbyString(String expr_bin) -{ - String column_name; - bool bracket_start = false; - bool comma_start = false; - String bin_duration; - - for (char ch : expr_bin) - { - if (comma_start && ch != ')') - bin_duration += ch; - if (ch == ',') - { - comma_start = true; - bracket_start = false; - } - if (bracket_start) - column_name += ch; - if (ch == '(') - bracket_start = true; - } - - std::size_t len = bin_duration.size(); - char bin_type = bin_duration[len - 1]; // y, d, h, m, s - if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) - { - return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS 
bin_int"; - } - bin_duration = bin_duration.substr(0, len - 1); - - switch (bin_type) - { - case 'y': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" - + bin_duration + " * (12*30*86400))) AS bin_year"; - case 'd': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration - + " * 86400)) AS bin_day"; - case 'h': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration - + " * 3600)) AS bin_hour"; - case 'm': - return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration - + " * 60)) AS bin_minute"; - case 's': - return "toDateTime(" + column_name + ") AS bin_sec"; - default: - return ""; - } -} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (op_pos.empty()) - return true; - if (op_pos.size() != 1) // now only support one summarize - return false; + ASTPtr select_expression_list; + ASTPtr group_expression_list; - auto begin = pos; - - pos = op_pos.back(); String expr_aggregation; String expr_groupby; String expr_columns; - String expr_bin; bool groupby = false; - bool bin_function = false; - String bin_column; - String last_string; - String column_name; + + auto begin = pos; + auto pos_groupby = pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (String(pos->begin, pos->end) == "by") - groupby = true; - else { - if (groupby) - { - if (String(pos->begin, pos->end) == "bin" || bin_function) - { - bin_function = true; - expr_bin += String(pos->begin, pos->end); - if (String(pos->begin, pos->end) == ")") - { - expr_bin = getBinGroupbyString(expr_bin); - expr_groupby += expr_bin; - bin_function = false; - } - } - - else - expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; - } - - else - { - if 
(String(pos->begin, pos->end) == "=") - { - std::pair temp = removeLastWord(expr_aggregation); - expr_aggregation = temp.first; - column_name = temp.second; - } - else - { - if (!column_name.empty()) - { - expr_aggregation = expr_aggregation + String(pos->begin, pos->end); - - if (String(pos->begin, pos->end) == ")") - { - expr_aggregation = expr_aggregation + " AS " + column_name; - column_name = ""; - } - } - else if (!bin_function) - { - expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; - } - } - } + groupby = true; + auto end = pos; + --end; + expr_aggregation = begin <= end ? String(begin->begin, end->end) : ""; + pos_groupby = pos; + ++pos_groupby; } ++pos; } - - if (expr_groupby.empty()) - expr_columns = expr_aggregation; + --pos; + if (groupby) + expr_groupby = String(pos_groupby->begin, pos->end); else - { - if (expr_aggregation.empty()) - expr_columns = expr_groupby; - else - expr_columns = expr_groupby + "," + expr_aggregation; - } - Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); - IParser::Pos pos_columns(token_columns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) + expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; + + auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; + expr_columns = groupby ? 
expr_aggregation_str + expr_groupby : expr_aggregation_str; + + String converted_columns = getExprFromToken(expr_columns, pos.max_depth); + + Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) return false; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + if (groupby) { - Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); - IParser::Pos postoken_groupby(token_groupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) + String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); + + Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) return false; + node->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); } - pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index b243f74d08f..1aad02705df 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,16 +5,13 @@ namespace DB { + class ParserKQLSummarize : public ParserKQLBase { -public: - ASTPtr group_expression_list; protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - static std::pair removeLastWord(String input); - static String getBinGroupbyString(String expr_bin); }; } diff --git 
a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index fadf5305e89..6356ad688b6 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -7,15 +7,6 @@ namespace DB { -bool ParserKQLTable :: parsePrepare(Pos & pos) -{ - if (!op_pos.empty()) - return false; - - op_pos.push_back(pos); - return true; -} - bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords @@ -44,12 +35,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "EXPLAIN" }); - if (op_pos.empty()) - return false; - - auto begin = pos; - pos = op_pos.back(); - + ASTPtr tables; String table_name(pos->begin,pos->end); String table_name_upcase(table_name); @@ -58,9 +44,10 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) return false; - if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + if (!ParserTablesInSelectQuery().parse(pos, tables, expected)) return false; - pos = begin; + + node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); return true; } diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index 1266b6e732d..c67dcb15156 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -8,11 +8,10 @@ namespace DB class ParserKQLTable : public ParserKQLBase { + protected: const char * getName() const override { return "KQL Table"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - bool parsePrepare(Pos &pos) override; - }; } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 3575e8ba175..b452bd27642 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -308,27 +308,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | project 
FirstName,LastName,Occupation | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | limit 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | take 1 | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | take 3 | take 1", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 1\n)" + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", - "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "throws Syntax error" + "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" }, { "Customers | sort by FirstName desc", @@ -360,7 +360,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | sort by FirstName | order by Age ", - "SELECT *\nFROM Customers\nORDER BY Age DESC" + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC" }, { "Customers | sort by FirstName nulls first", @@ -408,31 +408,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, 
}, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" + "SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)" }, { "Customers |summarize count() by Occupation", - "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + "SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize sum(Age) by Occupation", - "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize avg(Age) by Occupation", - "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize min(Age) by Occupation", - "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers |summarize max(Age) by Occupation", - "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" - }, - { - "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" }, { "Customers | where FirstName contains 'pet'", From 3f65e6b2b1cae1b0bc0e19df43d9d7da79ee5bc4 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 5 Sep 2022 22:27:23 -0700 Subject: [PATCH 36/87] Kusto-phase1 : fixed style, removed trailing whitespaces --- src/Parsers/Kusto/ParserKQLQuery.cpp | 15 +++++---------- src/Parsers/Kusto/ParserKQLStatement.cpp | 2 +- src/Parsers/Kusto/ParserKQLSummarize.cpp | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 9fc32da7790..03cb5a8ad43 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ 
b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -18,11 +18,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_FUNCTION; -} - String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) { Tokens tokens(text.c_str(), text.c_str() + text.size()); @@ -95,7 +90,7 @@ String ParserKQLBase :: getExprFromToken(Pos & pos) tokens.push_back(alias); } - for (auto token:tokens) + for (auto token:tokens) res = res.empty()? token : res +" " + token; return res; } @@ -120,7 +115,7 @@ std::unique_ptr ParserKQLQuery::getOperator(String & op_name) bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - struct KQLOperatorDataFlowState + struct KQLOperatorDataFlowState { String operator_name; bool need_input; @@ -206,7 +201,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ParserKQLTable().parse(npos, node, expected)) return false; } - else + else { String project_clause, order_clause, where_clause, limit_clause; auto last_pos = operation_pos.back().second; @@ -252,7 +247,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - if (operation_pos.size() > 0) + if (operation_pos.size() > 0) { for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) --last_pos; @@ -274,7 +269,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto set_query_clasue =[&](String op_str, String op_calsue) { auto oprator = getOperator(op_str); - if (oprator) + if (oprator) { Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); IParser::Pos pos_clause(token_clause, pos.max_depth); diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 573c953c313..21e480234d3 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -69,7 +69,7 @@ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expec String 
kql_statement; if (s_lparen.ignore(pos, expected)) - { + { ++paren_count; while (!pos->isEnd()) { diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 4d8d7753178..75eacb1adbd 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -49,7 +49,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte expr_groupby = String(pos_groupby->begin, pos->end); else expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; - + auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; expr_columns = groupby ? expr_aggregation_str + expr_groupby : expr_aggregation_str; From 4a68bfef393354468cb9b64b43dd9dddcd0d51eb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 6 Sep 2022 10:58:14 +0000 Subject: [PATCH 37/87] Fix tests with async Keeper start --- tests/integration/helpers/keeper_utils.py | 41 ++++++++ .../test_keeper_and_access_storage/test.py | 3 + tests/integration/test_keeper_auth/test.py | 3 + .../test_keeper_back_to_back/test.py | 2 + .../configs/enable_keeper.xml | 22 ----- .../configs/keeper_conf.xml | 8 -- .../test_keeper_force_recovery/test.py | 62 ++++-------- .../test.py | 54 +++-------- .../test_keeper_four_word_command/test.py | 96 ++++++------------- .../test_keeper_incorrect_config/test.py | 4 +- .../test_keeper_internal_secure/test.py | 3 + .../test_keeper_mntr_pressure/test.py | 41 +++----- .../test.py | 29 +----- .../test_keeper_multinode_simple/test.py | 32 +------ .../integration/test_keeper_nodes_add/test.py | 5 + .../test_keeper_nodes_move/test.py | 4 + .../test_keeper_nodes_remove/test.py | 14 ++- .../test_keeper_persistent_log/test.py | 17 +++- .../test.py | 8 ++ .../configs/enable_keeper1.xml | 34 ------- .../configs/enable_keeper2.xml | 34 ------- .../configs/enable_keeper3.xml | 34 ------- .../configs/enable_keeper_two_nodes_1.xml | 28 ------ 
.../configs/enable_keeper_two_nodes_2.xml | 28 ------ .../configs/enable_keeper_two_nodes_3.xml | 28 ------ .../test_keeper_restore_from_snapshot/test.py | 3 + .../test_keeper_secure_client/test.py | 3 +- tests/integration/test_keeper_session/test.py | 20 +--- .../test_keeper_snapshot_on_exit/test.py | 2 + .../test.py | 4 +- .../integration/test_keeper_snapshots/test.py | 11 ++- .../test_keeper_snapshots_multinode/test.py | 8 ++ .../configs/enable_keeper1.xml | 34 ------- .../configs/enable_keeper2.xml | 34 ------- .../configs/enable_keeper3.xml | 34 ------- .../test_keeper_three_nodes_start/test.py | 2 + .../test_keeper_three_nodes_two_alive/test.py | 12 ++- .../test_keeper_two_nodes_cluster/test.py | 29 +----- .../test_keeper_znode_time/test.py | 25 +---- .../test_keeper_zookeeper_converter/test.py | 3 + 40 files changed, 218 insertions(+), 640 deletions(-) create mode 100644 tests/integration/helpers/keeper_utils.py delete mode 100644 tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml delete mode 100644 tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml delete mode 100644 tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml delete mode 100644 tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml delete mode 100644 tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml delete mode 100644 
tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py new file mode 100644 index 00000000000..681407e5e8c --- /dev/null +++ b/tests/integration/helpers/keeper_utils.py @@ -0,0 +1,41 @@ +import socket +import time + + +def get_keeper_socket(cluster, node, port=9181): + hosts = cluster.get_instance_ip(node.name) + client = socket.socket() + client.settimeout(10) + client.connect((hosts, port)) + return client + + +def send_4lw_cmd(cluster, node, cmd="ruok", port=9181): + client = None + try: + client = get_keeper_socket(cluster, node, port) + client.send(cmd.encode()) + data = client.recv(100_000) + data = data.decode() + return data + finally: + if client is not None: + client.close() + + +NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" + + +def wait_until_connected(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) == NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_until_quorum_lost(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) != NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_nodes(cluster, nodes): + for node in nodes: + wait_until_connected(cluster, node) diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index ae6b0085094..72e3582979b 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -3,6 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) @@ -15,6 +16,8 @@ node1 = cluster.add_instance( def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node1) + yield cluster finally: cluster.shutdown() diff --git 
a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py index 364d93dfc53..e1331c35eeb 100644 --- a/tests/integration/test_keeper_auth/test.py +++ b/tests/integration/test_keeper_auth/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -25,6 +26,7 @@ SUPERAUTH = "super:admin" def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) yield cluster @@ -455,6 +457,7 @@ def test_auth_snapshot(started_cluster): ) node.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node) connection = get_fake_zk() diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 73fface02b4..5ae71841004 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -61,6 +62,7 @@ def stop_zk(zk): def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) yield cluster diff --git a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml b/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml deleted file mode 100644 index c1d38a1de52..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - - diff --git a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml 
b/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml deleted file mode 100644 index ebb0d98ddf4..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - node1 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery/test.py b/tests/integration/test_keeper_force_recovery/test.py index f3bb0ca56e3..f7c3787b4d8 100644 --- a/tests/integration/test_keeper_force_recovery/test.py +++ b/tests/integration/test_keeper_force_recovery/test.py @@ -2,6 +2,7 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time @@ -62,37 +63,6 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): - while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): while zk.retry(zk.exists, path) is None: time.sleep(0.1) @@ -104,9 +74,6 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: @@ -114,7 +81,7 @@ def test_cluster_recovery(started_cluster): for node in nodes[CLUSTER_SIZE:]: node.stop_clickhouse() - wait_nodes(nodes[:CLUSTER_SIZE]) + keeper_utils.wait_nodes(cluster, nodes[:CLUSTER_SIZE]) node_zks = [get_fake_zk(node.name) 
for node in nodes[:CLUSTER_SIZE]] @@ -152,7 +119,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -167,8 +134,7 @@ def test_cluster_recovery(started_cluster): node.stop_clickhouse() # wait for node1 to lose quorum - while send_4lw_cmd(nodes[0].name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, nodes[0]) nodes[0].copy_file_to_container( os.path.join(CONFIG_DIR, "recovered_keeper1.xml"), @@ -177,9 +143,15 @@ def test_cluster_recovery(started_cluster): nodes[0].query("SYSTEM RELOAD CONFIG") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG - send_4lw_cmd(nodes[0].name, "rcvr") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) + keeper_utils.send_4lw_cmd(cluster, nodes[0], "rcvr") + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) # add one node to restore the quorum nodes[CLUSTER_SIZE].copy_file_to_container( @@ -191,10 +163,10 @@ def test_cluster_recovery(started_cluster): ) nodes[CLUSTER_SIZE].start_clickhouse() - wait_until_connected(nodes[CLUSTER_SIZE].name) + keeper_utils.wait_until_connected(cluster, nodes[CLUSTER_SIZE]) # node1 should have quorum now and accept requests - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks.append(get_fake_zk(nodes[CLUSTER_SIZE].name)) @@ -206,7 +178,7 @@ def test_cluster_recovery(started_cluster): f"/etc/clickhouse-server/config.d/enable_keeper{i+1}.xml", ) 
node.start_clickhouse() - wait_until_connected(node.name) + keeper_utils.wait_until_connected(cluster, node) node_zks.append(get_fake_zk(node.name)) # refresh old zk sessions @@ -223,7 +195,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zks[-1], "/test_force_recovery_last", "somedatalast") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) for zk in node_zks[:nodes_left]: assert_all_data(zk) diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py index 0a554e33119..1c0d5e9a306 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/test.py +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -2,10 +2,11 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time -from kazoo.client import KazooClient +from kazoo.client import KazooClient, KazooRetry CLUSTER_SIZE = 3 @@ -45,47 +46,19 @@ def started_cluster(): def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( - hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + hosts=cluster.get_instance_ip(nodename) + ":9181", + timeout=timeout, + connection_retry=KazooRetry(max_tries=10), + command_retry=KazooRetry(max_tries=10), ) _fake_zk_instance.start() return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): 
- while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): - while zk.exists(path) is None: + while zk.retry(zk.exists, path) is None: time.sleep(0.1) - assert zk.get(path)[0] == data.encode() + assert zk.retry(zk.get, path)[0] == data.encode() def close_zk(zk): @@ -93,20 +66,17 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: - wait_nodes(nodes) + keeper_utils.wait_nodes(cluster, nodes) node_zks = [get_fake_zk(node.name) for node in nodes] data_in_cluster = [] def add_data(zk, path, data): - zk.create(path, data.encode()) + zk.retry(zk.create, path, data.encode()) data_in_cluster.append((path, data)) def assert_all_data(zk): @@ -137,7 +107,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -156,7 +126,7 @@ def test_cluster_recovery(started_cluster): ) nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) assert_all_data(get_fake_zk(nodes[0].name)) finally: diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index e8136d322d3..30abc7422c4 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -1,6 +1,7 @@ import socket import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import 
string import os @@ -25,6 +26,10 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -56,28 +61,6 @@ def clear_znodes(): destroy_zk_client(zk) -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - # zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for n in [node1, node2, node3]: - wait_node(n) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -86,23 +69,15 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def reset_node_stats(node_name=node1.name): +def reset_node_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"srst") client.recv(10) finally: @@ -110,23 +85,10 @@ def reset_node_stats(node_name=node1.name): client.close() -def send_4lw_cmd(node_name=node1.name, cmd="ruok"): +def reset_conn_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def reset_conn_stats(node_name=node1.name): - client = None - try: - client = 
get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"crst") client.recv(10_000) finally: @@ -138,7 +100,7 @@ def test_cmd_ruok(started_cluster): client = None try: wait_nodes() - data = send_4lw_cmd(cmd="ruok") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="ruok") assert data == "imok" finally: close_keeper_socket(client) @@ -187,7 +149,7 @@ def test_cmd_mntr(started_cluster): clear_znodes() # reset stat first - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action( @@ -200,7 +162,7 @@ def test_cmd_mntr(started_cluster): delete_cnt=2, ) - data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") # print(data.decode()) reader = csv.reader(data.split("\n"), delimiter="\t") @@ -252,10 +214,10 @@ def test_cmd_srst(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="srst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srst") assert data.strip() == "Server stats reset." 
- data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") assert len(data) != 0 # print(data) @@ -279,7 +241,7 @@ def test_cmd_conf(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="conf") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="conf") reader = csv.reader(data.split("\n"), delimiter="=") result = {} @@ -335,8 +297,8 @@ def test_cmd_conf(started_cluster): def test_cmd_isro(started_cluster): wait_nodes() - assert send_4lw_cmd(node1.name, "isro") == "rw" - assert send_4lw_cmd(node2.name, "isro") == "ro" + assert keeper_utils.send_4lw_cmd(cluster, node1, "isro") == "rw" + assert keeper_utils.send_4lw_cmd(cluster, node2, "isro") == "ro" def test_cmd_srvr(started_cluster): @@ -345,12 +307,12 @@ def test_cmd_srvr(started_cluster): wait_nodes() clear_znodes() - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="srvr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srvr") print("srvr output -------------------------------------") print(data) @@ -380,13 +342,13 @@ def test_cmd_stat(started_cluster): try: wait_nodes() clear_znodes() - reset_node_stats(node1.name) - reset_conn_stats(node1.name) + reset_node_stats(node1) + reset_conn_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="stat") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="stat") print("stat output -------------------------------------") print(data) @@ -440,7 +402,7 @@ def test_cmd_cons(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output -------------------------------------") print(data) @@ -485,12 +447,12 @@ def test_cmd_crst(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) 
do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="crst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="crst") print("crst output -------------------------------------") print(data) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output(after crst) -------------------------------------") print(data) @@ -537,7 +499,7 @@ def test_cmd_dump(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, ephemeral_cnt=2) - data = send_4lw_cmd(cmd="dump") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="dump") print("dump output -------------------------------------") print(data) @@ -563,7 +525,7 @@ def test_cmd_wchs(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchs") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchs") print("wchs output -------------------------------------") print(data) @@ -598,7 +560,7 @@ def test_cmd_wchc(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchc") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchc") print("wchc output -------------------------------------") print(data) @@ -622,7 +584,7 @@ def test_cmd_wchp(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchp") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchp") print("wchp output -------------------------------------") print(data) diff --git a/tests/integration/test_keeper_incorrect_config/test.py b/tests/integration/test_keeper_incorrect_config/test.py index e0a28b00b4f..9912959611a 100644 --- a/tests/integration/test_keeper_incorrect_config/test.py +++ b/tests/integration/test_keeper_incorrect_config/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import 
helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( @@ -173,7 +174,7 @@ NORMAL_CONFIG = """ """ -def test_duplicate_endpoint(started_cluster): +def test_invalid_configs(started_cluster): node1.stop_clickhouse() def assert_config_fails(config): @@ -192,5 +193,6 @@ def test_duplicate_endpoint(started_cluster): "/etc/clickhouse-server/config.d/enable_keeper1.xml", NORMAL_CONFIG ) node1.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node1) assert node1.query("SELECT 1") == "1\n" diff --git a/tests/integration/test_keeper_internal_secure/test.py b/tests/integration/test_keeper_internal_secure/test.py index 2d45e95e4ff..2448a426fe2 100644 --- a/tests/integration/test_keeper_internal_secure/test.py +++ b/tests/integration/test_keeper_internal_secure/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -47,6 +48,8 @@ def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + yield cluster finally: diff --git a/tests/integration/test_keeper_mntr_pressure/test.py b/tests/integration/test_keeper_mntr_pressure/test.py index 471767210d6..1468aa01896 100644 --- a/tests/integration/test_keeper_mntr_pressure/test.py +++ b/tests/integration/test_keeper_mntr_pressure/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import pytest import random import string @@ -30,6 +31,7 @@ NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving request def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -37,40 +39,22 @@ def started_cluster(): cluster.shutdown() -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - 
client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - def test_aggressive_mntr(started_cluster): - def go_mntr(node_name): - for _ in range(100000): - print(node_name, send_4lw_cmd(node_name, "mntr")) + def go_mntr(node): + for _ in range(10000): + try: + print(node.name, keeper_utils.send_4lw_cmd(cluster, node, "mntr")) + except ConnectionRefusedError: + pass - node1_thread = threading.Thread(target=lambda: go_mntr(node1.name)) - node2_thread = threading.Thread(target=lambda: go_mntr(node2.name)) - node3_thread = threading.Thread(target=lambda: go_mntr(node3.name)) + node1_thread = threading.Thread(target=lambda: go_mntr(node1)) + node2_thread = threading.Thread(target=lambda: go_mntr(node2)) + node3_thread = threading.Thread(target=lambda: go_mntr(node3)) node1_thread.start() node2_thread.start() node3_thread.start() @@ -78,8 +62,7 @@ def test_aggressive_mntr(started_cluster): node2.stop_clickhouse() node3.stop_clickhouse() - while send_4lw_cmd(node1.name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, node1) node1.stop_clickhouse() starters = [] diff --git a/tests/integration/test_keeper_multinode_blocade_leader/test.py b/tests/integration/test_keeper_multinode_blocade_leader/test.py index d6d01a5d0a6..06a5cd8dc5a 100644 --- a/tests/integration/test_keeper_multinode_blocade_leader/test.py +++ b/tests/integration/test_keeper_multinode_blocade_leader/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -44,6 +45,7 @@ TODO remove this when jepsen tests will be written. 
def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -55,31 +57,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -92,7 +69,6 @@ def get_fake_zk(nodename, timeout=30.0): @pytest.mark.timeout(600) def test_blocade_leader(started_cluster): for i in range(100): - wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( @@ -296,7 +272,6 @@ def restart_replica_for_sure(node, table_name, zk_replica_path): @pytest.mark.timeout(600) def test_blocade_leader_twice(started_cluster): for i in range(100): - wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index 694600acc67..b8bdb098c0d 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -32,6 +33,7 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, 
node2, node3]) yield cluster @@ -43,31 +45,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -78,7 +55,6 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_multinode(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -120,7 +96,6 @@ def test_read_write_multinode(started_cluster): def test_watch_on_follower(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -177,7 +152,6 @@ def test_watch_on_follower(started_cluster): def test_session_expiration(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3", timeout=3.0) @@ -219,7 +193,6 @@ def test_session_expiration(started_cluster): def test_follower_restart(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node1_zk.create("/test_restart_node", b"hello") @@ -244,7 +217,6 @@ def test_follower_restart(started_cluster): def test_simple_replicated_table(started_cluster): - wait_nodes() for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') 
ORDER BY tuple()".format( diff --git a/tests/integration/test_keeper_nodes_add/test.py b/tests/integration/test_keeper_nodes_add/test.py index c3449534e87..aad674332ac 100644 --- a/tests/integration/test_keeper_nodes_add/test.py +++ b/tests/integration/test_keeper_nodes_add/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -41,9 +42,11 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def test_nodes_add(started_cluster): + keeper_utils.wait_until_connected(cluster, node1) zk_conn = get_fake_zk(node1) for i in range(100): @@ -62,6 +65,7 @@ def test_nodes_add(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node2) zk_conn2 = get_fake_zk(node2) @@ -93,6 +97,7 @@ def test_nodes_add(started_cluster): node2.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node3) zk_conn3 = get_fake_zk(node3) for i in range(100): diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 31082846fb8..c816d69e2d1 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -11,6 +11,7 @@ import os import time from multiprocessing.dummy import Pool from helpers.test_tools import assert_eq_with_retry +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState cluster = ClickHouseCluster(__file__) @@ -33,6 +34,8 @@ def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + yield cluster finally: @@ -41,6 +44,7 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def get_fake_zk(node, timeout=30.0): diff --git 
a/tests/integration/test_keeper_nodes_remove/test.py b/tests/integration/test_keeper_nodes_remove/test.py index 13303d320eb..03536f07064 100644 --- a/tests/integration/test_keeper_nodes_remove/test.py +++ b/tests/integration/test_keeper_nodes_remove/test.py @@ -2,6 +2,8 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils +import time import os from kazoo.client import KazooClient, KazooState @@ -23,6 +25,7 @@ node3 = cluster.add_instance( def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -79,9 +82,12 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn3 = get_fake_zk(node3) zk_conn3.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node3.stop_clickhouse() @@ -91,6 +97,7 @@ def test_nodes_remove(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") + zk_conn = get_fake_zk(node1) zk_conn.sync("/test_two_0") @@ -98,8 +105,11 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn2 = get_fake_zk(node2) zk_conn2.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node2.stop_clickhouse() diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index 377fa436a87..d7cc79836a7 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -32,6 +33,8 @@ def started_cluster(): try: cluster.start() + 
keeper_utils.wait_until_connected(cluster, node) + yield cluster finally: @@ -46,6 +49,11 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -62,7 +70,7 @@ def test_state_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -111,7 +119,7 @@ def test_state_duplicate_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_duplicated_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -119,7 +127,7 @@ def test_state_duplicate_restart(started_cluster): node_zk2.create("/test_state_duplicated_restart/just_test2") node_zk2.create("/test_state_duplicated_restart/just_test3") - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk3 = get_connection_zk("node") @@ -159,6 +167,7 @@ def test_state_duplicate_restart(started_cluster): # http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html def test_ephemeral_after_restart(started_cluster): + try: node_zk = None node_zk2 = None @@ -176,7 +185,7 @@ def test_ephemeral_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_ephemeral_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_persistent_log_multinode/test.py b/tests/integration/test_keeper_persistent_log_multinode/test.py index f15e772fd5f..1552abd32e9 100644 --- a/tests/integration/test_keeper_persistent_log_multinode/test.py +++ b/tests/integration/test_keeper_persistent_log_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import 
pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -26,10 +27,15 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + wait_nodes() yield cluster @@ -100,6 +106,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml deleted file mode 100644 index 43800bd2dfb..00000000000 --- 
a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml deleted file mode 100644 index d51e420f733..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml deleted file mode 100644 index 3f1ee1e01a8..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml deleted file mode 100644 index a99bd5d5296..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git 
a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index 7270c84bdda..7f2c2e89703 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -24,6 +25,7 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -84,6 +86,7 @@ def test_recover_from_snapshot(started_cluster): # stale node should recover from leader's snapshot # with some sanitizers can start longer than 5 seconds node3.start_clickhouse(20) + keeper_utils.wait_until_connected(cluster, node3) print("Restarted") try: diff --git a/tests/integration/test_keeper_secure_client/test.py b/tests/integration/test_keeper_secure_client/test.py index 55e00880da0..81584129052 100644 --- a/tests/integration/test_keeper_secure_client/test.py +++ b/tests/integration/test_keeper_secure_client/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import string import os import time @@ -40,4 +41,4 @@ def started_cluster(): def test_connection(started_cluster): # just nothrow - node2.query("SELECT * FROM system.zookeeper WHERE path = '/'") + node2.query_with_retry("SELECT * FROM system.zookeeper WHERE path = '/'") diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py index 30db4d9548c..645045e7865 100644 --- a/tests/integration/test_keeper_session/test.py +++ b/tests/integration/test_keeper_session/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import 
time import socket import struct @@ -44,25 +45,8 @@ def destroy_zk_client(zk): pass -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for n in [node1]: - wait_node(n) + keeper_utils.wait_nodes(cluster, [node1]) def get_fake_zk(nodename, timeout=30.0): diff --git a/tests/integration/test_keeper_snapshot_on_exit/test.py b/tests/integration/test_keeper_snapshot_on_exit/test.py index 1ca5888ab4d..933e83414a4 100644 --- a/tests/integration/test_keeper_snapshot_on_exit/test.py +++ b/tests/integration/test_keeper_snapshot_on_exit/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import os from kazoo.client import KazooClient @@ -27,6 +28,7 @@ def get_fake_zk(node, timeout=30.0): def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster diff --git a/tests/integration/test_keeper_snapshot_small_distance/test.py b/tests/integration/test_keeper_snapshot_small_distance/test.py index 4351c5ac96f..6a64cf0ac92 100644 --- a/tests/integration/test_keeper_snapshot_small_distance/test.py +++ b/tests/integration/test_keeper_snapshot_small_distance/test.py @@ -2,6 +2,7 @@ ##!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from multiprocessing.dummy import Pool from kazoo.client import KazooClient, KazooState import random @@ -22,7 +23,7 @@ node3 = cluster.add_instance( def start_zookeeper(node): - node1.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"]) + node.exec_in_container(["bash", "-c", 
"/opt/zookeeper/bin/zkServer.sh start"]) def stop_zookeeper(node): @@ -66,6 +67,7 @@ def stop_clickhouse(node): def start_clickhouse(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots, node): diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index 08f60e538a4..a27ca6f92a5 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -3,6 +3,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -35,6 +36,7 @@ def create_random_path(prefix="", depth=1): def started_cluster(): try: cluster.start() + keeper_utils.wait_until_connected(cluster, node) yield cluster @@ -50,6 +52,11 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -69,7 +76,7 @@ def test_state_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -123,7 +130,7 @@ def test_ephemeral_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_snapshots_multinode/test.py b/tests/integration/test_keeper_snapshots_multinode/test.py index 1461f35e6a4..52d4ae71e33 100644 --- a/tests/integration/test_keeper_snapshots_multinode/test.py +++ b/tests/integration/test_keeper_snapshots_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import 
helpers.keeper_utils as keeper_utils import random import string import os @@ -20,10 +21,15 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + wait_nodes() yield cluster @@ -94,6 +100,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml deleted file mode 100644 index 
43800bd2dfb..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py index e451f969b37..c8476568786 100644 --- a/tests/integration/test_keeper_three_nodes_start/test.py +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -3,6 +3,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -31,6 +32,7 @@ def get_fake_zk(nodename, timeout=30.0): def test_smoke(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2]) node1_zk = get_fake_zk("node1") node1_zk.create("/test_alive", b"aaaa") diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index f1de469c5a1..591dde6a70a 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -39,6 +40,7 @@ def get_fake_zk(nodename, timeout=30.0): def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -48,6 +50,7 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def delete_with_retry(node_name, path): @@ -74,10 +77,10 @@ def test_start_offline(started_cluster): p.map(start, [node2, node3]) assert 
node2.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) assert node3.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") @@ -110,10 +113,10 @@ def test_start_non_existing(started_cluster): p.map(start, [node2, node1]) assert node1.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) assert node2.contains_in_log( - "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" + "Connected to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") @@ -138,6 +141,7 @@ def test_restart_third_node(started_cluster): node1_zk.create("/test_restart", b"aaaa") node3.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node3) assert node3.contains_in_log( "Connected to ZooKeeper (or Keeper) before internal Keeper start" diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index 8c0276f7d77..b87dcf6e758 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -29,6 +30,7 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() + keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster @@ -40,31 +42,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, 
timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -75,7 +52,6 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_two_nodes(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") @@ -107,7 +83,6 @@ def test_read_write_two_nodes(started_cluster): def test_read_write_two_nodes_with_blocade(started_cluster): try: - wait_nodes() node1_zk = get_fake_zk("node1", timeout=5.0) node2_zk = get_fake_zk("node2", timeout=5.0) diff --git a/tests/integration/test_keeper_znode_time/test.py b/tests/integration/test_keeper_znode_time/test.py index bff3d52014e..f2076acc4d2 100644 --- a/tests/integration/test_keeper_znode_time/test.py +++ b/tests/integration/test_keeper_znode_time/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -42,29 +43,8 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for node in 
[node1, node2, node3]: - wait_node(node) + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) def get_fake_zk(nodename, timeout=30.0): @@ -129,6 +109,7 @@ def test_server_restart(started_cluster): node1_zk.set("/test_server_restart/" + str(child_node), b"somevalue") node3.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node3) node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 50a9ee6a4a7..e459078f8ef 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -11,6 +12,7 @@ from kazoo.exceptions import ( ) import os import time +import socket cluster = ClickHouseCluster(__file__) @@ -60,6 +62,7 @@ def stop_clickhouse(): def start_clickhouse(): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots): From 7f4935b782b4519a6d1fd79fab2ae2aa6f6173ea Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 6 Sep 2022 05:52:31 -0700 Subject: [PATCH 38/87] Kusto-phase1: removed extra spaces --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index b250f5def60..f8e4f9eaab0 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -29,7 +29,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Po while (!token_pos->isEnd() && 
token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) { auto tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type == TokenType::Comma ) + if (token_pos->type == TokenType::Comma) new_expr = new_expr + logic_op; else new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 03cb5a8ad43..5e07e3c4d9a 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -61,7 +61,7 @@ String ParserKQLBase :: getExprFromToken(Pos & pos) if (token == "=") { ++pos; - if (String(pos->begin,pos->end) != "~" ) + if (String(pos->begin,pos->end) != "~") { alias = tokens.back(); tokens.pop_back(); From 896174e0ba5a18b79daf59c39b85493a1e905bff Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 6 Sep 2022 12:45:22 -0700 Subject: [PATCH 39/87] Kusto-phase1: fixed small build issue --- src/Parsers/Kusto/ParserKQLQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 5e07e3c4d9a..8591b0f04df 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -90,7 +90,7 @@ String ParserKQLBase :: getExprFromToken(Pos & pos) tokens.push_back(alias); } - for (auto token:tokens) + for (auto const &token : tokens) res = res.empty()? 
token : res +" " + token; return res; } @@ -231,7 +231,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else { - while (operation_pos.size() > 0) + while (!operation_pos.empty()) { auto prev_op = operation_pos.back().first; auto prev_pos = operation_pos.back().second; From a0735a5816a751a0cc71886d65e37ff069250df3 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 6 Sep 2022 22:28:25 -0700 Subject: [PATCH 40/87] Kusto-phase1: use empty to check vector instead of size --- src/Parsers/Kusto/ParserKQLQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 8591b0f04df..04ee36705a9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -247,7 +247,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - if (operation_pos.size() > 0) + if (!operation_pos.empty()) { for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) --last_pos; From d62ba01e93661f454cac40cccc14a0f3dc135267 Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Thu, 15 Sep 2022 06:25:23 -0700 Subject: [PATCH 41/87] Fix SipHash Endian issue for s390x --- src/Common/SipHash.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 6162de48143..6e1138b6510 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -32,6 +32,11 @@ v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \ } while(0) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define CURRENT_BYTES_IDX(i) (7-i) +#else +#define CURRENT_BYTES_IDX(i) (i) +#endif class SipHash { @@ -55,7 +60,7 @@ private: ALWAYS_INLINE void finalize() { /// In the last free byte, we write the remainder of the division by 256. 
- current_bytes[7] = static_cast(cnt); + current_bytes[CURRENT_BYTES_IDX(7)] = static_cast(cnt); v3 ^= current_word; SIPROUND; @@ -92,7 +97,7 @@ public: { while (cnt & 7 && data < end) { - current_bytes[cnt & 7] = *data; + current_bytes[CURRENT_BYTES_IDX(cnt & 7)] = *data; ++data; ++cnt; } @@ -125,13 +130,13 @@ public: current_word = 0; switch (end - data) { - case 7: current_bytes[6] = data[6]; [[fallthrough]]; - case 6: current_bytes[5] = data[5]; [[fallthrough]]; - case 5: current_bytes[4] = data[4]; [[fallthrough]]; - case 4: current_bytes[3] = data[3]; [[fallthrough]]; - case 3: current_bytes[2] = data[2]; [[fallthrough]]; - case 2: current_bytes[1] = data[1]; [[fallthrough]]; - case 1: current_bytes[0] = data[0]; [[fallthrough]]; + case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]]; + case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]]; + case 5: current_bytes[CURRENT_BYTES_IDX(4)] = data[4]; [[fallthrough]]; + case 4: current_bytes[CURRENT_BYTES_IDX(3)] = data[3]; [[fallthrough]]; + case 3: current_bytes[CURRENT_BYTES_IDX(2)] = data[2]; [[fallthrough]]; + case 2: current_bytes[CURRENT_BYTES_IDX(1)] = data[1]; [[fallthrough]]; + case 1: current_bytes[CURRENT_BYTES_IDX(0)] = data[0]; [[fallthrough]]; case 0: break; } } @@ -157,8 +162,8 @@ public: void get128(char * out) { finalize(); - unalignedStoreLE(out, v0 ^ v1); - unalignedStoreLE(out + 8, v2 ^ v3); + unalignedStore(out, v0 ^ v1); + unalignedStore(out + 8, v2 ^ v3); } template From f3e8738145b6505a8cc2f48f01bb5767a6d9ea9c Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Sat, 17 Sep 2022 19:48:08 -0700 Subject: [PATCH 42/87] Fixed issues in code review --- src/Common/SipHash.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 6e1138b6510..281a65ca36a 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -32,8 +32,10 @@ v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \ } while(0) 
+/// Define macro CURRENT_BYTES_IDX for building index used in current_bytes array +/// to ensure correct byte order on different endian machines #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define CURRENT_BYTES_IDX(i) (7-i) +#define CURRENT_BYTES_IDX(i) (7 - i) #else #define CURRENT_BYTES_IDX(i) (i) #endif @@ -230,3 +232,5 @@ inline UInt64 sipHash64(const std::string & s) { return sipHash64(s.data(), s.size()); } + +#undef CURRENT_BYTES_IDX From f7aba18185baf82fffd2ecb40f80435a9c3a5270 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Sep 2022 07:44:25 +0300 Subject: [PATCH 43/87] Fix test --- tests/queries/1_stateful/00175_partition_by_ignore.reference | 4 ++-- tests/queries/1_stateful/00175_partition_by_ignore.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.reference b/tests/queries/1_stateful/00175_partition_by_ignore.reference index 53eb1f877a9..39dffcae642 100644 --- a/tests/queries/1_stateful/00175_partition_by_ignore.reference +++ b/tests/queries/1_stateful/00175_partition_by_ignore.reference @@ -1,3 +1,3 @@ -- check that partition key with ignore works correctly -default partition_by_ignore 1 29 1 -default partition_by_ignore 1 29 1 +"default","partition_by_ignore",1,29,1 +"default","partition_by_ignore",1,29,1 diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.sql b/tests/queries/1_stateful/00175_partition_by_ignore.sql index 102ed6104d3..737d1b59fe3 100644 --- a/tests/queries/1_stateful/00175_partition_by_ignore.sql +++ b/tests/queries/1_stateful/00175_partition_by_ignore.sql @@ -5,7 +5,7 @@ DROP TABLE IF EXISTS partition_by_ignore SYNC; CREATE TABLE partition_by_ignore (ts DateTime, ts_2 DateTime) ENGINE=MergeTree PARTITION BY (toYYYYMM(ts), ignore(ts_2)) ORDER BY tuple(); INSERT INTO partition_by_ignore SELECT toDateTime('2022-08-03 00:00:00') + toIntervalDay(number), toDateTime('2022-08-04 00:00:00') + toIntervalDay(number) FROM numbers(60); -EXPLAIN 
ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00'); -EXPLAIN ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts_2 BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00'); +EXPLAIN ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00') FORMAT CSV; +EXPLAIN ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts_2 BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00') FORMAT CSV; DROP TABLE IF EXISTS partition_by_ignore SYNC; From 37ae7a8cca56ebbbda0802b2c411ac0fb571687b Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 18 Sep 2022 20:25:27 -0700 Subject: [PATCH 44/87] Kusto-phase1 : apply parser comments to kusto, remove unused variable --- src/Interpreters/executeQuery.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index e61494792b0..1a7c5032b02 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -389,12 +389,11 @@ static std::tuple executeQueryImpl( String query_table; try { - const Dialect & dialect = settings.dialect; - - if (dialect == Dialect::kusto && !internal) + if (settings.dialect == Dialect::kusto && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + /// TODO: parser should fail early when max_query_size limit is reached. 
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } else From ec852b3faa418765dc3201b893e3ae265663d144 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 18 Sep 2022 20:38:07 -0700 Subject: [PATCH 45/87] Kusto-phase1 : change the parser in ClientBase from shared_ptr to unique_ptr --- src/Client/ClientBase.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f87487dff7c..f407fab68f1 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -292,7 +292,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - std::shared_ptr parser; + std::unique_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -304,9 +304,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const Dialect & dialect = settings.dialect; if (dialect == Dialect::kusto) - parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); else - parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); if (is_interactive || ignore_error) { From 6f956329d5a96ee786a1d3aa34d902534b5ab424 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Mon, 19 Sep 2022 15:26:11 +0200 Subject: [PATCH 46/87] Remove obsolete comment from the config.xml Remove obsolete comment, see commit c059d0a0ee1e13c73cdefb821cb40aa01f6981c1 --- programs/server/config.xml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index a1e139d9e76..fef45c19d37 100644 --- 
a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1106,10 +1106,6 @@ system asynchronous_metric_log
- 7000
From 6798b500e9e05cdfbc22ac86830833248890e8df Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 21 Sep 2022 15:12:16 +0000 Subject: [PATCH 47/87] Wait on startup for Keeper --- programs/server/Server.cpp | 12 +++++++++++- src/Coordination/KeeperServer.cpp | 2 +- .../test_keeper_and_access_storage/test.py | 2 -- tests/integration/test_keeper_auth/test.py | 3 --- tests/integration/test_keeper_back_to_back/test.py | 2 -- .../integration/test_keeper_incorrect_config/test.py | 2 -- .../integration/test_keeper_internal_secure/test.py | 3 --- tests/integration/test_keeper_mntr_pressure/test.py | 1 - .../test_keeper_multinode_blocade_leader/test.py | 7 ++++++- .../integration/test_keeper_multinode_simple/test.py | 10 +++++++++- tests/integration/test_keeper_nodes_move/test.py | 2 -- tests/integration/test_keeper_nodes_remove/test.py | 2 -- tests/integration/test_keeper_persistent_log/test.py | 4 ---- .../test_keeper_restore_from_snapshot/test.py | 1 - tests/integration/test_keeper_secure_client/test.py | 1 - .../integration/test_keeper_snapshot_on_exit/test.py | 2 -- tests/integration/test_keeper_snapshots/test.py | 1 - .../test_keeper_snapshots_multinode/test.py | 1 - .../test_keeper_three_nodes_start/test.py | 2 -- .../test_keeper_three_nodes_two_alive/test.py | 9 ++++----- .../test_keeper_two_nodes_cluster/test.py | 7 ++++++- .../test_keeper_zookeeper_converter/test.py | 1 - 22 files changed, 37 insertions(+), 40 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 40b4b646b6e..8a0ce75ca70 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1282,8 +1282,18 @@ int Server::main(const std::vector & /*args*/) if (config().has("keeper_server")) { #if USE_NURAFT + //// If we don't have configured connection probably someone trying to use clickhouse-server instead + //// of clickhouse-keeper, so start synchronously. 
+ bool can_initialize_keeper_async = false; + + if (has_zookeeper) /// We have configured connection to some zookeeper cluster + { + /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start + /// synchronously. + can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); + } /// Initialize keeper RAFT. - global_context->initializeKeeperDispatcher(/* start_async */ true); + global_context->initializeKeeperDispatcher(can_initialize_keeper_async); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 42d7d967b1f..08092cf68f1 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -705,7 +705,7 @@ void KeeperServer::waitInit() int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds(); if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); })) - throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization"); + LOG_WARNING(log, "Failed to wait for RAFT initialization in {}ms, will continue in background", timeout); } std::vector KeeperServer::getDeadSessions() diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index 72e3582979b..6ec307f7082 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -3,7 +3,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) @@ -16,7 +15,6 @@ node1 = cluster.add_instance( def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node1) yield cluster finally: diff --git 
a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py index e1331c35eeb..364d93dfc53 100644 --- a/tests/integration/test_keeper_auth/test.py +++ b/tests/integration/test_keeper_auth/test.py @@ -1,6 +1,5 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -26,7 +25,6 @@ SUPERAUTH = "super:admin" def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) yield cluster @@ -457,7 +455,6 @@ def test_auth_snapshot(started_cluster): ) node.restart_clickhouse() - keeper_utils.wait_until_connected(cluster, node) connection = get_fake_zk() diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 5ae71841004..73fface02b4 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -1,6 +1,5 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -62,7 +61,6 @@ def stop_zk(zk): def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) yield cluster diff --git a/tests/integration/test_keeper_incorrect_config/test.py b/tests/integration/test_keeper_incorrect_config/test.py index ec8b14a01e9..95482745b31 100644 --- a/tests/integration/test_keeper_incorrect_config/test.py +++ b/tests/integration/test_keeper_incorrect_config/test.py @@ -2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( @@ -225,6 +224,5 @@ def test_invalid_configs(started_cluster): "/etc/clickhouse-server/config.d/enable_keeper1.xml", NORMAL_CONFIG ) 
node1.start_clickhouse() - keeper_utils.wait_until_connected(cluster, node1) assert node1.query("SELECT 1") == "1\n" diff --git a/tests/integration/test_keeper_internal_secure/test.py b/tests/integration/test_keeper_internal_secure/test.py index 2448a426fe2..2d45e95e4ff 100644 --- a/tests/integration/test_keeper_internal_secure/test.py +++ b/tests/integration/test_keeper_internal_secure/test.py @@ -2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -48,8 +47,6 @@ def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) - yield cluster finally: diff --git a/tests/integration/test_keeper_mntr_pressure/test.py b/tests/integration/test_keeper_mntr_pressure/test.py index 1468aa01896..d351b238ead 100644 --- a/tests/integration/test_keeper_mntr_pressure/test.py +++ b/tests/integration/test_keeper_mntr_pressure/test.py @@ -31,7 +31,6 @@ NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving request def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster diff --git a/tests/integration/test_keeper_multinode_blocade_leader/test.py b/tests/integration/test_keeper_multinode_blocade_leader/test.py index 06a5cd8dc5a..a7a80d90a58 100644 --- a/tests/integration/test_keeper_multinode_blocade_leader/test.py +++ b/tests/integration/test_keeper_multinode_blocade_leader/test.py @@ -45,7 +45,6 @@ TODO remove this when jepsen tests will be written. 
def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -65,10 +64,15 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + # in extremely rare case it can take more than 5 minutes in debug build with sanitizer @pytest.mark.timeout(600) def test_blocade_leader(started_cluster): for i in range(100): + wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( @@ -272,6 +276,7 @@ def restart_replica_for_sure(node, table_name, zk_replica_path): @pytest.mark.timeout(600) def test_blocade_leader_twice(started_cluster): for i in range(100): + wait_nodes() try: for i, node in enumerate([node1, node2, node3]): node.query( diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index b8bdb098c0d..1dcbb290fa8 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -33,7 +33,6 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -45,6 +44,10 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -55,6 +58,7 @@ def get_fake_zk(nodename, timeout=30.0): def test_read_write_multinode(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") @@ -96,6 +100,7 @@ def test_read_write_multinode(started_cluster): def test_watch_on_follower(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") 
node3_zk = get_fake_zk("node3") @@ -152,6 +157,7 @@ def test_watch_on_follower(started_cluster): def test_session_expiration(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3", timeout=3.0) @@ -193,6 +199,7 @@ def test_session_expiration(started_cluster): def test_follower_restart(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node1_zk.create("/test_restart_node", b"hello") @@ -217,6 +224,7 @@ def test_follower_restart(started_cluster): def test_simple_replicated_table(started_cluster): + wait_nodes() for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format( diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index c816d69e2d1..1e3bd95c5e7 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -34,8 +34,6 @@ def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) - yield cluster finally: diff --git a/tests/integration/test_keeper_nodes_remove/test.py b/tests/integration/test_keeper_nodes_remove/test.py index 03536f07064..59bdaadf2e2 100644 --- a/tests/integration/test_keeper_nodes_remove/test.py +++ b/tests/integration/test_keeper_nodes_remove/test.py @@ -2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import time import os from kazoo.client import KazooClient, KazooState @@ -25,7 +24,6 @@ node3 = cluster.add_instance( def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index d7cc79836a7..70cc14fe26d 100644 --- 
a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -33,8 +32,6 @@ def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) - yield cluster finally: @@ -51,7 +48,6 @@ def get_connection_zk(nodename, timeout=30.0): def restart_clickhouse(): node.restart_clickhouse(kill=True) - keeper_utils.wait_until_connected(cluster, node) def test_state_after_restart(started_cluster): diff --git a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index 7f2c2e89703..bc33689dd20 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -25,7 +25,6 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster diff --git a/tests/integration/test_keeper_secure_client/test.py b/tests/integration/test_keeper_secure_client/test.py index 81584129052..2a17afac75b 100644 --- a/tests/integration/test_keeper_secure_client/test.py +++ b/tests/integration/test_keeper_secure_client/test.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import string import os import time diff --git a/tests/integration/test_keeper_snapshot_on_exit/test.py b/tests/integration/test_keeper_snapshot_on_exit/test.py index 933e83414a4..1ca5888ab4d 100644 --- a/tests/integration/test_keeper_snapshot_on_exit/test.py +++ b/tests/integration/test_keeper_snapshot_on_exit/test.py @@ -1,6 +1,5 @@ import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as 
keeper_utils import os from kazoo.client import KazooClient @@ -28,7 +27,6 @@ def get_fake_zk(node, timeout=30.0): def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index a27ca6f92a5..ce57a852dca 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -36,7 +36,6 @@ def create_random_path(prefix="", depth=1): def started_cluster(): try: cluster.start() - keeper_utils.wait_until_connected(cluster, node) yield cluster diff --git a/tests/integration/test_keeper_snapshots_multinode/test.py b/tests/integration/test_keeper_snapshots_multinode/test.py index 52d4ae71e33..a68a34dae2e 100644 --- a/tests/integration/test_keeper_snapshots_multinode/test.py +++ b/tests/integration/test_keeper_snapshots_multinode/test.py @@ -29,7 +29,6 @@ def wait_nodes(): def started_cluster(): try: cluster.start() - wait_nodes() yield cluster diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py index c8476568786..e451f969b37 100644 --- a/tests/integration/test_keeper_three_nodes_start/test.py +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -3,7 +3,6 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils import random import string import os @@ -32,7 +31,6 @@ def get_fake_zk(nodename, timeout=30.0): def test_smoke(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2]) node1_zk = get_fake_zk("node1") node1_zk.create("/test_alive", b"aaaa") diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index 591dde6a70a..bd29ded357f 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ 
b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -40,7 +40,6 @@ def get_fake_zk(nodename, timeout=30.0): def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2, node3]) yield cluster @@ -77,10 +76,10 @@ def test_start_offline(started_cluster): p.map(start, [node2, node3]) assert node2.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) assert node3.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") @@ -113,10 +112,10 @@ def test_start_non_existing(started_cluster): p.map(start, [node2, node1]) assert node1.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) assert node2.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" + "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start" ) node2_zk = get_fake_zk("node2") diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index b87dcf6e758..c6bc0ebd33a 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -30,7 +30,6 @@ from kazoo.client import KazooClient, KazooState def started_cluster(): try: cluster.start() - keeper_utils.wait_nodes(cluster, [node1, node2]) yield cluster @@ -42,6 +41,10 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2]) + + def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -52,6 +55,7 @@ def get_fake_zk(nodename, 
timeout=30.0): def test_read_write_two_nodes(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1") node2_zk = get_fake_zk("node2") @@ -83,6 +87,7 @@ def test_read_write_two_nodes(started_cluster): def test_read_write_two_nodes_with_blocade(started_cluster): try: + wait_nodes() node1_zk = get_fake_zk("node1", timeout=5.0) node2_zk = get_fake_zk("node2", timeout=5.0) diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index e459078f8ef..af8d1ca4bf9 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -12,7 +12,6 @@ from kazoo.exceptions import ( ) import os import time -import socket cluster = ClickHouseCluster(__file__) From 1c67437aa4650a84a76fb2ed2d1af6feb9ae1247 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 23 Sep 2022 18:37:25 +0000 Subject: [PATCH 48/87] Fix possible crash for SELECT from Merge table with optimize_monotonous_functions_in_order_by enabled.
--- src/Interpreters/MonotonicityCheckVisitor.h | 3 +++ tests/queries/0_stateless/02147_order_by_optimizations.sql | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index 4b9f36ab72d..a7ce0774862 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -70,6 +70,9 @@ public: if (!pos) return false; + if (*pos >= tables.size()) + return false; + if (auto data_type_and_name = tables[*pos].columns.tryGetByName(identifier->shortName())) { arg_data_type = data_type_and_name->type; diff --git a/tests/queries/0_stateless/02147_order_by_optimizations.sql b/tests/queries/0_stateless/02147_order_by_optimizations.sql index 7aa631ff432..3925e92bffc 100644 --- a/tests/queries/0_stateless/02147_order_by_optimizations.sql +++ b/tests/queries/0_stateless/02147_order_by_optimizations.sql @@ -13,3 +13,7 @@ SET optimize_monotonous_functions_in_order_by = 1; EXPLAIN SYNTAX SELECT * FROM t_02147 ORDER BY toStartOfHour(date), v; EXPLAIN SYNTAX SELECT * FROM t_02147_dist ORDER BY toStartOfHour(date), v; EXPLAIN SYNTAX SELECT * FROM t_02147_merge ORDER BY toStartOfHour(date), v; + +drop table t_02147; +CREATE TABLE t_02147 (date DateTime, v UInt32) ENGINE = MergeTree ORDER BY date; +select *, toString(t.v) as s from t_02147_merge as t order by date, s; From cec641a71b15fca2458d4844263418427f199ba8 Mon Sep 17 00:00:00 2001 From: Daniel Kutenin Date: Mon, 26 Sep 2022 13:00:18 +0100 Subject: [PATCH 49/87] Add ldapr for Arm instances --- cmake/cpu_features.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index f9b2f103f49..35eb95a5d47 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -45,6 +45,8 @@ elseif (ARCH_AARCH64) # dotprod: Scalar vector product (SDOT and UDOT instructions). 
Probably the most obscure extra flag with doubtful performance benefits # but it has been activated since always, so why not enable it. It's not 100% clear in which revision this flag was # introduced as optional, either in v8.2 [7] or in v8.4 [8]. + # ldapr: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code. + # Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton 2+, Azure and GCP instances. Generated from clang 15. # # [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md # [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10 @@ -54,7 +56,8 @@ elseif (ARCH_AARCH64) # [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en # [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html # [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions- - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs") + # [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr") endif () elseif (ARCH_PPC64LE) From 6acdeb84be96cba2df0ae6e8e9db28cdfadb981b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 22 Sep 2022 23:19:57 +0200 Subject: [PATCH 50/87] clickhouse-client: refactor editor execution Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 236 +++++++++++++++++++-------------- 1 file changed, 137 insertions(+), 99 deletions(-) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 75c48f690f8..ef8787bc0a3 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -15,7 +16,6 @@ #include #include - 
namespace { @@ -35,6 +35,132 @@ std::string getEditor() return editor; } +/// See comments in ShellCommand::executeImpl() +/// (for the vfork via dlsym()) +int executeCommand(char * const argv[]) +{ + static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); + if (!real_vfork) + throw std::runtime_error("Cannot find vfork symbol"); + + pid_t pid = reinterpret_cast(real_vfork)(); + + if (-1 == pid) + throw std::runtime_error(fmt::format("Cannot vfork {}: {}", argv[0], errnoToString())); + + /// Child + if (0 == pid) + { + sigset_t mask; + sigemptyset(&mask); + sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process + sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process + + execvp(argv[0], argv); + _exit(-1); + } + + int status = 0; + do + { + int exited_pid = waitpid(pid, &status, 0); + if (exited_pid != -1) + break; + + if (errno == EINTR) + continue; + + throw std::runtime_error(fmt::format("Cannot waitpid {}: {}", pid, errnoToString())); + } while (true); + + return status; +} + +void writeRetry(int fd, const std::string & data) +{ + size_t bytes_written = 0; + const char * begin = data.c_str(); + size_t offset = data.size(); + + while (bytes_written != offset) + { + ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); + if ((-1 == res || 0 == res) && errno != EINTR) + throw std::runtime_error(fmt::format("Cannot write to {}: {}", fd, errnoToString())); + bytes_written += res; + } +} +std::string readFile(const std::string & path) +{ + std::ifstream t(path); + std::string str; + t.seekg(0, std::ios::end); + str.reserve(t.tellg()); + t.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); + return str; +} + +/// Simple wrapper for temporary files. 
+class TemporaryFile +{ +private: + std::string path; + int fd = -1; + +public: + explicit TemporaryFile(const char * pattern) + : path(pattern) + { + size_t dot_pos = path.rfind('.'); + if (dot_pos != std::string::npos) + fd = ::mkstemps(path.data(), path.size() - dot_pos); + else + fd = ::mkstemp(path.data()); + + if (-1 == fd) + throw std::runtime_error(fmt::format("Cannot create temporary file {}: {}", path, errnoToString())); + } + ~TemporaryFile() + { + try + { + close(); + unlink(); + } + catch (const std::runtime_error & e) + { + fmt::print(stderr, "{}", e.what()); + } + } + + void close() + { + if (fd == -1) + return; + + if (0 != ::close(fd)) + throw std::runtime_error(fmt::format("Cannot close temporary file {}: {}", path, errnoToString())); + fd = -1; + } + + void write(const std::string & data) + { + if (fd == -1) + throw std::runtime_error(fmt::format("Cannot write to uninitialized file {}", path)); + + writeRetry(fd, data); + } + + void unlink() + { + if (0 != ::unlink(path.c_str())) + throw std::runtime_error(fmt::format("Cannot remove temporary file {}: {}", path, errnoToString())); + } + + std::string & getPath() { return path; } +}; + /// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx. 
/// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org) /// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com) @@ -293,116 +419,28 @@ void ReplxxLineReader::addToHistory(const String & line) rx.print("Unlock of history file failed: %s\n", errnoToString().c_str()); } -/// See comments in ShellCommand::executeImpl() -/// (for the vfork via dlsym()) -int ReplxxLineReader::executeEditor(const std::string & path) -{ - std::vector argv0(editor.data(), editor.data() + editor.size() + 1); - std::vector argv1(path.data(), path.data() + path.size() + 1); - char * const argv[] = {argv0.data(), argv1.data(), nullptr}; - - static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); - if (!real_vfork) - { - rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString().c_str()); - return -1; - } - - pid_t pid = reinterpret_cast(real_vfork)(); - - if (-1 == pid) - { - rx.print("Cannot vfork: %s\n", errnoToString().c_str()); - return -1; - } - - /// Child - if (0 == pid) - { - sigset_t mask; - sigemptyset(&mask); - sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process - sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process - - execvp(editor.c_str(), argv); - rx.print("Cannot execute %s: %s\n", editor.c_str(), errnoToString().c_str()); - _exit(-1); - } - - int status = 0; - do - { - int exited_pid = waitpid(pid, &status, 0); - if (exited_pid == -1) - { - if (errno == EINTR) - continue; - - rx.print("Cannot waitpid: %s\n", errnoToString().c_str()); - return -1; - } - else - break; - } while (true); - return status; -} - void ReplxxLineReader::openEditor() { - char filename[] = "clickhouse_replxx_XXXXXX.sql"; - int fd = ::mkstemps(filename, 4); - if (-1 == fd) - { - rx.print("Cannot create temporary file to edit query: %s\n", errnoToString().c_str()); - return; - } + TemporaryFile editor_file("clickhouse_client_editor_XXXXXX.sql"); + 
editor_file.write(rx.get_state().text()); + editor_file.close(); - replxx::Replxx::State state(rx.get_state()); - - size_t bytes_written = 0; - const char * begin = state.text(); - size_t offset = strlen(state.text()); - while (bytes_written != offset) + char * const argv[] = {editor.data(), editor_file.getPath().data(), nullptr}; + try { - ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); - if ((-1 == res || 0 == res) && errno != EINTR) + if (executeCommand(argv) == 0) { - rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString().c_str()); - break; + const std::string & new_query = readFile(editor_file.getPath()); + rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); } - bytes_written += res; } - - if (0 != ::close(fd)) + catch (const std::runtime_error & e) { - rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString().c_str()); - return; - } - - if (0 == executeEditor(filename)) - { - try - { - std::ifstream t(filename); - std::string str; - t.seekg(0, std::ios::end); - str.reserve(t.tellg()); - t.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); - rx.set_state(replxx::Replxx::State(str.c_str(), str.size())); - } - catch (...) 
- { - rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString().c_str()); - return; - } + rx.print(e.what()); } if (bracketed_paste_enabled) enableBracketedPaste(); - - if (0 != ::unlink(filename)) - rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString().c_str()); } void ReplxxLineReader::enableBracketedPaste() From 58b61d8207c21c15e591aa4793d0d7ba6e889c6c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Sep 2022 14:09:53 +0200 Subject: [PATCH 51/87] clickhouse-client: add interactive history search with fzf-like utility Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 44 ++++++++++++++++++++++++++++++++++ base/base/ReplxxLineReader.h | 1 + 2 files changed, 45 insertions(+) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index ef8787bc0a3..32d3d9aafe7 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -375,6 +375,14 @@ ReplxxLineReader::ReplxxLineReader( return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); + + /// interactive search in history (ctrlp/fzf/skim) + auto interactive_history_search = [this](char32_t code) + { + openInteractiveHistorySearch(); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); } ReplxxLineReader::~ReplxxLineReader() @@ -443,6 +451,42 @@ void ReplxxLineReader::openEditor() enableBracketedPaste(); } +void ReplxxLineReader::openInteractiveHistorySearch() +{ + TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); + auto hs(rx.history_scan()); + while (hs.next()) + { + history_file.write(hs.get().text()); + history_file.write(std::string(1, '\0')); + } + history_file.close(); + + TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql"); + output_file.close(); + + char sh[] = "sh"; + char sh_c[] = "-c"; + std::string fzf = fmt::format("fzf 
--read0 --height=30% < {} > {}", history_file.getPath(), output_file.getPath()); + char * const argv[] = {sh, sh_c, fzf.data(), nullptr}; + + try + { + if (executeCommand(argv) == 0) + { + const std::string & new_query = readFile(output_file.getPath()); + rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); + } + } + catch (const std::runtime_error & e) + { + rx.print(e.what()); + } + + if (bracketed_paste_enabled) + enableBracketedPaste(); +} + void ReplxxLineReader::enableBracketedPaste() { bracketed_paste_enabled = true; diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index b9ec214d02c..ba2ccf903b6 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -27,6 +27,7 @@ private: void addToHistory(const String & line) override; int executeEditor(const std::string & path); void openEditor(); + void openInteractiveHistorySearch(); replxx::Replxx rx; replxx::Replxx::highlighter_callback_t highlighter; From aaa36e2b259f43a4336d4094069afb460cd322c2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Sep 2022 14:23:14 +0200 Subject: [PATCH 52/87] clickhouse-client: add support of sk (fzf-like in rust) Signed-off-by: Azat Khuzhin Co-authored-by: Antonio Andelic --- base/base/ReplxxLineReader.cpp | 53 +++++++++++++++++++++++++++++----- base/base/ReplxxLineReader.h | 1 + 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 32d3d9aafe7..04b7ed2bca7 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -14,7 +14,10 @@ #include #include #include +#include #include +#include +#include /// is_any_of namespace { @@ -35,6 +38,30 @@ std::string getEditor() return editor; } +std::string getFuzzyFinder() +{ + const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe) + + if (!env_path || !*env_path) + return {}; + + std::vector paths; + boost::split(paths, env_path, 
boost::is_any_of(":")); + for (const auto & path_str : paths) + { + std::filesystem::path path(path_str); + std::filesystem::path sk_bin_path = path / "sk"; + if (!access(sk_bin_path.c_str(), X_OK)) + return sk_bin_path; + + std::filesystem::path fzf_bin_path = path / "fzf"; + if (!access(fzf_bin_path.c_str(), X_OK)) + return fzf_bin_path; + } + + return {}; +} + /// See comments in ShellCommand::executeImpl() /// (for the vfork via dlsym()) int executeCommand(char * const argv[]) @@ -268,6 +295,7 @@ ReplxxLineReader::ReplxxLineReader( replxx::Replxx::highlighter_callback_t highlighter_) : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_)) , editor(getEditor()) + , fuzzy_finder(getFuzzyFinder()) { using namespace std::placeholders; using Replxx = replxx::Replxx; @@ -376,13 +404,16 @@ ReplxxLineReader::ReplxxLineReader( }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); - /// interactive search in history (ctrlp/fzf/skim) - auto interactive_history_search = [this](char32_t code) + /// interactive search in history (requires fzf/sk) + if (!fuzzy_finder.empty()) { - openInteractiveHistorySearch(); - return rx.invoke(Replxx::ACTION::REPAINT, code); - }; - rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + auto interactive_history_search = [this](char32_t code) + { + openInteractiveHistorySearch(); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + } } ReplxxLineReader::~ReplxxLineReader() @@ -453,6 +484,7 @@ void ReplxxLineReader::openEditor() void ReplxxLineReader::openInteractiveHistorySearch() { + assert(!fuzzy_finder.empty()); TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); auto hs(rx.history_scan()); while (hs.next()) @@ -467,8 +499,13 @@ void ReplxxLineReader::openInteractiveHistorySearch() char sh[] = "sh"; char sh_c[] = "-c"; - std::string fzf = 
fmt::format("fzf --read0 --height=30% < {} > {}", history_file.getPath(), output_file.getPath()); - char * const argv[] = {sh, sh_c, fzf.data(), nullptr}; + /// NOTE: You can use one of the following to configure the behaviour additionally: + /// - SKIM_DEFAULT_OPTIONS + /// - FZF_DEFAULT_OPTS + std::string fuzzy_finder_command = fmt::format( + "{} --read0 --height=30% < {} > {}", + fuzzy_finder, history_file.getPath(), output_file.getPath()); + char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; try { diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index ba2ccf903b6..fea1405a208 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -37,4 +37,5 @@ private: bool bracketed_paste_enabled = false; std::string editor; + std::string fuzzy_finder; }; From d0f14e1255480dfb7f0b6f31668a1069e99bdf6c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Sep 2022 17:39:03 +0200 Subject: [PATCH 53/87] clickhouse-client: proper support of vfork() w/o dlsym() in musl Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 04b7ed2bca7..e1b97e936c2 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -66,7 +66,17 @@ std::string getFuzzyFinder() /// (for the vfork via dlsym()) int executeCommand(char * const argv[]) { +#if !defined(USE_MUSL) + /** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs, + * because of the resolving of symbols in the shared library + * http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html + * Therefore, separate the resolving of the symbol from the call. + */ static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); +#else + /// If we use Musl with static linking, there is no dlsym and no issue with vfork. 
+ static void * real_vfork = reinterpret_cast(&vfork); +#endif if (!real_vfork) throw std::runtime_error("Cannot find vfork symbol"); From 8cc53a48ae99a765085f44a75fa49314d1f1cc7d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 26 Sep 2022 13:32:53 +0200 Subject: [PATCH 54/87] clickhouse-client: tune fzf/sk options to be a real reverse search Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index e1b97e936c2..916d4f9a74d 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -513,7 +513,7 @@ void ReplxxLineReader::openInteractiveHistorySearch() /// - SKIM_DEFAULT_OPTIONS /// - FZF_DEFAULT_OPTS std::string fuzzy_finder_command = fmt::format( - "{} --read0 --height=30% < {} > {}", + "{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}", fuzzy_finder, history_file.getPath(), output_file.getPath()); char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; From 287d1e68b1f5e190629ed39db1369eea0608e46b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 Sep 2022 12:22:23 +0000 Subject: [PATCH 55/87] Fix KeeperMap drop again --- src/Storages/StorageKeeperMap.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f6b110bbad0..11b6fe1b8dc 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -456,9 +456,9 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) { - zookeeper->removeChildrenRecursive(data_path); + zookeeper->tryRemoveChildrenRecursive(data_path, true); - bool completely_removed = false; + bool drop_done = false; 
Coordination::Requests ops; ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); @@ -473,20 +473,33 @@ bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::E case ZOK: { metadata_drop_lock->setAlreadyRemoved(); - completely_removed = true; + drop_done = true; LOG_INFO(log, "Metadata ({}) and data ({}) was successfully removed from ZooKeeper", metadata_path, data_path); break; } case ZNONODE: throw Exception(ErrorCodes::LOGICAL_ERROR, "There is a race condition between creation and removal of metadata. It's a bug"); case ZNOTEMPTY: - LOG_ERROR(log, "Metadata was not completely removed from ZooKeeper"); + { + // valid case when this can happen is if a table checked "dropped" path just before it was created. + // new table will create data/metadata paths again while drop is in progress + // only bad thing that can happen is if we start inserting data into new table while + // we remove data here (some data can be lost) + LOG_WARNING(log, "Metadata was not completely removed from ZooKeeper. 
Maybe some other table is using the same path"); + + // we need to remove at least "dropped" nodes + Coordination::Requests requests; + ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); + ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); + zookeeper->multi(requests); + drop_done = true; break; + } default: zkutil::KeeperMultiException::check(code, ops, responses); break; } - return completely_removed; + return drop_done; } void StorageKeeperMap::drop() From 2384761063ac455bf784382d680ecd9f3abe56cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 26 Sep 2022 15:38:10 +0200 Subject: [PATCH 56/87] Fix drop of completely dropped table --- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cc0ace576ce..3aabd1a02a7 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7487,6 +7487,10 @@ void StorageReplicatedMergeTree::createTableSharedID() const id = zookeeper->get(zookeeper_table_id_path); LOG_DEBUG(log, "Shared ID on path {} concurrently created, will set ID {}", zookeeper_table_id_path, id); } + else if (code == Coordination::Error::ZNONODE) + { + LOG_WARNING(log, "Shared ID on path {} is impossible to create because table was completely dropped, parts can be dropped without checks (using id {})", zookeeper_table_id_path, id); + } else if (code != Coordination::Error::ZOK) { throw zkutil::KeeperException(code, zookeeper_table_id_path); From e20d3803c43128f11bffd7adef5d0e7118fc3a63 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 26 Sep 2022 15:40:25 +0200 Subject: [PATCH 57/87] Better fix --- src/Storages/StorageReplicatedMergeTree.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 
3aabd1a02a7..552035f478c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7451,8 +7451,9 @@ String StorageReplicatedMergeTree::getTableSharedID() const /// can be called only during table initialization std::lock_guard lock(table_shared_id_mutex); + bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper; /// Can happen if table was partially initialized before drop by DatabaseCatalog - if (table_shared_id == UUIDHelpers::Nil) + if (maybe_has_metadata_in_zookeeper && table_shared_id == UUIDHelpers::Nil) createTableSharedID(); return toString(table_shared_id); @@ -7487,10 +7488,6 @@ void StorageReplicatedMergeTree::createTableSharedID() const id = zookeeper->get(zookeeper_table_id_path); LOG_DEBUG(log, "Shared ID on path {} concurrently created, will set ID {}", zookeeper_table_id_path, id); } - else if (code == Coordination::Error::ZNONODE) - { - LOG_WARNING(log, "Shared ID on path {} is impossible to create because table was completely dropped, parts can be dropped without checks (using id {})", zookeeper_table_id_path, id); - } else if (code != Coordination::Error::ZOK) { throw zkutil::KeeperException(code, zookeeper_table_id_path); From ec35ff9cd6f1c0e9d8190c64226a1ea42782f2a1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 13:32:12 +0000 Subject: [PATCH 58/87] Log git hash during startup We currently only log a compiler-generated "build id" at startup which is different for each build. That makes it useless to determine the exact source code state in tests (e.g. BC test) and from user log files (e.g. if someone compiled an intermediate version of ClickHouse). 
Current log message: Starting ClickHouse 22.10.1.1 with revision 54467, build id: 6F35820328F89C9F36E91C447FF9E61CAF0EF019, PID 42633 New log message: Starting ClickHouse 22.10.1.1 (revision 54467, git hash: b6b1f7f763f94ffa12133679a6f80342dd1c3afe, build id: 47B12BE61151926FBBD230DE42F3B7A6652AC482), PID 981813 --- CMakeLists.txt | 39 ++++++++++++++++++++++++++++- cmake/git_status.cmake | 22 ---------------- src/Daemon/BaseDaemon.cpp | 26 +++++++++++-------- src/Daemon/BaseDaemon.h | 3 ++- src/Daemon/CMakeLists.txt | 4 +++ src/Daemon/GitHash.generated.cpp.in | 10 ++++++++ src/Storages/System/CMakeLists.txt | 36 +++----------------------- 7 files changed, 72 insertions(+), 68 deletions(-) delete mode 100644 cmake/git_status.cmake create mode 100644 src/Daemon/GitHash.generated.cpp.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 64fb870b61b..b0accceddc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,44 @@ include (cmake/target.cmake) include (cmake/tools.cmake) include (cmake/ccache.cmake) include (cmake/clang_tidy.cmake) -include (cmake/git_status.cmake) + +find_package(Git) +# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. 
for view SYSTEM.BUILD_OPTIONS +if (Git_FOUND) + # Commit hash + whether the building workspace was dirty or not + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_HASH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Branch name + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Date of the commit + SET(ENV{TZ} "UTC") + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Subject of the commit + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + message(STATUS "HEAD's commit hash ${GIT_HASH}") + + execute_process( + COMMAND ${GIT_EXECUTABLE} status + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) +else() + message(STATUS "Git could not be found.") +endif() # Ignore export() since we don't use it, # but it gets broken with a global targets via link_libraries() diff --git a/cmake/git_status.cmake b/cmake/git_status.cmake deleted file mode 100644 index c1047c0ccbf..00000000000 --- a/cmake/git_status.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Print the status of the git repository (if git is available). 
-# This is useful for troubleshooting build failure reports - -find_package(Git) - -if (Git_FOUND) - - execute_process( - COMMAND ${GIT_EXECUTABLE} rev-parse HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMIT_ID - OUTPUT_STRIP_TRAILING_WHITESPACE) - - message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}") - - execute_process( - COMMAND ${GIT_EXECUTABLE} status - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) - -else() - message(STATUS "Git could not be found.") -endif() diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index d449768935a..157255bba12 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -266,8 +266,8 @@ private: { size_t pos = message.find('\n'); - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message.substr(0, pos)); + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) {}", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, message.substr(0, pos)); /// Print trace from std::terminate exception line-by-line to make it easy for grep. 
while (pos != std::string_view::npos) @@ -315,14 +315,14 @@ private: if (query_id.empty()) { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (no query) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context } else { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, query_id, query, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context) } @@ -838,6 +838,7 @@ static void blockSignals(const std::vector & signals) throw Poco::Exception("Cannot block signal."); } +extern String getGitHash(); void BaseDaemon::initializeTerminationAndSignalProcessing() { @@ -870,13 +871,15 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() #if defined(__ELF__) && !defined(OS_FREEBSD) String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex(); if (build_id_hex.empty()) - build_id_info = "no build id"; + build_id = ""; else - build_id_info = "build id: " + build_id_hex; + build_id = build_id_hex; #else - build_id_info = "no build id"; + build_id = ""; #endif + git_hash = getGitHash(); + #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -888,8 +891,9 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() void BaseDaemon::logRevision() const { Poco::Logger::root().information("Starting " + std::string{VERSION_FULL} - + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", 
" + build_id_info + + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) + + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", build id: " + (build_id.empty() ? "" : build_id) + ")" + ", PID " + std::to_string(getpid())); } diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index 1b67ca986a8..d248ad9cec9 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -172,7 +172,8 @@ protected: DB::ConfigProcessor::LoadedConfig loaded_config; Poco::Util::AbstractConfiguration * last_configuration = nullptr; - String build_id_info; + String build_id; + String git_hash; String stored_binary_hash; std::vector handled_signals; diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 78c133d9893..7499d75d514 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,7 +1,11 @@ +set (GENERATED_GIT_HASH_CPP "${CMAKE_CURRENT_BINARY_DIR}/GitHash.generated.cpp") +configure_file(GitHash.generated.cpp.in ${GENERATED_GIT_HASH_CPP}) + add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp + ${GENERATED_GIT_HASH_CPP} ) if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/src/Daemon/GitHash.generated.cpp.in b/src/Daemon/GitHash.generated.cpp.in new file mode 100644 index 00000000000..833e9304b29 --- /dev/null +++ b/src/Daemon/GitHash.generated.cpp.in @@ -0,0 +1,10 @@ +// .cpp autogenerated by cmake + +#include + +static const String GIT_HASH = "@GIT_HASH@"; + +String getGitHash() +{ + return GIT_HASH; +} diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index efc4c0ed37b..d2f7a5426db 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -2,49 +2,18 @@ # You can also regenerate it manually this way: # execute_process(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh") -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - set (CONFIG_BUILD 
"${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp") + get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) - get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) - -find_package(Git) -if(Git_FOUND) - # The commit's git hash, and whether the building workspace was dirty or not - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Git branch name - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_BRANCH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The date of the commit - SET(ENV{TZ} "UTC") - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_DATE - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The subject of the commit - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_COMMIT_SUBJECT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) endfunction() + generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") @@ -78,6 +47,7 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) +include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) clickhouse_embed_binaries( TARGET information_schema_metadata 
RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" From 5c8ce2f543dd27eb623a1009ec7d040bdd78bdb5 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 26 Sep 2022 14:28:03 +0000 Subject: [PATCH 59/87] More correct --- src/Storages/StorageKeeperMap.cpp | 101 +++++++++++++++--------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 11b6fe1b8dc..bde6c4df80b 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -316,6 +316,36 @@ StorageKeeperMap::StorageKeeperMap( for (size_t i = 0; i < 1000; ++i) { + std::string stored_metadata_string; + auto exists = client->tryGet(metadata_path, stored_metadata_string); + + if (exists) + { + // this requires same name for columns + // maybe we can do a smarter comparison for columns and primary key expression + if (stored_metadata_string != metadata_string) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path {} is already used but the stored table definition doesn't match. 
Stored metadata: {}", + root_path, + stored_metadata_string); + + auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); + + // tables_path was removed with drop + if (code == Coordination::Error::ZNONODE) + { + LOG_INFO(log, "Metadata nodes were removed by another server, will retry"); + continue; + } + else if (code != Coordination::Error::ZOK) + { + throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", root_path); + } + + return; + } + if (client->exists(dropped_path)) { LOG_INFO(log, "Removing leftover nodes"); @@ -342,45 +372,29 @@ StorageKeeperMap::StorageKeeperMap( } } - std::string stored_metadata_string; - auto exists = client->tryGet(metadata_path, stored_metadata_string); + Coordination::Requests create_requests + { + zkutil::makeCreateRequest(metadata_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(data_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(tables_path, "", zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(table_path, "", zkutil::CreateMode::Persistent), + }; - if (exists) + Coordination::Responses create_responses; + auto code = client->tryMulti(create_requests, create_responses); + if (code == Coordination::Error::ZNODEEXISTS) { - // this requires same name for columns - // maybe we can do a smarter comparison for columns and primary key expression - if (stored_metadata_string != metadata_string) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Path {} is already used but the stored table definition doesn't match. 
Stored metadata: {}", - root_path, - stored_metadata_string); + LOG_WARNING(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); + continue; } - else + else if (code != Coordination::Error::ZOK) { - auto code = client->tryCreate(metadata_path, metadata_string, zkutil::CreateMode::Persistent); - if (code == Coordination::Error::ZNODEEXISTS) - continue; - else if (code != Coordination::Error::ZOK) - throw Coordination::Exception(code, metadata_path); + zkutil::KeeperMultiException::check(code, create_requests, create_responses); } - client->createIfNotExists(tables_path, ""); - auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); - - if (code == Coordination::Error::ZOK) - { - // metadata now should be guaranteed to exist because we added our UUID to the tables_path - client->createIfNotExists(data_path, ""); - table_is_valid = true; - return; - } - - if (code == Coordination::Error::ZNONODE) - LOG_INFO(log, "Metadata nodes were deleted in background, will retry"); - else - throw Coordination::Exception(code, table_path); + table_is_valid = true; + return; } throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot create metadata for table, because it is removed concurrently or because of wrong root_path ({})", root_path); @@ -456,9 +470,9 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) { - zookeeper->tryRemoveChildrenRecursive(data_path, true); + zookeeper->removeChildrenRecursive(data_path); - bool drop_done = false; + bool completely_removed = false; Coordination::Requests ops; ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); @@ -473,33 +487,20 @@ bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::E case 
ZOK: { metadata_drop_lock->setAlreadyRemoved(); - drop_done = true; + completely_removed = true; LOG_INFO(log, "Metadata ({}) and data ({}) was successfully removed from ZooKeeper", metadata_path, data_path); break; } case ZNONODE: throw Exception(ErrorCodes::LOGICAL_ERROR, "There is a race condition between creation and removal of metadata. It's a bug"); case ZNOTEMPTY: - { - // valid case when this can happen is if a table checked "dropped" path just before it was created. - // new table will create data/metadata paths again while drop is in progress - // only bad thing that can happen is if we start inserting data into new table while - // we remove data here (some data can be lost) - LOG_WARNING(log, "Metadata was not completely removed from ZooKeeper. Maybe some other table is using the same path"); - - // we need to remove at least "dropped" nodes - Coordination::Requests requests; - ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); - ops.emplace_back(zkutil::makeRemoveRequest(dropped_path, -1)); - zookeeper->multi(requests); - drop_done = true; + LOG_ERROR(log, "Metadata was not completely removed from ZooKeeper"); break; - } default: zkutil::KeeperMultiException::check(code, ops, responses); break; } - return drop_done; + return completely_removed; } void StorageKeeperMap::drop() From 46d45607c8faa620ea928c9202b795af1194b353 Mon Sep 17 00:00:00 2001 From: Daniel Kutenin Date: Mon, 26 Sep 2022 15:53:03 +0100 Subject: [PATCH 60/87] Disable unused command line as it does not work for assembly files --- cmake/warnings.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 89f3a62ba2e..92aebdea70d 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -46,6 +46,7 @@ if (COMPILER_CLANG) no_warning(weak-vtables) no_warning(thread-safety-negative) # experimental flag, too many false positives no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16 + 
no_warning(unused-command-line-argument) # TODO Enable conversion, sign-conversion, double-promotion warnings. elseif (COMPILER_GCC) # Add compiler options only to c++ compiler From 9711950c35edfe6f5eadb9c96a08a26150d41939 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 15:04:56 +0000 Subject: [PATCH 61/87] Fix build --- programs/keeper/Keeper.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 6d487a68111..fdfe0cef2b3 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -490,8 +490,9 @@ int Keeper::main(const std::vector & /*args*/) void Keeper::logRevision() const { Poco::Logger::root().information("Starting ClickHouse Keeper " + std::string{VERSION_STRING} - + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", " + build_id_info + + "(revision : " + std::to_string(ClickHouseRevision::getVersionRevision()) + + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", build id: " + (build_id.empty() ? 
"" : build_id) + ")" + ", PID " + std::to_string(getpid())); } From 0f6a44efef4ca21cc922da0b1a67bf40f108b9db Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 26 Sep 2022 20:11:30 +0200 Subject: [PATCH 62/87] fix missing metadata_version for old tables --- .../ReplicatedMergeTreeAttachThread.cpp | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index ba4979e57f2..90a28c373c7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int REPLICA_STATUS_CHANGED; } ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_) @@ -54,6 +55,8 @@ void ReplicatedMergeTreeAttachThread::run() { if (const auto * coordination_exception = dynamic_cast(&e)) needs_retry = Coordination::isHardwareError(coordination_exception->code); + else if (e.code() == ErrorCodes::REPLICA_STATUS_CHANGED) + needs_retry = true; if (needs_retry) { @@ -84,14 +87,14 @@ void ReplicatedMergeTreeAttachThread::run() void ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const String & replica_path) { - /// Since 20.4 and until 22.9 "/metadata" and "/metadata_version" nodes were created on replica startup. + /// Since 20.4 and until 22.9 "/metadata" node was created on replica startup and "/metadata_version" was created on ALTER. /// Since 21.12 we could use "/metadata" to check if replica is dropped (see StorageReplicatedMergeTree::dropReplica), /// but it did not work correctly, because "/metadata" node was re-created on server startup. /// Since 22.9 we do not recreate these nodes and use "/host" to check if replica is dropped. 
String replica_metadata; const bool replica_metadata_exists = zookeeper->tryGet(replica_path + "/metadata", replica_metadata); - if (!replica_metadata_exists || replica_metadata.empty() || !zookeeper->exists(replica_path + "/metadata_version")) + if (!replica_metadata_exists || replica_metadata.empty()) { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Upgrade from 20.3 and older to 22.9 and newer " "should be done through an intermediate version (failed to get metadata or metadata_version for {}," @@ -139,11 +142,36 @@ void ReplicatedMergeTreeAttachThread::runImpl() checkHasReplicaMetadataInZooKeeper(zookeeper, replica_path); + String replica_metadata_version; + const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); + if (replica_metadata_version_exists) + { + storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); + } + else + { + /// Table was created before 20.4 and was never altered, + /// let's initialize replica metadata version from global metadata version. 
+ Coordination::Stat table_metadata_version_stat; + zookeeper->get(zookeeper_path + "/metadata", &table_metadata_version_stat); + + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/metadata", table_metadata_version_stat.version)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", toString(table_metadata_version_stat.version), zkutil::CreateMode::Persistent)); + + Coordination::Responses res; + auto code = zookeeper->tryMulti(ops, res); + + if (code == Coordination::Error::ZBADVERSION) + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Failed to initialize metadata_version " + "because table was concurrently altered, will retry"); + + zkutil::KeeperMultiException::check(code, ops, res); + } + storage.checkTableStructure(replica_path, metadata_snapshot); storage.checkParts(skip_sanity_checks); - storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); - /// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart), /// don't allow to reinitialize them, delete each of them immediately. 
storage.clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}); From 99725e68d1ccf68df4b6ed05af5823cc407a40ed Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Sep 2022 19:28:27 +0000 Subject: [PATCH 63/87] Fix standalone keeper build --- programs/keeper/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index a5ad506abe6..ac8f3b667f6 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -92,6 +92,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp Keeper.cpp TinyContext.cpp From 9825b4d1f9e9eb9e86b27f2e1b530bb8c8705fb9 Mon Sep 17 00:00:00 2001 From: Daniel Kutenin Date: Mon, 26 Sep 2022 20:56:52 +0100 Subject: [PATCH 64/87] Move unused command line arguments to a proper place --- cmake/cpu_features.cmake | 2 +- cmake/warnings.cmake | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 35eb95a5d47..6707d703372 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -57,7 +57,7 @@ elseif (ARCH_AARCH64) # [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html # [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions- # [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr") + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument") endif () elseif (ARCH_PPC64LE) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 
92aebdea70d..89f3a62ba2e 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -46,7 +46,6 @@ if (COMPILER_CLANG) no_warning(weak-vtables) no_warning(thread-safety-negative) # experimental flag, too many false positives no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16 - no_warning(unused-command-line-argument) # TODO Enable conversion, sign-conversion, double-promotion warnings. elseif (COMPILER_GCC) # Add compiler options only to c++ compiler From 6d7de37e3d4c01d9169750f98a131cdb4238e8f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Sep 2022 23:52:14 +0200 Subject: [PATCH 65/87] Small fix in dashboard --- programs/server/dashboard.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index e63a277497a..f013e3ac064 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -820,7 +820,7 @@ async function draw(idx, chart, url_params, query) { sync.sub(plots[idx]); /// Set title - const title = queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ); + const title = queries[idx].title ? 
queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : ''; chart.querySelector('.title').firstChild.data = title; } From 588a5e5a42224e2e85f7878ed6fd1b0e881c85b4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 07:29:18 +0000 Subject: [PATCH 66/87] Simplify a bit --- programs/keeper/CMakeLists.txt | 2 +- src/Daemon/CMakeLists.txt | 5 ++--- src/Daemon/GitHash.cpp.in | 8 ++++++++ src/Daemon/GitHash.generated.cpp.in | 10 ---------- src/Storages/System/CMakeLists.txt | 6 ++---- ...nerated.cpp.in => StorageSystemBuildOptions.cpp.in} | 2 +- 6 files changed, 14 insertions(+), 19 deletions(-) create mode 100644 src/Daemon/GitHash.cpp.in delete mode 100644 src/Daemon/GitHash.generated.cpp.in rename src/Storages/System/{StorageSystemBuildOptions.generated.cpp.in => StorageSystemBuildOptions.cpp.in} (98%) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index ac8f3b667f6..ce176ccade5 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -33,7 +33,7 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) - # Sraight list of all required sources + # Straight list of all required sources set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 7499d75d514..f02fd69aa79 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,11 +1,10 @@ -set (GENERATED_GIT_HASH_CPP "${CMAKE_CURRENT_BINARY_DIR}/GitHash.generated.cpp") -configure_file(GitHash.generated.cpp.in ${GENERATED_GIT_HASH_CPP}) +configure_file(GitHash.cpp.in GitHash.generated.cpp) add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp - ${GENERATED_GIT_HASH_CPP} + GitHash.generated.cpp ) if 
(OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/src/Daemon/GitHash.cpp.in b/src/Daemon/GitHash.cpp.in new file mode 100644 index 00000000000..4a2da793fc2 --- /dev/null +++ b/src/Daemon/GitHash.cpp.in @@ -0,0 +1,8 @@ +// File was generated by CMake + +#include + +String getGitHash() +{ + return "@GIT_HASH@"; +} diff --git a/src/Daemon/GitHash.generated.cpp.in b/src/Daemon/GitHash.generated.cpp.in deleted file mode 100644 index 833e9304b29..00000000000 --- a/src/Daemon/GitHash.generated.cpp.in +++ /dev/null @@ -1,10 +0,0 @@ -// .cpp autogenerated by cmake - -#include - -static const String GIT_HASH = "@GIT_HASH@"; - -String getGitHash() -{ - return GIT_HASH; -} diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index d2f7a5426db..6bc080045f8 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -2,8 +2,6 @@ # You can also regenerate it manually this way: # execute_process(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh") -set (CONFIG_BUILD "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp") - get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) @@ -11,14 +9,14 @@ function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) - configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) + configure_file(StorageSystemBuildOptions.cpp.in StorageSystemBuildOptions.generated.cpp) endfunction() generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(storages_system .) 
-list (APPEND storages_system_sources ${CONFIG_BUILD}) +list (APPEND storages_system_sources StorageSystemBuildOptions.generated.cpp) add_custom_target(generate-contributors ./StorageSystemContributors.sh diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in similarity index 98% rename from src/Storages/System/StorageSystemBuildOptions.generated.cpp.in rename to src/Storages/System/StorageSystemBuildOptions.cpp.in index dde90ce459a..117d97d2cfd 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -1,4 +1,4 @@ -// .cpp autogenerated by cmake +// File was generated by CMake const char * auto_config_build[] { From 4d2a5d752dd1b213eefc4895626fbcd8b8e20884 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 27 Sep 2022 11:36:20 +0200 Subject: [PATCH 67/87] Add failing test --- .../test_compressed_marks_restart/__init__.py | 1 + .../test_compressed_marks_restart/test.py | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 tests/integration/test_compressed_marks_restart/__init__.py create mode 100644 tests/integration/test_compressed_marks_restart/test.py diff --git a/tests/integration/test_compressed_marks_restart/__init__.py b/tests/integration/test_compressed_marks_restart/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_compressed_marks_restart/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_compressed_marks_restart/test.py b/tests/integration/test_compressed_marks_restart/test.py new file mode 100644 index 00000000000..1c8db340655 --- /dev/null +++ b/tests/integration/test_compressed_marks_restart/test.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", 
stay_alive=True) + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_compressed_marks_restart_compact(): + node.query("create table test_02381_compact (a UInt64, b String) ENGINE = MergeTree order by (a, b)") + node.query("insert into test_02381_compact values (1, 'Hello')") + node.query("alter table test_02381_compact modify setting compress_marks=true, compress_primary_key=true") + node.query("insert into test_02381_compact values (2, 'World')") + node.query("optimize table test_02381_compact final") + + assert node.query("SELECT count() FROM test_02381_compact WHERE not ignore(*)") == "2\n" + node.restart_clickhouse() + assert node.query("SELECT count() FROM test_02381_compact WHERE not ignore(*)") == "2\n" + +def test_compressed_marks_restart_wide(): + node.query("create table test_02381_wide (a UInt64, b String) ENGINE = MergeTree order by (a, b) SETTINGS min_bytes_for_wide_part=0") + node.query("insert into test_02381_wide values (1, 'Hello')") + node.query("alter table test_02381_wide modify setting compress_marks=true, compress_primary_key=true") + node.query("insert into test_02381_wide values (2, 'World')") + node.query("optimize table test_02381_wide final") + + assert node.query("SELECT count() FROM test_02381_wide WHERE not ignore(*)") == "2\n" + node.restart_clickhouse() + assert node.query("SELECT count() FROM test_02381_wide WHERE not ignore(*)") == "2\n" From 08af2a32bf4e1a646896279fbdc962aa3aecedf7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 27 Sep 2022 09:53:26 +0000 Subject: [PATCH 68/87] Automatic style fix --- .../test_compressed_marks_restart/test.py | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_compressed_marks_restart/test.py b/tests/integration/test_compressed_marks_restart/test.py index 1c8db340655..90e09d62792 100644 --- 
a/tests/integration/test_compressed_marks_restart/test.py +++ b/tests/integration/test_compressed_marks_restart/test.py @@ -5,6 +5,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance("node", stay_alive=True) + @pytest.fixture(scope="module", autouse=True) def started_cluster(): try: @@ -16,23 +17,42 @@ def started_cluster(): def test_compressed_marks_restart_compact(): - node.query("create table test_02381_compact (a UInt64, b String) ENGINE = MergeTree order by (a, b)") + node.query( + "create table test_02381_compact (a UInt64, b String) ENGINE = MergeTree order by (a, b)" + ) node.query("insert into test_02381_compact values (1, 'Hello')") - node.query("alter table test_02381_compact modify setting compress_marks=true, compress_primary_key=true") + node.query( + "alter table test_02381_compact modify setting compress_marks=true, compress_primary_key=true" + ) node.query("insert into test_02381_compact values (2, 'World')") node.query("optimize table test_02381_compact final") - assert node.query("SELECT count() FROM test_02381_compact WHERE not ignore(*)") == "2\n" + assert ( + node.query("SELECT count() FROM test_02381_compact WHERE not ignore(*)") + == "2\n" + ) node.restart_clickhouse() - assert node.query("SELECT count() FROM test_02381_compact WHERE not ignore(*)") == "2\n" + assert ( + node.query("SELECT count() FROM test_02381_compact WHERE not ignore(*)") + == "2\n" + ) + def test_compressed_marks_restart_wide(): - node.query("create table test_02381_wide (a UInt64, b String) ENGINE = MergeTree order by (a, b) SETTINGS min_bytes_for_wide_part=0") + node.query( + "create table test_02381_wide (a UInt64, b String) ENGINE = MergeTree order by (a, b) SETTINGS min_bytes_for_wide_part=0" + ) node.query("insert into test_02381_wide values (1, 'Hello')") - node.query("alter table test_02381_wide modify setting compress_marks=true, compress_primary_key=true") + node.query( + "alter table 
test_02381_wide modify setting compress_marks=true, compress_primary_key=true" + ) node.query("insert into test_02381_wide values (2, 'World')") node.query("optimize table test_02381_wide final") - assert node.query("SELECT count() FROM test_02381_wide WHERE not ignore(*)") == "2\n" + assert ( + node.query("SELECT count() FROM test_02381_wide WHERE not ignore(*)") == "2\n" + ) node.restart_clickhouse() - assert node.query("SELECT count() FROM test_02381_wide WHERE not ignore(*)") == "2\n" + assert ( + node.query("SELECT count() FROM test_02381_wide WHERE not ignore(*)") == "2\n" + ) From 19062e9d9743f6a926d24fa26abe1f3b56cd2354 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 27 Sep 2022 14:26:45 +0300 Subject: [PATCH 69/87] Update src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp Co-authored-by: Antonio Andelic --- src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 90a28c373c7..7f91ffee1fe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -146,7 +146,7 @@ void ReplicatedMergeTreeAttachThread::runImpl() const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); if (replica_metadata_version_exists) { - storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); + storage.metadata_version = parse(replica_metadata_version); } else { From 823d8fb6cd4bf900564e68caedffdfa57b359ac6 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 11:43:31 +0000 Subject: [PATCH 70/87] Move git calls back into git.cmake + renamed the file from originally "git_status.cmake" to "git.cmake" (because we not longer run only "git status") --- CMakeLists.txt | 39 
+-------------------------------------- cmake/git.cmake | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 38 deletions(-) create mode 100644 cmake/git.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index b0accceddc3..c737046a5f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,44 +18,7 @@ include (cmake/target.cmake) include (cmake/tools.cmake) include (cmake/ccache.cmake) include (cmake/clang_tidy.cmake) - -find_package(Git) -# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for view SYSTEM.BUILD_OPTIONS -if (Git_FOUND) - # Commit hash + whether the building workspace was dirty or not - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Branch name - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_BRANCH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Date of the commit - SET(ENV{TZ} "UTC") - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_DATE - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Subject of the commit - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMIT_SUBJECT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - - message(STATUS "HEAD's commit hash ${GIT_HASH}") - - execute_process( - COMMAND ${GIT_EXECUTABLE} status - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) -else() - message(STATUS "Git could not be found.") -endif() +include (cmake/git.cmake) # Ignore export() since we don't use it, # but it gets broken with a global targets via link_libraries() diff --git a/cmake/git.cmake b/cmake/git.cmake new file mode 100644 index 
00000000000..93f38fd389c --- /dev/null +++ b/cmake/git.cmake @@ -0,0 +1,42 @@ +find_package(Git) + +# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for view SYSTEM.BUILD_OPTIONS. +if (Git_FOUND) + # Commit hash + whether the building workspace was dirty or not + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_HASH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Branch name + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Date of the commit + SET(ENV{TZ} "UTC") + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Subject of the commit + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + message(STATUS "HEAD's commit hash ${GIT_HASH}") + + execute_process( + COMMAND ${GIT_EXECUTABLE} status + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) +else() + message(STATUS "Git could not be found.") +endif() + From 1f3f86e5bfd5c1358e24a7b423495ec3e312bb68 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Sep 2022 11:46:56 +0000 Subject: [PATCH 71/87] Cosmetics --- cmake/git.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/git.cmake b/cmake/git.cmake index 93f38fd389c..397ec3cd081 100644 --- a/cmake/git.cmake +++ b/cmake/git.cmake @@ -31,7 +31,7 @@ if (Git_FOUND) OUTPUT_VARIABLE GIT_COMMIT_SUBJECT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "HEAD's commit hash ${GIT_HASH}") + message(STATUS "Git HEAD commit hash: 
${GIT_HASH}") execute_process( COMMAND ${GIT_EXECUTABLE} status From ce422052b014de8d681ca65ef4ad7e81ea3d52b6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Sep 2022 14:27:56 +0200 Subject: [PATCH 72/87] Fix CANNOT_READ_ALL_DATA for compact parts in case compressed_marks=1 --- .../MergeTree/MergeTreeDataPartCompact.cpp | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 5c5fc0cd8f4..2de17db38b3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -110,18 +111,27 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( size_t marks_file_size = data_part_storage_->getFileSize(marks_file_path); - auto buffer = data_part_storage_->readFile(marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt); - while (!buffer->eof()) + std::unique_ptr buffer = data_part_storage_->readFile( + marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt); + + std::unique_ptr marks_reader; + bool marks_compressed = index_granularity_info_.mark_type.compressed; + if (marks_compressed) + marks_reader = std::make_unique(std::move(buffer)); + else + marks_reader = std::move(buffer); + + size_t marks_file_decompressed_size = 0; + while (!marks_reader->eof()) { - /// Skip offsets for columns - buffer->seek(columns_count * sizeof(MarkInCompressedFile), SEEK_CUR); + marks_reader->ignore(columns_count * sizeof(MarkInCompressedFile)); size_t granularity; - readIntBinary(granularity, *buffer); + readIntBinary(granularity, *marks_reader); index_granularity_.appendMark(granularity); } - if (index_granularity_.getMarksCount() * index_granularity_info_.getMarkSizeInBytes(columns_count) != marks_file_size) - throw Exception("Cannot 
read all marks from file " + marks_file_path, ErrorCodes::CANNOT_READ_ALL_DATA); + if (!marks_compressed && index_granularity_.getMarksCount() * index_granularity_info_.getMarkSizeInBytes(columns_count) != marks_file_size) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all marks from file {}", marks_file_path); index_granularity_.setInitialized(); } From b2fc4b923c1ec1e66d2e3ba39988bffe320b7ead Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Sep 2022 14:31:38 +0200 Subject: [PATCH 73/87] Add functional test --- .../02454_compressed_marks_in_compact_part.reference | 1 + .../0_stateless/02454_compressed_marks_in_compact_part.sql | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/02454_compressed_marks_in_compact_part.reference create mode 100644 tests/queries/0_stateless/02454_compressed_marks_in_compact_part.sql diff --git a/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.reference b/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.reference new file mode 100644 index 00000000000..bade13b252d --- /dev/null +++ b/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.reference @@ -0,0 +1 @@ +2 World diff --git a/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.sql b/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.sql new file mode 100644 index 00000000000..332b1c05d6c --- /dev/null +++ b/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.sql @@ -0,0 +1,6 @@ +drop table if exists cc sync; +create table cc (a UInt64, b String) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks = true; +insert into cc values (2, 'World'); +alter table cc detach part 'all_1_1_0'; +alter table cc attach part 'all_1_1_0'; +select * from cc; From 2f237a8a2c73009699bf176d74acc0a38eb7d72b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 27 Sep 2022 15:30:18 +0200 Subject: [PATCH 
74/87] Update registerStorageMergeTree.cpp --- src/Storages/MergeTree/registerStorageMergeTree.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 4274386e393..6982521f76a 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -683,8 +683,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replicated) { - auto storage_policy = args.getContext()->getStoragePolicy(storage_settings->storage_policy); - return std::make_shared( zookeeper_path, replica_name, From 728fe5d06fcb4e0866b8db584dcd5b6475473a4b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 27 Sep 2022 16:00:44 +0200 Subject: [PATCH 75/87] Change log level --- src/Storages/StorageKeeperMap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index bde6c4df80b..28061aaaf48 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -384,7 +384,7 @@ StorageKeeperMap::StorageKeeperMap( auto code = client->tryMulti(create_requests, create_responses); if (code == Coordination::Error::ZNODEEXISTS) { - LOG_WARNING(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); + LOG_INFO(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); continue; } else if (code != Coordination::Error::ZOK) From 4be153cbd326d47a22b3b1d13466bd02f30a7a6f Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 27 Sep 2022 10:21:35 -0400 Subject: [PATCH 76/87] fix link from intro --- docs/en/sql-reference/statements/create/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 
da68ca05bbb..972acac8aaa 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # CREATE VIEW -Creates a new view. Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). +Creates a new view. Views can be [normal](#normal), [materialized](#materialized-view), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). ## Normal View From fc73b743793766efb9fea25f92920ac048647cc1 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca <113353036+tbsal@users.noreply.github.com> Date: Tue, 27 Sep 2022 14:52:48 +0200 Subject: [PATCH 77/87] Improve feedback when replacing partition with different primary key --- src/Storages/MergeTree/MergeTreeData.cpp | 3 +++ ...n_replacing_partition_with_different_primary_key.reference | 0 ...ck_when_replacing_partition_with_different_primary_key.sql | 4 ++++ 3 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.reference create mode 100644 tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ce4f1dc884d..96328bb9e89 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6144,6 +6144,9 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour if (format_version != src_data->format_version) throw Exception("Tables have different format_version", ErrorCodes::BAD_ARGUMENTS); + if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST())) + throw Exception("Tables have different primary key", ErrorCodes::BAD_ARGUMENTS); + return 
*src_data; } diff --git a/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.reference b/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql b/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql new file mode 100644 index 00000000000..d000fb4479c --- /dev/null +++ b/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql @@ -0,0 +1,4 @@ +CREATE TABLE test_a (id UInt32, company UInt32, total UInt64) ENGINE=SummingMergeTree() PARTITION BY company PRIMARY KEY (id) ORDER BY (id, company); +INSERT INTO test_a SELECT number%10 as id, number%2 as company, count() as total FROM numbers(100) GROUP BY id,company; +CREATE TABLE test_b (id UInt32, company UInt32, total UInt64) ENGINE=SummingMergeTree() PARTITION BY company ORDER BY (id, company); +ALTER TABLE test_b REPLACE PARTITION '0' FROM test_a; -- {serverError BAD_ARGUMENTS} From 44d3eccf4ca99cb4210cb2e52226dfceafc377f6 Mon Sep 17 00:00:00 2001 From: mosinnik Date: Tue, 27 Sep 2022 19:13:40 +0300 Subject: [PATCH 78/87] Update external-data.md fix lost double hyphens --- .../engines/table-engines/special/external-data.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md index 95ae1aa9059..b98039f768a 100644 --- a/docs/ru/engines/table-engines/special/external-data.md +++ b/docs/ru/engines/table-engines/special/external-data.md @@ -22,17 +22,17 @@ ClickHouse позволяет отправить на сервер данные, Таких секций может быть несколько - по числу передаваемых таблиц. -**–external** - маркер начала секции. 
-**–file** - путь к файлу с дампом таблицы, или -, что обозначает stdin. -Из stdin может быть считана только одна таблица. +- **--external** - маркер начала секции. +- **--file** - путь к файлу с дампом таблицы, или `-`, что обозначает `stdin`. +Из `stdin` может быть считана только одна таблица. Следующие параметры не обязательные: -**–name** - имя таблицы. Если не указано - используется _data. -**–format** - формат данных в файле. Если не указано - используется TabSeparated. +- **--name** - имя таблицы. Если не указано - используется _data. +- **--format** - формат данных в файле. Если не указано - используется TabSeparated. Должен быть указан один из следующих параметров: -**–types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … -**–structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. +- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … +- **--structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. Файлы, указанные в file, будут разобраны форматом, указанным в format, с использованием типов данных, указанных в types или structure. Таблица будет загружена на сервер, и доступна там в качестве временной таблицы с именем name. 
From 85b44c909a2b0cb2993406073b98fee289f892f3 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 27 Sep 2022 18:16:58 +0200 Subject: [PATCH 79/87] Update MergeTreeDataPartCompact.cpp --- src/Storages/MergeTree/MergeTreeDataPartCompact.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 2de17db38b3..9298e841072 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -121,7 +121,6 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( else marks_reader = std::move(buffer); - size_t marks_file_decompressed_size = 0; while (!marks_reader->eof()) { marks_reader->ignore(columns_count * sizeof(MarkInCompressedFile)); From 4f23f6ef259d1f2b772f034670e63fab95abc376 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Tue, 27 Sep 2022 14:07:35 -0400 Subject: [PATCH 80/87] fix other links Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/statements/create/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 972acac8aaa..14c06ee0336 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # CREATE VIEW -Creates a new view. Views can be [normal](#normal), [materialized](#materialized-view), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). +Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features). 
## Normal View From 134157df3d307f816eac6df77acd66edba4c8d3e Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 27 Sep 2022 22:42:39 +0200 Subject: [PATCH 81/87] Update storage_conf.xml --- tests/config/config.d/storage_conf.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index dcf4d8e9100..a2a7f5cc750 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -8,6 +8,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -15,6 +16,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -22,6 +24,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -29,6 +32,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -36,6 +40,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -43,6 +48,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 From 57614b6869f2daf4d5bf2ca6d08163cd1cc99a96 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 27 Sep 2022 23:16:16 +0200 Subject: [PATCH 82/87] Remove two redundant lines --- src/Storages/StorageReplicatedMergeTree.cpp | 35 ++++++++------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 552035f478c..beeb19fa6f9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1408,31 +1408,23 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd ops = std::move(new_ops); } - try - { - Coordination::Responses responses; - Coordination::Error e = zookeeper->tryMulti(ops, responses); - if (e == Coordination::Error::ZOK) - return transaction.commit(); + Coordination::Responses responses; + Coordination::Error e 
= zookeeper->tryMulti(ops, responses); + if (e == Coordination::Error::ZOK) + return transaction.commit(); - if (e == Coordination::Error::ZNODEEXISTS) + if (e == Coordination::Error::ZNODEEXISTS) + { + size_t num_check_ops = 2 * absent_part_paths_on_replicas.size(); + size_t failed_op_index = zkutil::getFailedOpIndex(e, responses); + if (failed_op_index < num_check_ops) { - size_t num_check_ops = 2 * absent_part_paths_on_replicas.size(); - size_t failed_op_index = zkutil::getFailedOpIndex(e, responses); - if (failed_op_index < num_check_ops) - { - LOG_INFO(log, "The part {} on a replica suddenly appeared, will recheck checksums", ops[failed_op_index]->getPath()); - continue; - } + LOG_INFO(log, "The part {} on a replica suddenly appeared, will recheck checksums", ops[failed_op_index]->getPath()); + continue; } + } - throw zkutil::KeeperException(e); - } - catch (const std::exception &) - { - unlockSharedData(*part); - throw; - } + throw zkutil::KeeperException(e); } } @@ -8153,7 +8145,6 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } catch (const Exception & ex) { - unlockSharedData(*new_data_part); LOG_WARNING(log, "Cannot commit empty part {} with error {}", lost_part_name, ex.displayText()); return false; } From 2c4a0a11a8f239370da1a10ce5a990d93cd247cd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Sep 2022 01:57:47 +0300 Subject: [PATCH 83/87] Update install.sh --- tests/config/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/install.sh b/tests/config/install.sh index c77940b845e..031450df783 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -52,7 +52,7 @@ ln -sf $SRC_PATH/config.d/enable_zero_copy_replication.xml $DEST_SERVER_PATH/con ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. 
-if [ "${DST_PATH}" = "/etc/clickhouse-server" ] +if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] then ln -sf $SRC_PATH/config.d/legacy_geobase.xml $DEST_SERVER_PATH/config.d/ fi From 76be0d2ee1dcbfd34b993ff69669f3989c6539c4 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 27 Sep 2022 23:07:36 +0000 Subject: [PATCH 84/87] Infer Object type only when allow_experimental_object_type is enabled --- src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Formats/JSONUtils.cpp | 6 +++++- .../0_stateless/01825_type_json_schema_inference.sh | 9 +++++---- .../queries/0_stateless/02268_json_maps_and_objects.sql | 1 + .../02326_numbers_from_json_strings_schema_inference.sql | 1 + .../queries/0_stateless/02416_json_object_inference.sql | 4 ++++ tests/queries/0_stateless/test_ugtxj2/tuples | 0 8 files changed, 18 insertions(+), 5 deletions(-) delete mode 100644 tests/queries/0_stateless/test_ugtxj2/tuples diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 5a327a2f31b..d7679416fd7 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -100,6 +100,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; + format_settings.json.try_infer_objects = context->getSettingsRef().allow_experimental_object_type; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index b6efb0bd391..66888df7e43 100644 --- 
a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -153,6 +153,7 @@ struct FormatSettings bool try_infer_numbers_from_strings = false; bool validate_types_from_metadata = true; bool validate_utf8 = false; + bool try_infer_objects = false; } json; struct diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index e295ad44ed3..6327ffaebd4 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -225,7 +225,7 @@ namespace JSONUtils if (!type) continue; - if (isObject(type)) + if (settings.json.try_infer_objects && isObject(type)) return std::make_shared("json", true); value_types.push_back(type); @@ -240,7 +240,11 @@ namespace JSONUtils are_types_equal &= value_types[i]->equals(*value_types[0]); if (!are_types_equal) + { + if (!settings.json.try_infer_objects) + return nullptr; return std::make_shared("json", true); + } return std::make_shared(std::make_shared(), value_types[0]); } diff --git a/tests/queries/0_stateless/01825_type_json_schema_inference.sh b/tests/queries/0_stateless/01825_type_json_schema_inference.sh index 447ebdf65cb..36991bd8069 100755 --- a/tests/queries/0_stateless/01825_type_json_schema_inference.sh +++ b/tests/queries/0_stateless/01825_type_json_schema_inference.sh @@ -19,7 +19,8 @@ filename="${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json" echo '{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}' > $filename echo '{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}' >> $filename -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" --allow_experimental_object_type 1 + ${CLICKHOUSE_CLIENT} -q "SELECT * FROM t_json_inference FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 
${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(obj) FROM t_json_inference LIMIT 1" @@ -30,7 +31,7 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (id UInt64, obj String, s echo '{"obj": "aaa", "id": 1, "s": "foo"}' > $filename echo '{"id": 2, "obj": "bbb", "s": "bar"}' >> $filename -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT * FROM t_json_inference FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference" @@ -38,14 +39,14 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference" echo '{"map": {"k1": 1, "k2": 2}, "obj": {"k1": 1, "k2": {"k3": 2}}}' > $filename ${CLICKHOUSE_CLIENT} -q "SELECT map, obj, toTypeName(map) AS map_type, toTypeName(obj) AS obj_type \ - FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow') FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 + FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow') FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (obj JSON, map Map(String, UInt64)) \ ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 echo '{"map": {"k1": 1, "k2": 2}, "obj": {"k1": 1, "k2": 2}}' > $filename -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT * FROM t_json_inference FORMAT JSONEachRow" 
--output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(obj) FROM t_json_inference LIMIT 1" diff --git a/tests/queries/0_stateless/02268_json_maps_and_objects.sql b/tests/queries/0_stateless/02268_json_maps_and_objects.sql index 8a9ede6876c..3e63227ba66 100644 --- a/tests/queries/0_stateless/02268_json_maps_and_objects.sql +++ b/tests/queries/0_stateless/02268_json_maps_and_objects.sql @@ -1,4 +1,5 @@ -- Tags: no-fasttest +set allow_experimental_object_type=1; desc format(JSONEachRow, '{"x" : {"a" : "Some string"}}, {"x" : {"b" : [1, 2, 3]}}, {"x" : {"c" : {"d" : 10}}}'); desc format(JSONEachRow, '{"x" : {"a" : "Some string"}}, {"x" : {"b" : [1, 2, 3], "c" : {"42" : 42}}}'); desc format(JSONEachRow, '{"x" : [{"a" : "Some string"}]}, {"x" : [{"b" : [1, 2, 3]}]}'); diff --git a/tests/queries/0_stateless/02326_numbers_from_json_strings_schema_inference.sql b/tests/queries/0_stateless/02326_numbers_from_json_strings_schema_inference.sql index 2012a53c09d..2f8bb3a0331 100644 --- a/tests/queries/0_stateless/02326_numbers_from_json_strings_schema_inference.sql +++ b/tests/queries/0_stateless/02326_numbers_from_json_strings_schema_inference.sql @@ -1,6 +1,7 @@ -- Tags: no-fasttest set input_format_json_try_infer_numbers_from_strings=1; +set allow_experimental_object_type=1; desc format(JSONEachRow, '{"x" : "123"}'); desc format(JSONEachRow, '{"x" : ["123", 123, 12.3]}'); diff --git a/tests/queries/0_stateless/02416_json_object_inference.sql b/tests/queries/0_stateless/02416_json_object_inference.sql index b861468a08a..24f50930a68 100644 --- a/tests/queries/0_stateless/02416_json_object_inference.sql +++ b/tests/queries/0_stateless/02416_json_object_inference.sql @@ -1,2 +1,6 @@ -- Tags: no-fasttest +set allow_experimental_object_type=1; desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); +set allow_experimental_object_type=0; +desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -- {serverError 652} + diff 
--git a/tests/queries/0_stateless/test_ugtxj2/tuples b/tests/queries/0_stateless/test_ugtxj2/tuples deleted file mode 100644 index e69de29bb2d..00000000000 From bf4d67583042b73da14872cebdd045f62f71c996 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 28 Sep 2022 13:22:19 +0200 Subject: [PATCH 85/87] Fix backward incompatibility in Replicated database creation (#41875) --- src/Databases/DatabaseReplicated.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 507320fffde..c51864740f5 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -367,7 +367,7 @@ bool DatabaseReplicated::looksLikeReplicatedDatabasePath(const ZooKeeperPtr & cu return false; if (maybe_database_mark.starts_with(REPLICATED_DATABASE_MARK)) return true; - if (maybe_database_mark.empty()) + if (!maybe_database_mark.empty()) return false; /// Old versions did not have REPLICATED_DATABASE_MARK. Check specific nodes exist and add mark. From 9564b8b3fa3b108a56a960232e8f417c6a95336e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 28 Sep 2022 11:30:09 +0000 Subject: [PATCH 86/87] Add comment. --- src/Interpreters/MonotonicityCheckVisitor.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index a7ce0774862..c95f5209760 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -70,6 +70,9 @@ public: if (!pos) return false; + /// It is possible that tables list is empty. + /// IdentifierSemantic get the position from AST, and it can be not valid to use it. 
+ /// Example is re-analysing a part of AST for storage Merge, see 02147_order_by_optimizations.sql if (*pos >= tables.size()) return false; From 82139fad0e6554d3f61323ed2de878b6394f8d3f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 11:47:16 +0000 Subject: [PATCH 87/87] Docs: Remove obsolete modelEvaluate() mention --- docs/en/sql-reference/functions/other-functions.md | 5 ----- docs/ru/sql-reference/functions/other-functions.md | 6 ------ docs/zh/sql-reference/functions/other-functions.md | 5 ----- 3 files changed, 16 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 877179a66a6..b80d75e3611 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1818,11 +1818,6 @@ Result: └──────────────────────────────────────────────────┘ ``` -## modelEvaluate(model_name, …) - -Evaluate external model. -Accepts a model name and model arguments. Returns Float64. - ## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n) Evaluate external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learing. diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 5e89a4f1236..5c8584cd2a0 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -1722,12 +1722,6 @@ SELECT joinGet(db_test.id_val,'val',toUInt32(number)) from numbers(4) SETTINGS j └──────────────────────────────────────────────────┘ ``` -## modelEvaluate(model_name, …) {#function-modelevaluate} - -Оценивает внешнюю модель. - -Принимает на вход имя и аргументы модели. Возвращает Float64. - ## throwIf(x\[, message\[, error_code\]\]) {#throwifx-custom-message} Бросает исключение, если аргумент не равен нулю. 
diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index fde55ec884f..a475420ba64 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -625,11 +625,6 @@ ORDER BY k ASC 使用指定的连接键从Join类型引擎的表中获取数据。 -## modelEvaluate(model_name, …) {#function-modelevaluate} - -使用外部模型计算。 -接受模型的名称以及模型的参数。返回Float64类型的值。 - ## throwIf(x) {#throwifx} 如果参数不为零则抛出异常。